LLVM  7.0.0svn
InstCombineLoadStoreAlloca.cpp
1 //===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visit functions for load, store and alloca.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/MapVector.h"
16 #include "llvm/ADT/SmallString.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/Loads.h"
20 #include "llvm/IR/ConstantRange.h"
21 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/MDBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
27 using namespace llvm;
28 using namespace PatternMatch;
29 
30 #define DEBUG_TYPE "instcombine"
31 
32 STATISTIC(NumDeadStore, "Number of dead stores eliminated");
33 STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
34 
35 /// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
36 /// some part of a constant global variable. This intentionally only accepts
37 /// constant expressions because we can't rewrite arbitrary instructions.
38 static bool pointsToConstantGlobal(Value *V) {
39  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
40  return GV->isConstant();
41 
42  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
43  if (CE->getOpcode() == Instruction::BitCast ||
44  CE->getOpcode() == Instruction::AddrSpaceCast ||
45  CE->getOpcode() == Instruction::GetElementPtr)
46  return pointsToConstantGlobal(CE->getOperand(0));
47  }
48  return false;
49 }
50 
51 /// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
52 /// pointer to an alloca. Ignore any reads of the pointer, return false if we
53 /// see any stores or other unknown uses. If we see pointer arithmetic, keep
54 /// track of whether it moves the pointer (with IsOffset) but otherwise traverse
55 /// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
56 /// the alloca, and if the source pointer is a pointer to a constant global, we
57 /// can optimize this.
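///
/// Illustrative shape of the pattern (editorial sketch, not from the source):
///   %buf = alloca [16 x i8]
///   %p   = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i64 0, i64 0
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %src, i64 16, i1 false)
///   ; ... only loads of %buf/%p afterwards; %src derived from a constant global
/// Here the memcpy becomes TheCopy and the walk reports success.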
58 static bool
59 isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
60                                SmallVectorImpl<Instruction *> &ToDelete) {
61  // We track lifetime intrinsics as we encounter them. If we decide to go
62  // ahead and replace the value with the global, this lets the caller quickly
63  // eliminate the markers.
64 
65  SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
66  ValuesToInspect.emplace_back(V, false);
67  while (!ValuesToInspect.empty()) {
68  auto ValuePair = ValuesToInspect.pop_back_val();
69  const bool IsOffset = ValuePair.second;
70  for (auto &U : ValuePair.first->uses()) {
71  auto *I = cast<Instruction>(U.getUser());
72 
73  if (auto *LI = dyn_cast<LoadInst>(I)) {
74  // Ignore non-volatile loads, they are always ok.
75  if (!LI->isSimple()) return false;
76  continue;
77  }
78 
79  if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
80  // If uses of the bitcast are ok, we are ok.
81  ValuesToInspect.emplace_back(I, IsOffset);
82  continue;
83  }
84  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
85  // If the GEP has all zero indices, it doesn't offset the pointer. If it
86  // doesn't, it does.
87  ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
88  continue;
89  }
90 
91  if (auto CS = CallSite(I)) {
92  // If this is the function being called then we treat it like a load and
93  // ignore it.
94  if (CS.isCallee(&U))
95  continue;
96 
97  unsigned DataOpNo = CS.getDataOperandNo(&U);
98  bool IsArgOperand = CS.isArgOperand(&U);
99 
100  // Inalloca arguments are clobbered by the call.
101  if (IsArgOperand && CS.isInAllocaArgument(DataOpNo))
102  return false;
103 
104  // If this is a readonly/readnone call site, then we know it is just a
105  // load (but one that potentially returns the value itself), so we can
106  // ignore it if we know that the value isn't captured.
107  if (CS.onlyReadsMemory() &&
108  (CS.getInstruction()->use_empty() || CS.doesNotCapture(DataOpNo)))
109  continue;
110 
111  // If this is being passed as a byval argument, the caller is making a
112  // copy, so it is only a read of the alloca.
113  if (IsArgOperand && CS.isByValArgument(DataOpNo))
114  continue;
115  }
116 
117  // Lifetime intrinsics can be handled by the caller.
118  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
119  if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
120  II->getIntrinsicID() == Intrinsic::lifetime_end) {
121  assert(II->use_empty() && "Lifetime markers have no result to use!");
122  ToDelete.push_back(II);
123  continue;
124  }
125  }
126 
127  // If this isn't our memcpy/memmove, reject it as something we can't
128  // handle.
129  MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
130  if (!MI)
131  return false;
132 
133  // If the transfer is using the alloca as a source of the transfer, then
134  // ignore it since it is a load (unless the transfer is volatile).
135  if (U.getOperandNo() == 1) {
136  if (MI->isVolatile()) return false;
137  continue;
138  }
139 
140  // If we already have seen a copy, reject the second one.
141  if (TheCopy) return false;
142 
143  // If the pointer has been offset from the start of the alloca, we can't
144  // safely handle this.
145  if (IsOffset) return false;
146 
147  // If the memintrinsic isn't using the alloca as the dest, reject it.
148  if (U.getOperandNo() != 0) return false;
149 
150  // If the source of the memcpy/move is not a constant global, reject it.
151  if (!pointsToConstantGlobal(MI->getSource()))
152  return false;
153 
154  // Otherwise, the transform is safe. Remember the copy instruction.
155  TheCopy = MI;
156  }
157  }
158  return true;
159 }
160 
161 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
162 /// modified by a copy from a constant global. If we can prove this, we can
163 /// replace any uses of the alloca with uses of the global directly.
164 static MemTransferInst *
165 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
166                                SmallVectorImpl<Instruction *> &ToDelete) {
167  MemTransferInst *TheCopy = nullptr;
168  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
169  return TheCopy;
170  return nullptr;
171 }
172 
173 /// Returns true if V is dereferenceable for the size of the alloca.
174 static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
175  const DataLayout &DL) {
176  if (AI->isArrayAllocation())
177  return false;
178  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
179  if (!AllocaSize)
180  return false;
181  return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
182                                             APInt(64, AllocaSize), DL);
183 }
184 
185 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
186  // Check for array size of 1 (scalar allocation).
187  if (!AI.isArrayAllocation()) {
188  // i32 1 is the canonical array size for scalar allocations.
189  if (AI.getArraySize()->getType()->isIntegerTy(32))
190  return nullptr;
191 
192  // Canonicalize it.
193  Value *V = IC.Builder.getInt32(1);
194  AI.setOperand(0, V);
195  return &AI;
196  }
197 
198  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
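  // (Editorial illustration: "%a = alloca i32, i32 4" becomes
  //  "%a = alloca [4 x i32]" plus a getelementptr to element 0, so the rest of
  //  the pipeline sees a plain scalar allocation.)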
199  if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
200  Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
201  AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
202  New->setAlignment(AI.getAlignment());
203 
204  // Scan to the end of the allocation instructions, to skip over a block of
205  // allocas if possible...also skip interleaved debug info
206  //
207  BasicBlock::iterator It(New);
208  while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It))
209  ++It;
210 
211  // Now that It is pointing to the first non-allocation-inst in the block,
212  // insert our getelementptr instruction...
213  //
214  Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
215  Value *NullIdx = Constant::getNullValue(IdxTy);
216  Value *Idx[2] = {NullIdx, NullIdx};
217  Instruction *GEP =
218  GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub");
219  IC.InsertNewInstBefore(GEP, *It);
220 
221  // Now make everything use the getelementptr instead of the original
222  // allocation.
223  return IC.replaceInstUsesWith(AI, GEP);
224  }
225 
226  if (isa<UndefValue>(AI.getArraySize()))
227  return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
228 
229  // Ensure that the alloca array size argument has type intptr_t, so that
230  // any casting is exposed early.
231  Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
232  if (AI.getArraySize()->getType() != IntPtrTy) {
233  Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
234  AI.setOperand(0, V);
235  return &AI;
236  }
237 
238  return nullptr;
239 }
240 
241 namespace {
242 // If I and V are pointers in different address space, it is not allowed to
243 // use replaceAllUsesWith since I and V have different types. A
244 // non-target-specific transformation should not use addrspacecast on V since
245 // the two address space may be disjoint depending on target.
246 //
247 // This class chases down uses of the old pointer until reaching the load
248 // instructions, then replaces the old pointer in the load instructions with
249 // the new pointer. If during the chasing it sees bitcast or GEP, it will
250 // create new bitcast or GEP with the new pointer and use them in the load
251 // instruction.
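//
// Editorial sketch (not from the original source): given
//   %a = ...pointer in addrspace(0)...
//   %v = load i32, i32* %a
// and a replacement pointer %g in addrspace(4), the load is recreated as
//   %v = load i32, i32 addrspace(4)* %g
// rather than RAUW'ing %a, because the two pointer types differ.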
252 class PointerReplacer {
253 public:
254  PointerReplacer(InstCombiner &IC) : IC(IC) {}
255  void replacePointer(Instruction &I, Value *V);
256 
257 private:
258  void findLoadAndReplace(Instruction &I);
259  void replace(Instruction *I);
260  Value *getReplacement(Value *I);
261 
262  SmallVector<Instruction *, 4> Path;
263  MapVector<Value *, Value *> WorkMap;
264  InstCombiner &IC;
265 };
266 } // end anonymous namespace
267 
268 void PointerReplacer::findLoadAndReplace(Instruction &I) {
269  for (auto U : I.users()) {
270  auto *Inst = dyn_cast<Instruction>(&*U);
271  if (!Inst)
272  return;
273  LLVM_DEBUG(dbgs() << "Found pointer user: " << *U << '\n');
274  if (isa<LoadInst>(Inst)) {
275  for (auto P : Path)
276  replace(P);
277  replace(Inst);
278  } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
279  Path.push_back(Inst);
280  findLoadAndReplace(*Inst);
281  Path.pop_back();
282  } else {
283  return;
284  }
285  }
286 }
287 
288 Value *PointerReplacer::getReplacement(Value *V) {
289  auto Loc = WorkMap.find(V);
290  if (Loc != WorkMap.end())
291  return Loc->second;
292  return nullptr;
293 }
294 
295 void PointerReplacer::replace(Instruction *I) {
296  if (getReplacement(I))
297  return;
298 
299  if (auto *LT = dyn_cast<LoadInst>(I)) {
300  auto *V = getReplacement(LT->getPointerOperand());
301  assert(V && "Operand not replaced");
302  auto *NewI = new LoadInst(V);
303  NewI->takeName(LT);
304  IC.InsertNewInstWith(NewI, *LT);
305  IC.replaceInstUsesWith(*LT, NewI);
306  WorkMap[LT] = NewI;
307  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
308  auto *V = getReplacement(GEP->getPointerOperand());
309  assert(V && "Operand not replaced");
310  SmallVector<Value *, 8> Indices;
311  Indices.append(GEP->idx_begin(), GEP->idx_end());
312  auto *NewI = GetElementPtrInst::Create(
313  V->getType()->getPointerElementType(), V, Indices);
314  IC.InsertNewInstWith(NewI, *GEP);
315  NewI->takeName(GEP);
316  WorkMap[GEP] = NewI;
317  } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
318  auto *V = getReplacement(BC->getOperand(0));
319  assert(V && "Operand not replaced");
320  auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
321                                 V->getType()->getPointerAddressSpace());
322  auto *NewI = new BitCastInst(V, NewT);
323  IC.InsertNewInstWith(NewI, *BC);
324  NewI->takeName(BC);
325  WorkMap[BC] = NewI;
326  } else {
327  llvm_unreachable("should never reach here");
328  }
329 }
330 
331 void PointerReplacer::replacePointer(Instruction &I, Value *V) {
332 #ifndef NDEBUG
333  auto *PT = cast<PointerType>(I.getType());
334  auto *NT = cast<PointerType>(V->getType());
335  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
336  "Invalid usage");
337 #endif
338  WorkMap[&I] = V;
339  findLoadAndReplace(I);
340 }
341 
342 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
343  if (auto *I = simplifyAllocaArraySize(*this, AI))
344  return I;
345 
346  if (AI.getAllocatedType()->isSized()) {
347  // If the alignment is 0 (unspecified), assign it the preferred alignment.
348  if (AI.getAlignment() == 0)
349  AI.setAlignment(DL.getPrefTypeAlignment(AI.getAllocatedType()));
350 
351  // Move all allocas of zero byte objects to the entry block and merge them
352  // together. Note that we only do this for allocas, because malloc should
353  // allocate and return a unique pointer, even for a zero byte allocation.
354  if (DL.getTypeAllocSize(AI.getAllocatedType()) == 0) {
355  // For a zero sized alloca there is no point in doing an array allocation.
356  // This is helpful if the array size is a complicated expression not used
357  // elsewhere.
358  if (AI.isArrayAllocation()) {
359  AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
360  return &AI;
361  }
362 
363  // Get the first instruction in the entry block.
364  BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
365  Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
366  if (FirstInst != &AI) {
367  // If the entry block doesn't start with a zero-size alloca then move
368  // this one to the start of the entry block. There is no problem with
369  // dominance as the array size was forced to a constant earlier already.
370  AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
371  if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
372  DL.getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
373  AI.moveBefore(FirstInst);
374  return &AI;
375  }
376 
377  // If the alignment of the entry block alloca is 0 (unspecified),
378  // assign it the preferred alignment.
379  if (EntryAI->getAlignment() == 0)
380  EntryAI->setAlignment(
381  DL.getPrefTypeAlignment(EntryAI->getAllocatedType()));
382  // Replace this zero-sized alloca with the one at the start of the entry
383  // block after ensuring that the address will be aligned enough for both
384  // types.
385  unsigned MaxAlign = std::max(EntryAI->getAlignment(),
386  AI.getAlignment());
387  EntryAI->setAlignment(MaxAlign);
388  if (AI.getType() != EntryAI->getType())
389  return new BitCastInst(EntryAI, AI.getType());
390  return replaceInstUsesWith(AI, EntryAI);
391  }
392  }
393  }
394 
395  if (AI.getAlignment()) {
396  // Check to see if this allocation is only modified by a memcpy/memmove from
397  // a constant global whose alignment is equal to or exceeds that of the
398  // allocation. If this is the case, we can change all users to use
399  // the constant global instead. This is commonly produced by the CFE by
400  // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
401  // is only subsequently read.
402  SmallVector<Instruction *, 4> ToDelete;
403  if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
404  unsigned SourceAlign = getOrEnforceKnownAlignment(
405  Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
406  if (AI.getAlignment() <= SourceAlign &&
407  isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
408  LLVM_DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
409  LLVM_DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
410  for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
411  eraseInstFromFunction(*ToDelete[i]);
412  Constant *TheSrc = cast<Constant>(Copy->getSource());
413  auto *SrcTy = TheSrc->getType();
414  auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(),
415  SrcTy->getPointerAddressSpace());
416  Constant *Cast =
417      ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy);
418  if (AI.getType()->getPointerAddressSpace() ==
419  SrcTy->getPointerAddressSpace()) {
420  Instruction *NewI = replaceInstUsesWith(AI, Cast);
421  eraseInstFromFunction(*Copy);
422  ++NumGlobalCopies;
423  return NewI;
424  } else {
425  PointerReplacer PtrReplacer(*this);
426  PtrReplacer.replacePointer(AI, Cast);
427  ++NumGlobalCopies;
428  }
429  }
430  }
431  }
432 
433  // At last, use the generic allocation site handler to aggressively remove
434  // unused allocas.
435  return visitAllocSite(AI);
436 }
437 
438 // Are we allowed to form an atomic load or store of this type?
439 static bool isSupportedAtomicType(Type *Ty) {
440  return Ty->isIntOrPtrTy() || Ty->isFloatingPointTy();
441 }
442 
443 /// Helper to combine a load to a new type.
444 ///
445 /// This just does the work of combining a load to a new type. It handles
446 /// metadata, etc., and returns the new instruction. The \c NewTy should be the
447 /// loaded *value* type. This will convert it to a pointer, cast the operand to
448 /// that pointer type, load it, etc.
449 ///
450 /// Note that this will create all of the instructions with whatever insert
451 /// point the \c InstCombiner currently is using.
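///
/// Editorial illustration (assumption: 64-bit pointers, not from the source):
/// with \c NewTy == i8*, the load
///   %v = load i64, i64* %p
/// is rebuilt as
///   %c = bitcast i64* %p to i8**
///   %v = load i8*, i8** %c
/// with the original load's metadata re-attached where it is still valid.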
452 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
453                                       const Twine &Suffix = "") {
454  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
455  "can't fold an atomic load to requested type");
456 
457  Value *Ptr = LI.getPointerOperand();
458  unsigned AS = LI.getPointerAddressSpace();
459  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
460  LI.getAllMetadata(MD);
461 
462  Value *NewPtr = nullptr;
463  if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
464  NewPtr->getType()->getPointerElementType() == NewTy &&
465  NewPtr->getType()->getPointerAddressSpace() == AS))
466  NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
467 
468  LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
469  NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
470  NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
471  MDBuilder MDB(NewLoad->getContext());
472  for (const auto &MDPair : MD) {
473  unsigned ID = MDPair.first;
474  MDNode *N = MDPair.second;
475  // Note, essentially every kind of metadata should be preserved here! This
476  // routine is supposed to clone a load instruction changing *only its type*.
477  // The only metadata it makes sense to drop is metadata which is invalidated
478  // when the pointer type changes. This should essentially never be the case
479  // in LLVM, but we explicitly switch over only known metadata to be
480  // conservatively correct. If you are adding metadata to LLVM which pertains
481  // to loads, you almost certainly want to add it here.
482  switch (ID) {
483  case LLVMContext::MD_dbg:
484  case LLVMContext::MD_tbaa:
485  case LLVMContext::MD_prof:
486  case LLVMContext::MD_fpmath:
487  case LLVMContext::MD_tbaa_struct:
488  case LLVMContext::MD_invariant_load:
489  case LLVMContext::MD_alias_scope:
490  case LLVMContext::MD_noalias:
491  case LLVMContext::MD_nontemporal:
492  case LLVMContext::MD_mem_parallel_loop_access:
493  // All of these directly apply.
494  NewLoad->setMetadata(ID, N);
495  break;
496 
497  case LLVMContext::MD_nonnull:
498  copyNonnullMetadata(LI, N, *NewLoad);
499  break;
500  case LLVMContext::MD_align:
501  case LLVMContext::MD_dereferenceable:
502  case LLVMContext::MD_dereferenceable_or_null:
503  // These only directly apply if the new type is also a pointer.
504  if (NewTy->isPointerTy())
505  NewLoad->setMetadata(ID, N);
506  break;
507  case LLVMContext::MD_range:
508  copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
509  break;
510  }
511  }
512  return NewLoad;
513 }
514 
515 /// Combine a store to a new type.
516 ///
517 /// Returns the newly created store instruction.
519  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
520  "can't fold an atomic store of requested type");
521 
522  Value *Ptr = SI.getPointerOperand();
523  unsigned AS = SI.getPointerAddressSpace();
524  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
525  SI.getAllMetadata(MD);
526 
527  StoreInst *NewStore = IC.Builder.CreateAlignedStore(
528  V, IC.Builder.CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
529  SI.getAlignment(), SI.isVolatile());
530  NewStore->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
531  for (const auto &MDPair : MD) {
532  unsigned ID = MDPair.first;
533  MDNode *N = MDPair.second;
534  // Note, essentially every kind of metadata should be preserved here! This
535  // routine is supposed to clone a store instruction changing *only its
536  // type*. The only metadata it makes sense to drop is metadata which is
537  // invalidated when the pointer type changes. This should essentially
538  // never be the case in LLVM, but we explicitly switch over only known
539  // metadata to be conservatively correct. If you are adding metadata to
540  // LLVM which pertains to stores, you almost certainly want to add it
541  // here.
542  switch (ID) {
543  case LLVMContext::MD_dbg:
544  case LLVMContext::MD_tbaa:
545  case LLVMContext::MD_prof:
546  case LLVMContext::MD_fpmath:
547  case LLVMContext::MD_tbaa_struct:
548  case LLVMContext::MD_alias_scope:
549  case LLVMContext::MD_noalias:
550  case LLVMContext::MD_nontemporal:
551  case LLVMContext::MD_mem_parallel_loop_access:
552  // All of these directly apply.
553  NewStore->setMetadata(ID, N);
554  break;
555 
556  case LLVMContext::MD_invariant_load:
557  case LLVMContext::MD_nonnull:
558  case LLVMContext::MD_range:
559  case LLVMContext::MD_align:
560  case LLVMContext::MD_dereferenceable:
561  case LLVMContext::MD_dereferenceable_or_null:
562  // These don't apply for stores.
563  break;
564  }
565  }
566 
567  return NewStore;
568 }
569 
570 /// Returns true if the instruction represents a minmax pattern like:
571 /// select ((cmp load V1, load V2), V1, V2).
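/// Editorial illustration of the matched IR (not from the original source):
///   %l1 = load i32, i32* %a
///   %l2 = load i32, i32* %b
///   %c  = icmp slt i32 %l1, %l2
///   %m  = select i1 %c, i32* %a, i32* %b
/// Here V (possibly behind a bitcast) is %m.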
572 static bool isMinMaxWithLoads(Value *V) {
573  assert(V->getType()->isPointerTy() && "Expected pointer type.");
574  // Ignore possible ty* to ixx* bitcast.
575  V = peekThroughBitcast(V);
576  // Check that select is select ((cmp load V1, load V2), V1, V2) - minmax
577  // pattern.
578  CmpInst::Predicate Pred;
579  Instruction *L1;
580  Instruction *L2;
581  Value *LHS;
582  Value *RHS;
583  if (!match(V, m_Select(m_Cmp(Pred, m_Instruction(L1), m_Instruction(L2)),
584  m_Value(LHS), m_Value(RHS))))
585  return false;
586  return (match(L1, m_Load(m_Specific(LHS))) &&
587  match(L2, m_Load(m_Specific(RHS)))) ||
588  (match(L1, m_Load(m_Specific(RHS))) &&
589  match(L2, m_Load(m_Specific(LHS))));
590 }
591 
592 /// Combine loads to match the type of their uses' value after looking
593 /// through intervening bitcasts.
594 ///
595 /// The core idea here is that if the result of a load is used in an operation,
596 /// we should load the type most conducive to that operation. For example, when
597 /// loading an integer and converting that immediately to a pointer, we should
598 /// instead directly load a pointer.
599 ///
600 /// However, this routine must never change the width of a load or the number of
601 /// loads as that would introduce a semantic change. This combine is expected to
602 /// be a semantic no-op which just allows loads to more closely model the types
603 /// of their consuming operations.
604 ///
605 /// Currently, we also refuse to change the precise type used for an atomic load
606 /// or a volatile load. This is debatable, and might be reasonable to change
607 /// later. However, it is risky in case some backend or other part of LLVM is
608 /// relying on the exact type loaded to select appropriate atomic operations.
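///
/// Editorial illustration (assumption: 64-bit pointers, not from the source):
/// if the load's only user is a no-op cast such as
///   %i = load i64, i64* %p
///   %q = inttoptr i64 %i to i8*
/// the load is rewritten to load i8* directly, keeping the width unchanged.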
609 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
610  // FIXME: We could probably with some care handle both volatile and ordered
611  // atomic loads here but it isn't clear that this is important.
612  if (!LI.isUnordered())
613  return nullptr;
614 
615  if (LI.use_empty())
616  return nullptr;
617 
618  // swifterror values can't be bitcasted.
619  if (LI.getPointerOperand()->isSwiftError())
620  return nullptr;
621 
622  Type *Ty = LI.getType();
623  const DataLayout &DL = IC.getDataLayout();
624 
625  // Try to canonicalize loads which are only ever stored to operate over
626  // integers instead of any other type. We only do this when the loaded type
627  // is sized and has a size exactly the same as its store size and the store
628  // size is a legal integer type.
629  // Do not perform canonicalization if minmax pattern is found (to avoid
630  // infinite loop).
631  if (!Ty->isIntegerTy() && Ty->isSized() &&
632  DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
633  DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
634  !DL.isNonIntegralPointerType(Ty) &&
635  !isMinMaxWithLoads(
636  peekThroughBitcast(LI.getPointerOperand(), /*OneUseOnly=*/true))) {
637  if (all_of(LI.users(), [&LI](User *U) {
638  auto *SI = dyn_cast<StoreInst>(U);
639  return SI && SI->getPointerOperand() != &LI &&
640  !SI->getPointerOperand()->isSwiftError();
641  })) {
642  LoadInst *NewLoad = combineLoadToNewType(
643  IC, LI,
644  Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
645  // Replace all the stores with stores of the newly loaded value.
646  for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
647  auto *SI = cast<StoreInst>(*UI++);
648  IC.Builder.SetInsertPoint(SI);
649  combineStoreToNewValue(IC, *SI, NewLoad);
650  IC.eraseInstFromFunction(*SI);
651  }
652  assert(LI.use_empty() && "Failed to remove all users of the load!");
653  // Return the old load so the combiner can delete it safely.
654  return &LI;
655  }
656  }
657 
658  // Fold away bit casts of the loaded value by loading the desired type.
659  // We can do this for BitCastInsts as well as casts from and to pointer types,
660  // as long as those are noops (i.e., the source or dest type have the same
661  // bitwidth as the target's pointers).
662  if (LI.hasOneUse())
663  if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
664  if (CI->isNoopCast(DL))
665  if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
666  LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
667  CI->replaceAllUsesWith(NewLoad);
668  IC.eraseInstFromFunction(*CI);
669  return &LI;
670  }
671 
672  // FIXME: We should also canonicalize loads of vectors when their elements are
673  // cast to other types.
674  return nullptr;
675 }
676 
677 static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
678  // FIXME: We could probably with some care handle both volatile and atomic
679  // stores here but it isn't clear that this is important.
680  if (!LI.isSimple())
681  return nullptr;
682 
683  Type *T = LI.getType();
684  if (!T->isAggregateType())
685  return nullptr;
686 
687  StringRef Name = LI.getName();
688  assert(LI.getAlignment() && "Alignment must be set at this point");
689 
690  if (auto *ST = dyn_cast<StructType>(T)) {
691  // If the struct only has one element, we unpack.
692  auto NumElements = ST->getNumElements();
693  if (NumElements == 1) {
694  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
695  ".unpack");
696  AAMDNodes AAMD;
697  LI.getAAMetadata(AAMD);
698  NewLoad->setAAMetadata(AAMD);
699  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
700  UndefValue::get(T), NewLoad, 0, Name));
701  }
702 
703  // We don't want to break loads with padding here as we'd lose
704  // the knowledge that padding exists for the rest of the pipeline.
705  const DataLayout &DL = IC.getDataLayout();
706  auto *SL = DL.getStructLayout(ST);
707  if (SL->hasPadding())
708  return nullptr;
709 
710  auto Align = LI.getAlignment();
711  if (!Align)
712  Align = DL.getABITypeAlignment(ST);
713 
714  auto *Addr = LI.getPointerOperand();
715  auto *IdxType = Type::getInt32Ty(T->getContext());
716  auto *Zero = ConstantInt::get(IdxType, 0);
717 
718  Value *V = UndefValue::get(T);
719  for (unsigned i = 0; i < NumElements; i++) {
720  Value *Indices[2] = {
721  Zero,
722  ConstantInt::get(IdxType, i),
723  };
724  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
725  Name + ".elt");
726  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
727  auto *L = IC.Builder.CreateAlignedLoad(Ptr, EltAlign, Name + ".unpack");
728  // Propagate AA metadata. It'll still be valid on the narrowed load.
729  AAMDNodes AAMD;
730  LI.getAAMetadata(AAMD);
731  L->setAAMetadata(AAMD);
732  V = IC.Builder.CreateInsertValue(V, L, i);
733  }
734 
735  V->setName(Name);
736  return IC.replaceInstUsesWith(LI, V);
737  }
738 
739  if (auto *AT = dyn_cast<ArrayType>(T)) {
740  auto *ET = AT->getElementType();
741  auto NumElements = AT->getNumElements();
742  if (NumElements == 1) {
743  LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
744  AAMDNodes AAMD;
745  LI.getAAMetadata(AAMD);
746  NewLoad->setAAMetadata(AAMD);
747  return IC.replaceInstUsesWith(LI, IC.Builder.CreateInsertValue(
748  UndefValue::get(T), NewLoad, 0, Name));
749  }
750 
751  // Bail out if the array is too large. Ideally we would like to optimize
752  // arrays of arbitrary size but this has a terrible impact on compile time.
753  // The threshold here is chosen arbitrarily, maybe needs a little bit of
754  // tuning.
755  if (NumElements > IC.MaxArraySizeForCombine)
756  return nullptr;
757 
758  const DataLayout &DL = IC.getDataLayout();
759  auto EltSize = DL.getTypeAllocSize(ET);
760  auto Align = LI.getAlignment();
761  if (!Align)
762  Align = DL.getABITypeAlignment(T);
763 
764  auto *Addr = LI.getPointerOperand();
765  auto *IdxType = Type::getInt64Ty(T->getContext());
766  auto *Zero = ConstantInt::get(IdxType, 0);
767 
768  Value *V = UndefValue::get(T);
769  uint64_t Offset = 0;
770  for (uint64_t i = 0; i < NumElements; i++) {
771  Value *Indices[2] = {
772  Zero,
773  ConstantInt::get(IdxType, i),
774  };
775  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
776  Name + ".elt");
777  auto *L = IC.Builder.CreateAlignedLoad(Ptr, MinAlign(Align, Offset),
778  Name + ".unpack");
779  AAMDNodes AAMD;
780  LI.getAAMetadata(AAMD);
781  L->setAAMetadata(AAMD);
782  V = IC.Builder.CreateInsertValue(V, L, i);
783  Offset += EltSize;
784  }
785 
786  V->setName(Name);
787  return IC.replaceInstUsesWith(LI, V);
788  }
789 
790  return nullptr;
791 }
792 
793 // If we can determine that all possible objects pointed to by the provided
794 // pointer value are, not only dereferenceable, but also definitively less than
795 // or equal to the provided maximum size, then return true (constant global
796 // values and allocas fall into this category). Otherwise, return false.
797 //
798 // FIXME: This should probably live in ValueTracking (or similar).
799 static bool isObjectSizeLessThanOrEq(Value *V, uint64_t MaxSize,
800  const DataLayout &DL) {
801  SmallPtrSet<Value *, 4> Visited;
802  SmallVector<Value *, 4> Worklist(1, V);
803 
804  do {
805  Value *P = Worklist.pop_back_val();
806  P = P->stripPointerCasts();
807 
808  if (!Visited.insert(P).second)
809  continue;
810 
811  if (SelectInst *SI = dyn_cast<SelectInst>(P)) {
812  Worklist.push_back(SI->getTrueValue());
813  Worklist.push_back(SI->getFalseValue());
814  continue;
815  }
816 
817  if (PHINode *PN = dyn_cast<PHINode>(P)) {
818  for (Value *IncValue : PN->incoming_values())
819  Worklist.push_back(IncValue);
820  continue;
821  }
822 
823  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(P)) {
824  if (GA->isInterposable())
825  return false;
826  Worklist.push_back(GA->getAliasee());
827  continue;
828  }
829 
830  // If we know how big this object is, and it is less than MaxSize, continue
831  // searching. Otherwise, return false.
832  if (AllocaInst *AI = dyn_cast<AllocaInst>(P)) {
833  if (!AI->getAllocatedType()->isSized())
834  return false;
835 
836  ConstantInt *CS = dyn_cast<ConstantInt>(AI->getArraySize());
837  if (!CS)
838  return false;
839 
840  uint64_t TypeSize = DL.getTypeAllocSize(AI->getAllocatedType());
841  // Make sure that, even if the multiplication below would wrap as a
842  // uint64_t, we still do the right thing.
843  if ((CS->getValue().zextOrSelf(128)*APInt(128, TypeSize)).ugt(MaxSize))
844  return false;
845  continue;
846  }
847 
848  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P)) {
849  if (!GV->hasDefinitiveInitializer() || !GV->isConstant())
850  return false;
851 
852  uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
853  if (InitSize > MaxSize)
854  return false;
855  continue;
856  }
857 
858  return false;
859  } while (!Worklist.empty());
860 
861  return true;
862 }
863 
864 // If we're indexing into an object of a known size, and the outer index is
865 // not a constant, but having any value but zero would lead to undefined
866 // behavior, replace it with zero.
867 //
868 // For example, if we have:
869 // @f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
870 // ...
871 // %arrayidx = getelementptr inbounds [1 x i32]* @f.a, i64 0, i64 %x
872 // ... = load i32* %arrayidx, align 4
873 // Then we know that we can replace %x in the GEP with i64 0.
874 //
875 // FIXME: We could fold any GEP index to zero that would cause UB if it were
876 // not zero. Currently, we only handle the first such index. We could also
877 // search through non-zero constant indices if we kept track of the
878 // offsets those indices implied.
879 static bool canReplaceGEPIdxWithZero(InstCombiner &IC, GetElementPtrInst *GEPI,
880                                      Instruction *MemI, unsigned &Idx) {
881  if (GEPI->getNumOperands() < 2)
882  return false;
883 
884  // Find the first non-zero index of a GEP. If all indices are zero, return
885  // one past the last index.
886  auto FirstNZIdx = [](const GetElementPtrInst *GEPI) {
887  unsigned I = 1;
888  for (unsigned IE = GEPI->getNumOperands(); I != IE; ++I) {
889  Value *V = GEPI->getOperand(I);
890  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
891  if (CI->isZero())
892  continue;
893 
894  break;
895  }
896 
897  return I;
898  };
899 
900  // Skip through initial 'zero' indices, and find the corresponding pointer
901  // type. See if the next index is not a constant.
902  Idx = FirstNZIdx(GEPI);
903  if (Idx == GEPI->getNumOperands())
904  return false;
905  if (isa<Constant>(GEPI->getOperand(Idx)))
906  return false;
907 
908  SmallVector<Value *, 4> Ops(GEPI->idx_begin(), GEPI->idx_begin() + Idx);
909  Type *AllocTy =
910    GetElementPtrInst::getIndexedType(GEPI->getSourceElementType(), Ops);
911  if (!AllocTy || !AllocTy->isSized())
912  return false;
913  const DataLayout &DL = IC.getDataLayout();
914  uint64_t TyAllocSize = DL.getTypeAllocSize(AllocTy);
915 
916  // If there are more indices after the one we might replace with a zero, make
917  // sure they're all non-negative. If any of them are negative, the overall
918  // address being computed might be before the base address determined by the
919  // first non-zero index.
920  auto IsAllNonNegative = [&]() {
921  for (unsigned i = Idx+1, e = GEPI->getNumOperands(); i != e; ++i) {
922  KnownBits Known = IC.computeKnownBits(GEPI->getOperand(i), 0, MemI);
923  if (Known.isNonNegative())
924  continue;
925  return false;
926  }
927 
928  return true;
929  };
930 
931  // FIXME: If the GEP is not inbounds, and there are extra indices after the
932  // one we'll replace, those could cause the address computation to wrap
933  // (rendering the IsAllNonNegative() check below insufficient). We can do
934  // better, ignoring zero indices (and other indices we can prove small
935  // enough not to wrap).
936  if (Idx+1 != GEPI->getNumOperands() && !GEPI->isInBounds())
937  return false;
938 
939  // Note that isObjectSizeLessThanOrEq will return true only if the pointer is
940  // also known to be dereferenceable.
941  return isObjectSizeLessThanOrEq(GEPI->getOperand(0), TyAllocSize, DL) &&
942  IsAllNonNegative();
943 }
944 
945 // If we're indexing into an object with a variable index for the memory
946 // access, but the object has only one element, we can assume that the index
947 // will always be zero. If we replace the GEP, return it.
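// Editorial illustration (not from the original source): for
//   %p = getelementptr inbounds [1 x i32], [1 x i32]* %obj, i64 0, i64 %x
// feeding a load or store, a clone of the GEP with %x replaced by 0 is used
// instead, since any non-zero %x would be undefined behavior.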
948 template <typename T>
949 static Instruction *replaceGEPIdxWithZero(InstCombiner &IC, Value *Ptr,
950                                           T &MemI) {
951  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
952  unsigned Idx;
953  if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
954  Instruction *NewGEPI = GEPI->clone();
955  NewGEPI->setOperand(Idx,
956  ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
957  NewGEPI->insertBefore(GEPI);
958  MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
959  return NewGEPI;
960  }
961  }
962 
963  return nullptr;
964 }
965 
965 
966 static bool canSimplifyNullStoreOrGEP(StoreInst &SI) {
967  if (NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()))
968  return false;
969 
970  auto *Ptr = SI.getPointerOperand();
971  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
972  Ptr = GEPI->getOperand(0);
973  return (isa<ConstantPointerNull>(Ptr) &&
974  !NullPointerIsDefined(SI.getFunction(), SI.getPointerAddressSpace()));
975 }
976 
977 static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
978  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
979  const Value *GEPI0 = GEPI->getOperand(0);
980  if (isa<ConstantPointerNull>(GEPI0) &&
981  !NullPointerIsDefined(LI.getFunction(), GEPI->getPointerAddressSpace()))
982  return true;
983  }
984  if (isa<UndefValue>(Op) ||
985  (isa<ConstantPointerNull>(Op) &&
986  !NullPointerIsDefined(LI.getFunction(), LI.getPointerAddressSpace())))
987  return true;
988  return false;
989 }
990 
991 Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
992  Value *Op = LI.getOperand(0);
993 
994  // Try to canonicalize the loaded type.
995  if (Instruction *Res = combineLoadToOperationType(*this, LI))
996  return Res;
997 
998  // Attempt to improve the alignment.
999  unsigned KnownAlign = getOrEnforceKnownAlignment(
1000  Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
1001  unsigned LoadAlign = LI.getAlignment();
1002  unsigned EffectiveLoadAlign =
1003  LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
1004 
1005  if (KnownAlign > EffectiveLoadAlign)
1006  LI.setAlignment(KnownAlign);
1007  else if (LoadAlign == 0)
1008  LI.setAlignment(EffectiveLoadAlign);
1009 
1010  // Replace GEP indices if possible.
1011  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
1012  Worklist.Add(NewGEPI);
1013  return &LI;
1014  }
1015 
1016  if (Instruction *Res = unpackLoadToAggregate(*this, LI))
1017  return Res;
1018 
1019  // Do really simple store-to-load forwarding and load CSE, to catch cases
1020  // where there are several consecutive memory accesses to the same location,
1021  // separated by a few arithmetic operations.
1022  BasicBlock::iterator BBI(LI);
1023  bool IsLoadCSE = false;
1024  if (Value *AvailableVal = FindAvailableLoadedValue(
1025  &LI, LI.getParent(), BBI, DefMaxInstsToScan, AA, &IsLoadCSE)) {
1026  if (IsLoadCSE)
1027  combineMetadataForCSE(cast<LoadInst>(AvailableVal), &LI);
1028 
1029  return replaceInstUsesWith(
1030  LI, Builder.CreateBitOrPointerCast(AvailableVal, LI.getType(),
1031  LI.getName() + ".cast"));
1032  }
1033 
1034  // None of the following transforms are legal for volatile/ordered atomic
1035  // loads. Most of them do apply for unordered atomics.
1036  if (!LI.isUnordered()) return nullptr;
1037 
1038  // load(gep null, ...) -> unreachable
1039  // load null/undef -> unreachable
1040  // TODO: Consider a target hook for valid address spaces for this xforms.
1041  if (canSimplifyNullLoadOrGEP(LI, Op)) {
1042  // Insert a new store to null instruction before the load to indicate
1043  // that this code is not reachable. We do this instead of inserting
1044  // an unreachable instruction directly because we cannot modify the
1045  // CFG.
1046  StoreInst *SI = new StoreInst(UndefValue::get(LI.getType()),
1047  Constant::getNullValue(Op->getType()), &LI);
1048  SI->setDebugLoc(LI.getDebugLoc());
1049  return replaceInstUsesWith(LI, UndefValue::get(LI.getType()));
1050  }
1051 
1052  if (Op->hasOneUse()) {
1053  // Change select and PHI nodes to select values instead of addresses: this
1054  // helps alias analysis out a lot, allows many other simplifications, and
1055  // exposes redundancy in the code.
1056  //
1057  // Note that we cannot do the transformation unless we know that the
1058  // introduced loads cannot trap! Something like this is valid as long as
1059  // the condition is always false: load (select bool %C, int* null, int* %G),
1060  // but it would not be valid if we transformed it to load from null
1061  // unconditionally.
1062  //
1063  if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
1064  // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2).
1065  unsigned Align = LI.getAlignment();
1066  if (isSafeToLoadUnconditionally(SI->getOperand(1), Align, DL, SI) &&
1067  isSafeToLoadUnconditionally(SI->getOperand(2), Align, DL, SI)) {
1068  LoadInst *V1 = Builder.CreateLoad(SI->getOperand(1),
1069  SI->getOperand(1)->getName()+".val");
1070  LoadInst *V2 = Builder.CreateLoad(SI->getOperand(2),
1071  SI->getOperand(2)->getName()+".val");
1072  assert(LI.isUnordered() && "implied by above");
1073  V1->setAlignment(Align);
1074  V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1075  V2->setAlignment(Align);
1076  V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
1077  return SelectInst::Create(SI->getCondition(), V1, V2);
1078  }
1079 
1080  // load (select (cond, null, P)) -> load P
1081  if (isa<ConstantPointerNull>(SI->getOperand(1)) &&
1082  !NullPointerIsDefined(SI->getFunction(),
1083  LI.getPointerAddressSpace())) {
1084  LI.setOperand(0, SI->getOperand(2));
1085  return &LI;
1086  }
1087 
1088  // load (select (cond, P, null)) -> load P
1089  if (isa<ConstantPointerNull>(SI->getOperand(2)) &&
1090  !NullPointerIsDefined(SI->getFunction(),
1091  LI.getPointerAddressSpace())) {
1092  LI.setOperand(0, SI->getOperand(1));
1093  return &LI;
1094  }
1095  }
1096  }
1097  return nullptr;
1098 }
1099 
1100 /// Look for extractelement/insertvalue sequence that acts like a bitcast.
1101 ///
1102 /// \returns underlying value that was "cast", or nullptr otherwise.
1103 ///
1104 /// For example, if we have:
1105 ///
1106 /// %E0 = extractelement <2 x double> %U, i32 0
1107 /// %V0 = insertvalue [2 x double] undef, double %E0, 0
1108 /// %E1 = extractelement <2 x double> %U, i32 1
1109 /// %V1 = insertvalue [2 x double] %V0, double %E1, 1
1110 ///
1111 /// and the layout of a <2 x double> is isomorphic to a [2 x double],
1112 /// then %V1 can be safely approximated by a conceptual "bitcast" of %U.
1113 /// Note that %U may contain non-undef values where %V1 has undef.
1114 static Value *likeBitCastFromVector(InstCombiner &IC, Value *V) {
1115  Value *U = nullptr;
1116  while (auto *IV = dyn_cast<InsertValueInst>(V)) {
1117  auto *E = dyn_cast<ExtractElementInst>(IV->getInsertedValueOperand());
1118  if (!E)
1119  return nullptr;
1120  auto *W = E->getVectorOperand();
1121  if (!U)
1122  U = W;
1123  else if (U != W)
1124  return nullptr;
1125  auto *CI = dyn_cast<ConstantInt>(E->getIndexOperand());
1126  if (!CI || IV->getNumIndices() != 1 || CI->getZExtValue() != *IV->idx_begin())
1127  return nullptr;
1128  V = IV->getAggregateOperand();
1129  }
1130  if (!isa<UndefValue>(V) ||!U)
1131  return nullptr;
1132 
1133  auto *UT = cast<VectorType>(U->getType());
1134  auto *VT = V->getType();
1135  // Check that types UT and VT are bitwise isomorphic.
1136  const auto &DL = IC.getDataLayout();
1137  if (DL.getTypeStoreSizeInBits(UT) != DL.getTypeStoreSizeInBits(VT)) {
1138  return nullptr;
1139  }
1140  if (auto *AT = dyn_cast<ArrayType>(VT)) {
1141  if (AT->getNumElements() != UT->getNumElements())
1142  return nullptr;
1143  } else {
1144  auto *ST = cast<StructType>(VT);
1145  if (ST->getNumElements() != UT->getNumElements())
1146  return nullptr;
1147  for (const auto *EltT : ST->elements()) {
1148  if (EltT != UT->getElementType())
1149  return nullptr;
1150  }
1151  }
1152  return U;
1153 }
1154 
1155 /// Combine stores to match the type of value being stored.
1156 ///
1157 /// The core idea here is that the memory does not have any intrinsic type and
1158 /// where we can we should match the type of a store to the type of value being
1159 /// stored.
1160 ///
1161 /// However, this routine must never change the width of a store or the number of
1162 /// stores as that would introduce a semantic change. This combine is expected to
1163 /// be a semantic no-op which just allows stores to more closely model the types
1164 /// of their incoming values.
1165 ///
1166 /// Currently, we also refuse to change the precise type used for an atomic or
1167 /// volatile store. This is debatable, and might be reasonable to change later.
1168 /// However, it is risky in case some backend or other part of LLVM is relying
1169 /// on the exact type stored to select appropriate atomic operations.
1170 ///
1171 /// \returns true if the store was successfully combined away. This indicates
1172 /// the caller must erase the store instruction. We have to let the caller erase
1173 /// the store instruction as otherwise there is no way to signal whether it was
1174 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
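///
/// Editorial illustration (not from the original source):
///   %b = bitcast <4 x i32> %v to i128
///   store i128 %b, i128* %p
/// becomes a store of %v itself through %p bitcast to <4 x i32>*, so the store
/// width is unchanged but the stored type matches the incoming value.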
1175 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
1176  // FIXME: We could probably with some care handle both volatile and ordered
1177  // atomic stores here but it isn't clear that this is important.
1178  if (!SI.isUnordered())
1179  return false;
1180 
1181  // swifterror values can't be bitcasted.
1182  if (SI.getPointerOperand()->isSwiftError())
1183  return false;
1184 
1185  Value *V = SI.getValueOperand();
1186 
1187  // Fold away bit casts of the stored value by storing the original type.
1188  if (auto *BC = dyn_cast<BitCastInst>(V)) {
1189  V = BC->getOperand(0);
1190  if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
1191  combineStoreToNewValue(IC, SI, V);
1192  return true;
1193  }
1194  }
1195 
1196  if (Value *U = likeBitCastFromVector(IC, V))
1197  if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
1198  combineStoreToNewValue(IC, SI, U);
1199  return true;
1200  }
1201 
1202  // FIXME: We should also canonicalize stores of vectors when their elements
1203  // are cast to other types.
1204  return false;
1205 }
1206 
1207 static bool unpackStoreToAggregate(InstCombiner &IC, StoreInst &SI) {
1208  // FIXME: We could probably with some care handle both volatile and atomic
1209  // stores here but it isn't clear that this is important.
1210  if (!SI.isSimple())
1211  return false;
1212 
1213  Value *V = SI.getValueOperand();
1214  Type *T = V->getType();
1215 
1216  if (!T->isAggregateType())
1217  return false;
1218 
1219  if (auto *ST = dyn_cast<StructType>(T)) {
1220  // If the struct only has one element, we unpack.
1221  unsigned Count = ST->getNumElements();
1222  if (Count == 1) {
1223  V = IC.Builder.CreateExtractValue(V, 0);
1224  combineStoreToNewValue(IC, SI, V);
1225  return true;
1226  }
1227 
1228  // We don't want to break stores with padding here as we'd lose
1229  // the knowledge that padding exists for the rest of the pipeline.
1230  const DataLayout &DL = IC.getDataLayout();
1231  auto *SL = DL.getStructLayout(ST);
1232  if (SL->hasPadding())
1233  return false;
1234 
1235  auto Align = SI.getAlignment();
1236  if (!Align)
1237  Align = DL.getABITypeAlignment(ST);
1238 
1239  SmallString<16> EltName = V->getName();
1240  EltName += ".elt";
1241  auto *Addr = SI.getPointerOperand();
1242  SmallString<16> AddrName = Addr->getName();
1243  AddrName += ".repack";
1244 
1245  auto *IdxType = Type::getInt32Ty(ST->getContext());
1246  auto *Zero = ConstantInt::get(IdxType, 0);
1247  for (unsigned i = 0; i < Count; i++) {
1248  Value *Indices[2] = {
1249  Zero,
1250  ConstantInt::get(IdxType, i),
1251  };
1252  auto *Ptr = IC.Builder.CreateInBoundsGEP(ST, Addr, makeArrayRef(Indices),
1253  AddrName);
1254  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1255  auto EltAlign = MinAlign(Align, SL->getElementOffset(i));
1256  llvm::Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1257  AAMDNodes AAMD;
1258  SI.getAAMetadata(AAMD);
1259  NS->setAAMetadata(AAMD);
1260  }
1261 
1262  return true;
1263  }
1264 
1265  if (auto *AT = dyn_cast<ArrayType>(T)) {
1266  // If the array only has one element, we unpack.
1267  auto NumElements = AT->getNumElements();
1268  if (NumElements == 1) {
1269  V = IC.Builder.CreateExtractValue(V, 0);
1270  combineStoreToNewValue(IC, SI, V);
1271  return true;
1272  }
1273 
1274  // Bail out if the array is too large. Ideally we would like to optimize
1275  // arrays of arbitrary size but this has a terrible impact on compile time.
1276  // The threshold here is chosen arbitrarily, maybe needs a little bit of
1277  // tuning.
1278  if (NumElements > IC.MaxArraySizeForCombine)
1279  return false;
1280 
1281  const DataLayout &DL = IC.getDataLayout();
1282  auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1283  auto Align = SI.getAlignment();
1284  if (!Align)
1285  Align = DL.getABITypeAlignment(T);
1286 
1287  SmallString<16> EltName = V->getName();
1288  EltName += ".elt";
1289  auto *Addr = SI.getPointerOperand();
1290  SmallString<16> AddrName = Addr->getName();
1291  AddrName += ".repack";
1292 
1293  auto *IdxType = Type::getInt64Ty(T->getContext());
1294  auto *Zero = ConstantInt::get(IdxType, 0);
1295 
1296  uint64_t Offset = 0;
1297  for (uint64_t i = 0; i < NumElements; i++) {
1298  Value *Indices[2] = {
1299  Zero,
1300  ConstantInt::get(IdxType, i),
1301  };
1302  auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, makeArrayRef(Indices),
1303  AddrName);
1304  auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1305  auto EltAlign = MinAlign(Align, Offset);
1306  Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
1307  AAMDNodes AAMD;
1308  SI.getAAMetadata(AAMD);
1309  NS->setAAMetadata(AAMD);
1310  Offset += EltSize;
1311  }
1312 
1313  return true;
1314  }
1315 
1316  return false;
1317 }
1318 
1319 /// equivalentAddressValues - Test if A and B will obviously have the same
1320 /// value. This includes recognizing that %t0 and %t1 will have the same
1321 /// value in code like this:
1322 /// %t0 = getelementptr \@a, 0, 3
1323 /// store i32 0, i32* %t0
1324 /// %t1 = getelementptr \@a, 0, 3
1325 /// %t2 = load i32* %t1
1326 ///
1327 static bool equivalentAddressValues(Value *A, Value *B) {
1328  // Test if the values are trivially equivalent.
1329  if (A == B) return true;
1330 
1331  // Test if the values come from identical arithmetic instructions.
1332  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
1333  // it's only used to compare two uses within the same basic block, which
1334  // means that they'll always either have the same value or one of them
1335  // will have an undefined value.
1336  if (isa<BinaryOperator>(A) ||
1337  isa<CastInst>(A) ||
1338  isa<PHINode>(A) ||
1339  isa<GetElementPtrInst>(A))
1340  if (Instruction *BI = dyn_cast<Instruction>(B))
1341  if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
1342  return true;
1343 
1344  // Otherwise they may not be equivalent.
1345  return false;
1346 }
1347 
1348 /// Converts store (bitcast (load (bitcast (select ...)))) to
1349 /// store (load (select ...)), where select is minmax:
1350 /// select ((cmp load V1, load V2), V1, V2).
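/// Editorial illustration (not from the original source):
///   %m = select i1 %c, float* %a, float* %b   ; minmax over loads of %a/%b
///   %p = bitcast float* %m to i32*
///   %v = load i32, i32* %p
///   %q = bitcast float* %dst to i32*
///   store i32 %v, i32* %q
/// is rewritten to load float from %m and store it to %dst directly.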
1351 static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
1352                                                 StoreInst &SI) {
1353  // bitcast?
1354  if (!match(SI.getPointerOperand(), m_BitCast(m_Value())))
1355  return false;
1356  // load? integer?
1357  Value *LoadAddr;
1358  if (!match(SI.getValueOperand(), m_Load(m_BitCast(m_Value(LoadAddr)))))
1359  return false;
1360  auto *LI = cast<LoadInst>(SI.getValueOperand());
1361  if (!LI->getType()->isIntegerTy())
1362  return false;
1363  if (!isMinMaxWithLoads(LoadAddr))
1364  return false;
1365 
1366  if (!all_of(LI->users(), [LI, LoadAddr](User *U) {
1367  auto *SI = dyn_cast<StoreInst>(U);
1368  return SI && SI->getPointerOperand() != LI &&
1369  peekThroughBitcast(SI->getPointerOperand()) != LoadAddr &&
1370  !SI->getPointerOperand()->isSwiftError();
1371  }))
1372  return false;
1373 
1374  IC.Builder.SetInsertPoint(LI);
1375  LoadInst *NewLI = combineLoadToNewType(
1376  IC, *LI, LoadAddr->getType()->getPointerElementType());
1377  // Replace all the stores with stores of the newly loaded value.
1378  for (auto *UI : LI->users()) {
1379  auto *USI = cast<StoreInst>(UI);
1380  IC.Builder.SetInsertPoint(USI);
1381  combineStoreToNewValue(IC, *USI, NewLI);
1382  }
1383  IC.replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
1384  IC.eraseInstFromFunction(*LI);
1385  return true;
1386 }
1387 
1388 Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
1389  Value *Val = SI.getOperand(0);
1390  Value *Ptr = SI.getOperand(1);
1391 
1392  // Try to canonicalize the stored type.
1393  if (combineStoreToValueType(*this, SI))
1394  return eraseInstFromFunction(SI);
1395 
1396  // Attempt to improve the alignment.
1397  unsigned KnownAlign = getOrEnforceKnownAlignment(
1398  Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
1399  unsigned StoreAlign = SI.getAlignment();
1400  unsigned EffectiveStoreAlign =
1401  StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
1402 
1403  if (KnownAlign > EffectiveStoreAlign)
1404  SI.setAlignment(KnownAlign);
1405  else if (StoreAlign == 0)
1406  SI.setAlignment(EffectiveStoreAlign);
1407 
1408  // Try to canonicalize the stored type.
1409  if (unpackStoreToAggregate(*this, SI))
1410  return eraseInstFromFunction(SI);
1411 
1412  if (removeBitcastsFromLoadStoreOnMinMax(*this, SI))
1413  return eraseInstFromFunction(SI);
1414 
1415  // Replace GEP indices if possible.
1416  if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
1417  Worklist.Add(NewGEPI);
1418  return &SI;
1419  }
1420 
1421  // Don't hack volatile/ordered stores.
1422  // FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
1423  if (!SI.isUnordered()) return nullptr;
1424 
1425  // If the RHS is an alloca with a single use, zapify the store, making the
1426  // alloca dead.
1427  if (Ptr->hasOneUse()) {
1428  if (isa<AllocaInst>(Ptr))
1429  return eraseInstFromFunction(SI);
1430  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
1431  if (isa<AllocaInst>(GEP->getOperand(0))) {
1432  if (GEP->getOperand(0)->hasOneUse())
1433  return eraseInstFromFunction(SI);
1434  }
1435  }
1436  }
1437 
1438  // Do really simple DSE, to catch cases where there are several consecutive
1439  // stores to the same location, separated by a few arithmetic operations. This
1440  // situation often occurs with bitfield accesses.
1441  BasicBlock::iterator BBI(SI);
1442  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
1443  --ScanInsts) {
1444  --BBI;
1445  // Don't count debug info directives, lest they affect codegen,
1446  // and we skip pointer-to-pointer bitcasts, which are NOPs.
1447  if (isa<DbgInfoIntrinsic>(BBI) ||
1448  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1449  ScanInsts++;
1450  continue;
1451  }
1452 
1453  if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
1454  // Prev store isn't volatile, and stores to the same location?
1455  if (PrevSI->isUnordered() && equivalentAddressValues(PrevSI->getOperand(1),
1456  SI.getOperand(1))) {
1457  ++NumDeadStore;
1458  ++BBI;
1459  eraseInstFromFunction(*PrevSI);
1460  continue;
1461  }
1462  break;
1463  }
1464 
1465  // If this is a load, we have to stop. However, if the loaded value is from
1466  // the pointer we're loading and is producing the pointer we're storing,
1467  // then *this* store is dead (X = load P; store X -> P).
1468  if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
1469  if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr)) {
1470  assert(SI.isUnordered() && "can't eliminate ordering operation");
1471  return eraseInstFromFunction(SI);
1472  }
1473 
1474  // Otherwise, this is a load from some other location. Stores before it
1475  // may not be dead.
1476  break;
1477  }
1478 
1479  // Don't skip over loads, throws or things that can modify memory.
1480  if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory() || BBI->mayThrow())
1481  break;
1482  }
1483 
1484  // store X, null -> turns into 'unreachable' in SimplifyCFG
1485  // store X, GEP(null, Y) -> turns into 'unreachable' in SimplifyCFG
1486  if (canSimplifyNullStoreOrGEP(SI)) {
1487  if (!isa<UndefValue>(Val)) {
1488  SI.setOperand(0, UndefValue::get(Val->getType()));
1489  if (Instruction *U = dyn_cast<Instruction>(Val))
1490  Worklist.Add(U); // Dropped a use.
1491  }
1492  return nullptr; // Do not modify these!
1493  }
1494 
1495  // store undef, Ptr -> noop
1496  if (isa<UndefValue>(Val))
1497  return eraseInstFromFunction(SI);
1498 
1499  // If this store is the last instruction in the basic block (possibly
1500  // excepting debug info instructions), and if the block ends with an
1501  // unconditional branch, try to move it to the successor block.
1502  BBI = SI.getIterator();
1503  do {
1504  ++BBI;
1505  } while (isa<DbgInfoIntrinsic>(BBI) ||
1506  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
1507  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
1508  if (BI->isUnconditional())
1509  if (SimplifyStoreAtEndOfBlock(SI))
1510  return nullptr; // xform done!
1511 
1512  return nullptr;
1513 }
1514 
1515 /// SimplifyStoreAtEndOfBlock - Turn things like:
1516 /// if () { *P = v1; } else { *P = v2 }
1517 /// into a phi node with a store in the successor.
1518 ///
1519 /// Simplify things like:
1520 /// *P = v1; if () { *P = v2; }
1521 /// into a phi node with a store in the successor.
1522 ///
1523 bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
1524  assert(SI.isUnordered() &&
1525  "this code has not been auditted for volatile or ordered store case");
1526 
1527  BasicBlock *StoreBB = SI.getParent();
1528 
1529  // Check to see if the successor block has exactly two incoming edges. If
1530  // so, see if the other predecessor contains a store to the same location.
1531  // if so, insert a PHI node (if needed) and move the stores down.
1532  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);
1533 
1534  // Determine whether Dest has exactly two predecessors and, if so, compute
1535  // the other predecessor.
1536  pred_iterator PI = pred_begin(DestBB);
1537  BasicBlock *P = *PI;
1538  BasicBlock *OtherBB = nullptr;
1539 
1540  if (P != StoreBB)
1541  OtherBB = P;
1542 
1543  if (++PI == pred_end(DestBB))
1544  return false;
1545 
1546  P = *PI;
1547  if (P != StoreBB) {
1548  if (OtherBB)
1549  return false;
1550  OtherBB = P;
1551  }
1552  if (++PI != pred_end(DestBB))
1553  return false;
1554 
1555  // Bail out if all the relevant blocks aren't distinct (this can happen,
1556  // for example, if SI is in an infinite loop)
1557  if (StoreBB == DestBB || OtherBB == DestBB)
1558  return false;
1559 
1560  // Verify that the other block ends in a branch and is not otherwise empty.
1561  BasicBlock::iterator BBI(OtherBB->getTerminator());
1562  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
1563  if (!OtherBr || BBI == OtherBB->begin())
1564  return false;
1565 
1566  // If the other block ends in an unconditional branch, check for the 'if then
1567 // else' case: there is an instruction before the branch.
1568  StoreInst *OtherStore = nullptr;
1569  if (OtherBr->isUnconditional()) {
1570  --BBI;
1571  // Skip over debugging info.
1572  while (isa<DbgInfoIntrinsic>(BBI) ||
1573  (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
1574  if (BBI==OtherBB->begin())
1575  return false;
1576  --BBI;
1577  }
1578  // If this isn't a store, isn't a store to the same location, or is not the
1579  // right kind of store, bail out.
1580  OtherStore = dyn_cast<StoreInst>(BBI);
1581  if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
1582  !SI.isSameOperationAs(OtherStore))
1583  return false;
1584  } else {
1585  // Otherwise, the other block ended with a conditional branch. If one of the
1586  // destinations is StoreBB, then we have the if/then case.
1587  if (OtherBr->getSuccessor(0) != StoreBB &&
1588  OtherBr->getSuccessor(1) != StoreBB)
1589  return false;
1590 
1591  // Okay, we know that OtherBr now goes to DestBB and StoreBB, so this is an
1592  // if/then triangle. See if there is a store to the same ptr as SI that
1593  // lives in OtherBB.
1594  for (;; --BBI) {
1595  // Check to see if we find the matching store.
1596  if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
1597  if (OtherStore->getOperand(1) != SI.getOperand(1) ||
1598  !SI.isSameOperationAs(OtherStore))
1599  return false;
1600  break;
1601  }
1602  // If we find something that may be using or overwriting the stored
1603  // value, or if we run out of instructions, we can't do the xform.
1604  if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
1605  BBI->mayWriteToMemory() || BBI == OtherBB->begin())
1606  return false;
1607  }
1608 
1609  // In order to eliminate the store in OtherBB, we have to
1610  // make sure nothing reads or overwrites the stored value in
1611  // StoreBB.
1612  for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
1613  // FIXME: This should really be AA driven.
1614  if (I->mayReadFromMemory() || I->mayThrow() || I->mayWriteToMemory())
1615  return false;
1616  }
1617  }
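  // At this point, in the conditional-branch case, the CFG has roughly this
  // shape (all names are illustrative assumptions):
  //   OtherBB:
  //     store i32 %v1, i32* %P          ; OtherStore
  //     br i1 %c, label %StoreBB, label %DestBB
  //   StoreBB:
  //     store i32 %v2, i32* %P          ; SI
  //     br label %DestBB
  // and the scans above have verified that nothing in OtherBB after
  // OtherStore, and nothing in StoreBB before SI, may read or write memory
  // or throw.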
1618 
1619  // Insert a PHI node now if we need it.
1620  Value *MergedVal = OtherStore->getOperand(0);
1621  if (MergedVal != SI.getOperand(0)) {
1622  PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
1623  PN->addIncoming(SI.getOperand(0), SI.getParent());
1624  PN->addIncoming(OtherStore->getOperand(0), OtherBB);
1625  MergedVal = InsertNewInstBefore(PN, DestBB->front());
1626  }
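  // (If both predecessors store the same SSA value, MergedVal stays as that
  // value and no PHI node is created.)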
1627 
1628  // Advance to a place where it is safe to insert the new store and
1629  // insert it.
1630  BBI = DestBB->getFirstInsertionPt();
1631  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
1632  SI.isVolatile(),
1633  SI.getAlignment(),
1634  SI.getOrdering(),
1635  SI.getSyncScopeID());
1636  InsertNewInstBefore(NewSI, *BBI);
1637  // The debug locations of the original instructions might differ; merge them.
1638  NewSI->applyMergedLocation(SI.getDebugLoc(), OtherStore->getDebugLoc());
1639 
1640  // If the two stores had AA tags, merge them.
1641  AAMDNodes AATags;
1642  SI.getAAMetadata(AATags);
1643  if (AATags) {
1644  OtherStore->getAAMetadata(AATags, /* Merge = */ true);
1645  NewSI->setAAMetadata(AATags);
1646  }
1647 
1648  // Nuke the old stores.
1649  eraseInstFromFunction(SI);
1650  eraseInstFromFunction(*OtherStore);
1651  return true;
1652 }