LLVM  9.0.0svn
VectorUtils.cpp
Go to the documentation of this file.
1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines vectorizer utilities.
11 //
12 //===----------------------------------------------------------------------===//
13 
17 #include "llvm/Analysis/LoopInfo.h"
23 #include "llvm/IR/Constants.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/PatternMatch.h"
27 #include "llvm/IR/Value.h"
28 
29 #define DEBUG_TYPE "vectorutils"
30 
31 using namespace llvm;
32 using namespace llvm::PatternMatch;
33 
34 /// Maximum factor for an interleaved memory access.
36  "max-interleave-group-factor", cl::Hidden,
37  cl::desc("Maximum factor for an interleaved access group (default = 8)"),
38  cl::init(8));
39 
40 /// Return true if all of the intrinsic's arguments and return type are scalars
41 /// for the scalar form of the intrinsic and vectors for the vector form of the
42 /// intrinsic.
44  switch (ID) {
45  case Intrinsic::bswap: // Begin integer bit-manipulation.
46  case Intrinsic::bitreverse:
47  case Intrinsic::ctpop:
48  case Intrinsic::ctlz:
49  case Intrinsic::cttz:
50  case Intrinsic::fshl:
51  case Intrinsic::fshr:
52  case Intrinsic::sqrt: // Begin floating-point.
53  case Intrinsic::sin:
54  case Intrinsic::cos:
55  case Intrinsic::exp:
56  case Intrinsic::exp2:
57  case Intrinsic::log:
58  case Intrinsic::log10:
59  case Intrinsic::log2:
60  case Intrinsic::fabs:
61  case Intrinsic::minnum:
62  case Intrinsic::maxnum:
63  case Intrinsic::minimum:
64  case Intrinsic::maximum:
65  case Intrinsic::copysign:
66  case Intrinsic::floor:
67  case Intrinsic::ceil:
68  case Intrinsic::trunc:
69  case Intrinsic::rint:
70  case Intrinsic::nearbyint:
71  case Intrinsic::round:
72  case Intrinsic::pow:
73  case Intrinsic::fma:
74  case Intrinsic::fmuladd:
75  case Intrinsic::powi:
76  case Intrinsic::canonicalize:
77  case Intrinsic::sadd_sat:
78  case Intrinsic::ssub_sat:
79  case Intrinsic::uadd_sat:
80  case Intrinsic::usub_sat:
81  return true;
82  default:
83  return false;
84  }
85 }
86 
87 /// Identifies if the intrinsic has a scalar operand. It check for
88 /// ctlz,cttz and powi special intrinsics whose argument is scalar.
90  unsigned ScalarOpdIdx) {
91  switch (ID) {
92  case Intrinsic::ctlz:
93  case Intrinsic::cttz:
94  case Intrinsic::powi:
95  return (ScalarOpdIdx == 1);
96  default:
97  return false;
98  }
99 }
100 
101 /// Returns intrinsic ID for call.
102 /// For the input call instruction it finds mapping intrinsic and returns
103 /// its ID, in case it does not found it return not_intrinsic.
105  const TargetLibraryInfo *TLI) {
107  if (ID == Intrinsic::not_intrinsic)
109 
110  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
111  ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
112  ID == Intrinsic::sideeffect)
113  return ID;
115 }
116 
117 /// Find the operand of the GEP that should be checked for consecutive
118 /// stores. This ignores trailing indices that have no effect on the final
119 /// pointer.
121  const DataLayout &DL = Gep->getModule()->getDataLayout();
122  unsigned LastOperand = Gep->getNumOperands() - 1;
123  unsigned GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
124 
125  // Walk backwards and try to peel off zeros.
126  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
127  // Find the type we're currently indexing into.
128  gep_type_iterator GEPTI = gep_type_begin(Gep);
129  std::advance(GEPTI, LastOperand - 2);
130 
131  // If it's a type with the same allocation size as the result of the GEP we
132  // can peel off the zero index.
133  if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
134  break;
135  --LastOperand;
136  }
137 
138  return LastOperand;
139 }
140 
141 /// If the argument is a GEP, then returns the operand identified by
142 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
143 /// operand, it returns that instead.
146  if (!GEP)
147  return Ptr;
148 
149  unsigned InductionOperand = getGEPInductionOperand(GEP);
150 
151  // Check that all of the gep indices are uniform except for our induction
152  // operand.
153  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
154  if (i != InductionOperand &&
155  !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
156  return Ptr;
157  return GEP->getOperand(InductionOperand);
158 }
159 
160 /// If a value has only one user that is a CastInst, return it.
162  Value *UniqueCast = nullptr;
163  for (User *U : Ptr->users()) {
164  CastInst *CI = dyn_cast<CastInst>(U);
165  if (CI && CI->getType() == Ty) {
166  if (!UniqueCast)
167  UniqueCast = CI;
168  else
169  return nullptr;
170  }
171  }
172  return UniqueCast;
173 }
174 
175 /// Get the stride of a pointer access in a loop. Looks for symbolic
176 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
178  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
179  if (!PtrTy || PtrTy->isAggregateType())
180  return nullptr;
181 
182  // Try to remove a gep instruction to make the pointer (actually index at this
183  // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
184  // pointer, otherwise, we are analyzing the index.
185  Value *OrigPtr = Ptr;
186 
187  // The size of the pointer access.
188  int64_t PtrAccessSize = 1;
189 
190  Ptr = stripGetElementPtr(Ptr, SE, Lp);
191  const SCEV *V = SE->getSCEV(Ptr);
192 
193  if (Ptr != OrigPtr)
194  // Strip off casts.
195  while (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V))
196  V = C->getOperand();
197 
198  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
199  if (!S)
200  return nullptr;
201 
202  V = S->getStepRecurrence(*SE);
203  if (!V)
204  return nullptr;
205 
206  // Strip off the size of access multiplication if we are still analyzing the
207  // pointer.
208  if (OrigPtr == Ptr) {
209  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
210  if (M->getOperand(0)->getSCEVType() != scConstant)
211  return nullptr;
212 
213  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
214 
215  // Huge step value - give up.
216  if (APStepVal.getBitWidth() > 64)
217  return nullptr;
218 
219  int64_t StepVal = APStepVal.getSExtValue();
220  if (PtrAccessSize != StepVal)
221  return nullptr;
222  V = M->getOperand(1);
223  }
224  }
225 
226  // Strip off casts.
227  Type *StripedOffRecurrenceCast = nullptr;
228  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(V)) {
229  StripedOffRecurrenceCast = C->getType();
230  V = C->getOperand();
231  }
232 
233  // Look for the loop invariant symbolic value.
234  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
235  if (!U)
236  return nullptr;
237 
238  Value *Stride = U->getValue();
239  if (!Lp->isLoopInvariant(Stride))
240  return nullptr;
241 
242  // If we have stripped off the recurrence cast we have to make sure that we
243  // return the value that is used in this loop so that we can replace it later.
244  if (StripedOffRecurrenceCast)
245  Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
246 
247  return Stride;
248 }
249 
250 /// Given a vector and an element number, see if the scalar value is
251 /// already around as a register, for example if it were inserted then extracted
252 /// from the vector.
253 Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
254  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
255  VectorType *VTy = cast<VectorType>(V->getType());
256  unsigned Width = VTy->getNumElements();
257  if (EltNo >= Width) // Out of range access.
258  return UndefValue::get(VTy->getElementType());
259 
260  if (Constant *C = dyn_cast<Constant>(V))
261  return C->getAggregateElement(EltNo);
262 
263  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
264  // If this is an insert to a variable element, we don't know what it is.
265  if (!isa<ConstantInt>(III->getOperand(2)))
266  return nullptr;
267  unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
268 
269  // If this is an insert to the element we are looking for, return the
270  // inserted value.
271  if (EltNo == IIElt)
272  return III->getOperand(1);
273 
274  // Otherwise, the insertelement doesn't modify the value, recurse on its
275  // vector input.
276  return findScalarElement(III->getOperand(0), EltNo);
277  }
278 
279  if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
280  unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
281  int InEl = SVI->getMaskValue(EltNo);
282  if (InEl < 0)
283  return UndefValue::get(VTy->getElementType());
284  if (InEl < (int)LHSWidth)
285  return findScalarElement(SVI->getOperand(0), InEl);
286  return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
287  }
288 
289  // Extract a value from a vector add operation with a constant zero.
290  // TODO: Use getBinOpIdentity() to generalize this.
291  Value *Val; Constant *C;
292  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
293  if (Constant *Elt = C->getAggregateElement(EltNo))
294  if (Elt->isNullValue())
295  return findScalarElement(Val, EltNo);
296 
297  // Otherwise, we don't know.
298  return nullptr;
299 }
300 
301 /// Get splat value if the input is a splat vector or return nullptr.
302 /// This function is not fully general. It checks only 2 cases:
303 /// the input value is (1) a splat constants vector or (2) a sequence
304 /// of instructions that broadcast a single value into a vector.
305 ///
307 
308  if (auto *C = dyn_cast<Constant>(V))
309  if (isa<VectorType>(V->getType()))
310  return C->getSplatValue();
311 
312  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V);
313  if (!ShuffleInst)
314  return nullptr;
315  // All-zero (or undef) shuffle mask elements.
316  for (int MaskElt : ShuffleInst->getShuffleMask())
317  if (MaskElt != 0 && MaskElt != -1)
318  return nullptr;
319  // The first shuffle source is 'insertelement' with index 0.
320  auto *InsertEltInst =
321  dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0));
322  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) ||
323  !cast<ConstantInt>(InsertEltInst->getOperand(2))->isZero())
324  return nullptr;
325 
326  return InsertEltInst->getOperand(1);
327 }
328 
331  const TargetTransformInfo *TTI) {
332 
333  // DemandedBits will give us every value's live-out bits. But we want
334  // to ensure no extra casts would need to be inserted, so every DAG
335  // of connected values must have the same minimum bitwidth.
337  SmallVector<Value *, 16> Worklist;
339  SmallPtrSet<Value *, 16> Visited;
341  SmallPtrSet<Instruction *, 4> InstructionSet;
343 
344  // Determine the roots. We work bottom-up, from truncs or icmps.
345  bool SeenExtFromIllegalType = false;
346  for (auto *BB : Blocks)
347  for (auto &I : *BB) {
348  InstructionSet.insert(&I);
349 
350  if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
351  !TTI->isTypeLegal(I.getOperand(0)->getType()))
352  SeenExtFromIllegalType = true;
353 
354  // Only deal with non-vector integers up to 64-bits wide.
355  if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
356  !I.getType()->isVectorTy() &&
357  I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
358  // Don't make work for ourselves. If we know the loaded type is legal,
359  // don't add it to the worklist.
360  if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
361  continue;
362 
363  Worklist.push_back(&I);
364  Roots.insert(&I);
365  }
366  }
367  // Early exit.
368  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
369  return MinBWs;
370 
371  // Now proceed breadth-first, unioning values together.
372  while (!Worklist.empty()) {
373  Value *Val = Worklist.pop_back_val();
374  Value *Leader = ECs.getOrInsertLeaderValue(Val);
375 
376  if (Visited.count(Val))
377  continue;
378  Visited.insert(Val);
379 
380  // Non-instructions terminate a chain successfully.
381  if (!isa<Instruction>(Val))
382  continue;
383  Instruction *I = cast<Instruction>(Val);
384 
385  // If we encounter a type that is larger than 64 bits, we can't represent
386  // it so bail out.
387  if (DB.getDemandedBits(I).getBitWidth() > 64)
389 
390  uint64_t V = DB.getDemandedBits(I).getZExtValue();
391  DBits[Leader] |= V;
392  DBits[I] = V;
393 
394  // Casts, loads and instructions outside of our range terminate a chain
395  // successfully.
396  if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
397  !InstructionSet.count(I))
398  continue;
399 
400  // Unsafe casts terminate a chain unsuccessfully. We can't do anything
401  // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
402  // transform anything that relies on them.
403  if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
404  !I->getType()->isIntegerTy()) {
405  DBits[Leader] |= ~0ULL;
406  continue;
407  }
408 
409  // We don't modify the types of PHIs. Reductions will already have been
410  // truncated if possible, and inductions' sizes will have been chosen by
411  // indvars.
412  if (isa<PHINode>(I))
413  continue;
414 
415  if (DBits[Leader] == ~0ULL)
416  // All bits demanded, no point continuing.
417  continue;
418 
419  for (Value *O : cast<User>(I)->operands()) {
420  ECs.unionSets(Leader, O);
421  Worklist.push_back(O);
422  }
423  }
424 
425  // Now we've discovered all values, walk them to see if there are
426  // any users we didn't see. If there are, we can't optimize that
427  // chain.
428  for (auto &I : DBits)
429  for (auto *U : I.first->users())
430  if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
431  DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
432 
433  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
434  uint64_t LeaderDemandedBits = 0;
435  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
436  LeaderDemandedBits |= DBits[*MI];
437 
438  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
439  llvm::countLeadingZeros(LeaderDemandedBits);
440  // Round up to a power of 2
441  if (!isPowerOf2_64((uint64_t)MinBW))
442  MinBW = NextPowerOf2(MinBW);
443 
444  // We don't modify the types of PHIs. Reductions will already have been
445  // truncated if possible, and inductions' sizes will have been chosen by
446  // indvars.
447  // If we are required to shrink a PHI, abandon this entire equivalence class.
448  bool Abort = false;
449  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI)
450  if (isa<PHINode>(*MI) && MinBW < (*MI)->getType()->getScalarSizeInBits()) {
451  Abort = true;
452  break;
453  }
454  if (Abort)
455  continue;
456 
457  for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) {
458  if (!isa<Instruction>(*MI))
459  continue;
460  Type *Ty = (*MI)->getType();
461  if (Roots.count(*MI))
462  Ty = cast<Instruction>(*MI)->getOperand(0)->getType();
463  if (MinBW < Ty->getScalarSizeInBits())
464  MinBWs[cast<Instruction>(*MI)] = MinBW;
465  }
466  }
467 
468  return MinBWs;
469 }
470 
471 /// Add all access groups in @p AccGroups to @p List.
472 template <typename ListT>
473 static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
474  // Interpret an access group as a list containing itself.
475  if (AccGroups->getNumOperands() == 0) {
476  assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
477  List.insert(AccGroups);
478  return;
479  }
480 
481  for (auto &AccGroupListOp : AccGroups->operands()) {
482  auto *Item = cast<MDNode>(AccGroupListOp.get());
483  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
484  List.insert(Item);
485  }
486 }
487 
488 MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
489  if (!AccGroups1)
490  return AccGroups2;
491  if (!AccGroups2)
492  return AccGroups1;
493  if (AccGroups1 == AccGroups2)
494  return AccGroups1;
495 
497  addToAccessGroupList(Union, AccGroups1);
498  addToAccessGroupList(Union, AccGroups2);
499 
500  if (Union.size() == 0)
501  return nullptr;
502  if (Union.size() == 1)
503  return cast<MDNode>(Union.front());
504 
505  LLVMContext &Ctx = AccGroups1->getContext();
506  return MDNode::get(Ctx, Union.getArrayRef());
507 }
508 
510  const Instruction *Inst2) {
511  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
512  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
513 
514  if (!MayAccessMem1 && !MayAccessMem2)
515  return nullptr;
516  if (!MayAccessMem1)
518  if (!MayAccessMem2)
520 
523  if (!MD1 || !MD2)
524  return nullptr;
525  if (MD1 == MD2)
526  return MD1;
527 
528  // Use set for scalable 'contains' check.
529  SmallPtrSet<Metadata *, 4> AccGroupSet2;
530  addToAccessGroupList(AccGroupSet2, MD2);
531 
532  SmallVector<Metadata *, 4> Intersection;
533  if (MD1->getNumOperands() == 0) {
534  assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
535  if (AccGroupSet2.count(MD1))
536  Intersection.push_back(MD1);
537  } else {
538  for (const MDOperand &Node : MD1->operands()) {
539  auto *Item = cast<MDNode>(Node.get());
540  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
541  if (AccGroupSet2.count(Item))
542  Intersection.push_back(Item);
543  }
544  }
545 
546  if (Intersection.size() == 0)
547  return nullptr;
548  if (Intersection.size() == 1)
549  return cast<MDNode>(Intersection.front());
550 
551  LLVMContext &Ctx = Inst1->getContext();
552  return MDNode::get(Ctx, Intersection);
553 }
554 
555 /// \returns \p I after propagating metadata from \p VL.
557  Instruction *I0 = cast<Instruction>(VL[0]);
559  I0->getAllMetadataOtherThanDebugLoc(Metadata);
560 
565  MDNode *MD = I0->getMetadata(Kind);
566 
567  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
568  const Instruction *IJ = cast<Instruction>(VL[J]);
569  MDNode *IMD = IJ->getMetadata(Kind);
570  switch (Kind) {
572  MD = MDNode::getMostGenericTBAA(MD, IMD);
573  break;
575  MD = MDNode::getMostGenericAliasScope(MD, IMD);
576  break;
578  MD = MDNode::getMostGenericFPMath(MD, IMD);
579  break;
583  MD = MDNode::intersect(MD, IMD);
584  break;
586  MD = intersectAccessGroups(Inst, IJ);
587  break;
588  default:
589  llvm_unreachable("unhandled metadata");
590  }
591  }
592 
593  Inst->setMetadata(Kind, MD);
594  }
595 
596  return Inst;
597 }
598 
599 Constant *
601  const InterleaveGroup<Instruction> &Group) {
602  // All 1's means mask is not needed.
603  if (Group.getNumMembers() == Group.getFactor())
604  return nullptr;
605 
606  // TODO: support reversed access.
607  assert(!Group.isReverse() && "Reversed group not supported.");
608 
610  for (unsigned i = 0; i < VF; i++)
611  for (unsigned j = 0; j < Group.getFactor(); ++j) {
612  unsigned HasMember = Group.getMember(j) ? 1 : 0;
613  Mask.push_back(Builder.getInt1(HasMember));
614  }
615 
616  return ConstantVector::get(Mask);
617 }
618 
620  unsigned ReplicationFactor, unsigned VF) {
622  for (unsigned i = 0; i < VF; i++)
623  for (unsigned j = 0; j < ReplicationFactor; j++)
624  MaskVec.push_back(Builder.getInt32(i));
625 
626  return ConstantVector::get(MaskVec);
627 }
628 
630  unsigned NumVecs) {
632  for (unsigned i = 0; i < VF; i++)
633  for (unsigned j = 0; j < NumVecs; j++)
634  Mask.push_back(Builder.getInt32(j * VF + i));
635 
636  return ConstantVector::get(Mask);
637 }
638 
639 Constant *llvm::createStrideMask(IRBuilder<> &Builder, unsigned Start,
640  unsigned Stride, unsigned VF) {
642  for (unsigned i = 0; i < VF; i++)
643  Mask.push_back(Builder.getInt32(Start + i * Stride));
644 
645  return ConstantVector::get(Mask);
646 }
647 
649  unsigned NumInts, unsigned NumUndefs) {
651  for (unsigned i = 0; i < NumInts; i++)
652  Mask.push_back(Builder.getInt32(Start + i));
653 
655  for (unsigned i = 0; i < NumUndefs; i++)
656  Mask.push_back(Undef);
657 
658  return ConstantVector::get(Mask);
659 }
660 
661 /// A helper function for concatenating vectors. This function concatenates two
662 /// vectors having the same element type. If the second vector has fewer
663 /// elements than the first, it is padded with undefs.
665  Value *V2) {
666  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
667  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
668  assert(VecTy1 && VecTy2 &&
669  VecTy1->getScalarType() == VecTy2->getScalarType() &&
670  "Expect two vectors with the same element type");
671 
672  unsigned NumElts1 = VecTy1->getNumElements();
673  unsigned NumElts2 = VecTy2->getNumElements();
674  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
675 
676  if (NumElts1 > NumElts2) {
677  // Extend with UNDEFs.
678  Constant *ExtMask =
679  createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2);
680  V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask);
681  }
682 
683  Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0);
684  return Builder.CreateShuffleVector(V1, V2, Mask);
685 }
686 
688  unsigned NumVecs = Vecs.size();
689  assert(NumVecs > 1 && "Should be at least two vectors");
690 
691  SmallVector<Value *, 8> ResList;
692  ResList.append(Vecs.begin(), Vecs.end());
693  do {
694  SmallVector<Value *, 8> TmpList;
695  for (unsigned i = 0; i < NumVecs - 1; i += 2) {
696  Value *V0 = ResList[i], *V1 = ResList[i + 1];
697  assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
698  "Only the last vector may have a different type");
699 
700  TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
701  }
702 
703  // Push the last vector if the total number of vectors is odd.
704  if (NumVecs % 2 != 0)
705  TmpList.push_back(ResList[NumVecs - 1]);
706 
707  ResList = TmpList;
708  NumVecs = ResList.size();
709  } while (NumVecs > 1);
710 
711  return ResList[0];
712 }
713 
714 bool InterleavedAccessInfo::isStrided(int Stride) {
715  unsigned Factor = std::abs(Stride);
716  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
717 }
718 
719 void InterleavedAccessInfo::collectConstStrideAccesses(
721  const ValueToValueMap &Strides) {
722  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
723 
724  // Since it's desired that the load/store instructions be maintained in
725  // "program order" for the interleaved access analysis, we have to visit the
726  // blocks in the loop in reverse postorder (i.e., in a topological order).
727  // Such an ordering will ensure that any load/store that may be executed
728  // before a second load/store will precede the second load/store in
729  // AccessStrideInfo.
730  LoopBlocksDFS DFS(TheLoop);
731  DFS.perform(LI);
732  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
733  for (auto &I : *BB) {
734  auto *LI = dyn_cast<LoadInst>(&I);
735  auto *SI = dyn_cast<StoreInst>(&I);
736  if (!LI && !SI)
737  continue;
738 
740  // We don't check wrapping here because we don't know yet if Ptr will be
741  // part of a full group or a group with gaps. Checking wrapping for all
742  // pointers (even those that end up in groups with no gaps) will be overly
743  // conservative. For full groups, wrapping should be ok since if we would
744  // wrap around the address space we would do a memory access at nullptr
745  // even without the transformation. The wrapping checks are therefore
746  // deferred until after we've formed the interleaved groups.
747  int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
748  /*Assume=*/true, /*ShouldCheckWrap=*/false);
749 
750  const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
751  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
752  uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
753 
754  // An alignment of 0 means target ABI alignment.
755  unsigned Align = getLoadStoreAlignment(&I);
756  if (!Align)
757  Align = DL.getABITypeAlignment(PtrTy->getElementType());
758 
759  AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size, Align);
760  }
761 }
762 
763 // Analyze interleaved accesses and collect them into interleaved load and
764 // store groups.
765 //
766 // When generating code for an interleaved load group, we effectively hoist all
767 // loads in the group to the location of the first load in program order. When
768 // generating code for an interleaved store group, we sink all stores to the
769 // location of the last store. This code motion can change the order of load
770 // and store instructions and may break dependences.
771 //
772 // The code generation strategy mentioned above ensures that we won't violate
773 // any write-after-read (WAR) dependences.
774 //
775 // E.g., for the WAR dependence: a = A[i]; // (1)
776 // A[i] = b; // (2)
777 //
778 // The store group of (2) is always inserted at or below (2), and the load
779 // group of (1) is always inserted at or above (1). Thus, the instructions will
780 // never be reordered. All other dependences are checked to ensure the
781 // correctness of the instruction reordering.
782 //
783 // The algorithm visits all memory accesses in the loop in bottom-up program
784 // order. Program order is established by traversing the blocks in the loop in
785 // reverse postorder when collecting the accesses.
786 //
787 // We visit the memory accesses in bottom-up order because it can simplify the
788 // construction of store groups in the presence of write-after-write (WAW)
789 // dependences.
790 //
791 // E.g., for the WAW dependence: A[i] = a; // (1)
792 // A[i] = b; // (2)
793 // A[i + 1] = c; // (3)
794 //
795 // We will first create a store group with (3) and (2). (1) can't be added to
796 // this group because it and (2) are dependent. However, (1) can be grouped
797 // with other accesses that may precede it in program order. Note that a
798 // bottom-up order does not imply that WAW dependences should not be checked.
800  bool EnablePredicatedInterleavedMemAccesses) {
801  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
802  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
803 
804  // Holds all accesses with a constant stride.
806  collectConstStrideAccesses(AccessStrideInfo, Strides);
807 
808  if (AccessStrideInfo.empty())
809  return;
810 
811  // Collect the dependences in the loop.
812  collectDependences();
813 
814  // Holds all interleaved store groups temporarily.
816  // Holds all interleaved load groups temporarily.
818 
819  // Search in bottom-up program order for pairs of accesses (A and B) that can
820  // form interleaved load or store groups. In the algorithm below, access A
821  // precedes access B in program order. We initialize a group for B in the
822  // outer loop of the algorithm, and then in the inner loop, we attempt to
823  // insert each A into B's group if:
824  //
825  // 1. A and B have the same stride,
826  // 2. A and B have the same memory object size, and
827  // 3. A belongs in B's group according to its distance from B.
828  //
829  // Special care is taken to ensure group formation will not break any
830  // dependences.
831  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
832  BI != E; ++BI) {
833  Instruction *B = BI->first;
834  StrideDescriptor DesB = BI->second;
835 
836  // Initialize a group for B if it has an allowable stride. Even if we don't
837  // create a group for B, we continue with the bottom-up algorithm to ensure
838  // we don't break any of B's dependences.
839  InterleaveGroup<Instruction> *Group = nullptr;
840  if (isStrided(DesB.Stride) &&
841  (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
842  Group = getInterleaveGroup(B);
843  if (!Group) {
844  LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
845  << '\n');
846  Group = createInterleaveGroup(B, DesB.Stride, DesB.Align);
847  }
848  if (B->mayWriteToMemory())
849  StoreGroups.insert(Group);
850  else
851  LoadGroups.insert(Group);
852  }
853 
854  for (auto AI = std::next(BI); AI != E; ++AI) {
855  Instruction *A = AI->first;
856  StrideDescriptor DesA = AI->second;
857 
858  // Our code motion strategy implies that we can't have dependences
859  // between accesses in an interleaved group and other accesses located
860  // between the first and last member of the group. Note that this also
861  // means that a group can't have more than one member at a given offset.
862  // The accesses in a group can have dependences with other accesses, but
863  // we must ensure we don't extend the boundaries of the group such that
864  // we encompass those dependent accesses.
865  //
866  // For example, assume we have the sequence of accesses shown below in a
867  // stride-2 loop:
868  //
869  // (1, 2) is a group | A[i] = a; // (1)
870  // | A[i-1] = b; // (2) |
871  // A[i-3] = c; // (3)
872  // A[i] = d; // (4) | (2, 4) is not a group
873  //
874  // Because accesses (2) and (3) are dependent, we can group (2) with (1)
875  // but not with (4). If we did, the dependent access (3) would be within
876  // the boundaries of the (2, 4) group.
877  if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
878  // If a dependence exists and A is already in a group, we know that A
879  // must be a store since A precedes B and WAR dependences are allowed.
880  // Thus, A would be sunk below B. We release A's group to prevent this
881  // illegal code motion. A will then be free to form another group with
882  // instructions that precede it.
883  if (isInterleaved(A)) {
884  InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
885  StoreGroups.remove(StoreGroup);
886  releaseGroup(StoreGroup);
887  }
888 
889  // If a dependence exists and A is not already in a group (or it was
890  // and we just released it), B might be hoisted above A (if B is a
891  // load) or another store might be sunk below A (if B is a store). In
892  // either case, we can't add additional instructions to B's group. B
893  // will only form a group with instructions that it precedes.
894  break;
895  }
896 
897  // At this point, we've checked for illegal code motion. If either A or B
898  // isn't strided, there's nothing left to do.
899  if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
900  continue;
901 
902  // Ignore A if it's already in a group or isn't the same kind of memory
903  // operation as B.
904  // Note that mayReadFromMemory() isn't mutually exclusive to
905  // mayWriteToMemory in the case of atomic loads. We shouldn't see those
906  // here, canVectorizeMemory() should have returned false - except for the
907  // case we asked for optimization remarks.
908  if (isInterleaved(A) ||
909  (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
910  (A->mayWriteToMemory() != B->mayWriteToMemory()))
911  continue;
912 
913  // Check rules 1 and 2. Ignore A if its stride or size is different from
914  // that of B.
915  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
916  continue;
917 
918  // Ignore A if the memory object of A and B don't belong to the same
919  // address space
921  continue;
922 
923  // Calculate the distance from A to B.
924  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
925  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
926  if (!DistToB)
927  continue;
928  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
929 
930  // Check rule 3. Ignore A if its distance to B is not a multiple of the
931  // size.
932  if (DistanceToB % static_cast<int64_t>(DesB.Size))
933  continue;
934 
935  // All members of a predicated interleave-group must have the same predicate,
936  // and currently must reside in the same BB.
937  BasicBlock *BlockA = A->getParent();
938  BasicBlock *BlockB = B->getParent();
939  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
940  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
941  continue;
942 
943  // The index of A is the index of B plus A's distance to B in multiples
944  // of the size.
945  int IndexA =
946  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
947 
948  // Try to insert A into B's group.
949  if (Group->insertMember(A, IndexA, DesA.Align)) {
950  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
951  << " into the interleave group with" << *B
952  << '\n');
953  InterleaveGroupMap[A] = Group;
954 
955  // Set the first load in program order as the insert position.
956  if (A->mayReadFromMemory())
957  Group->setInsertPos(A);
958  }
959  } // Iteration over A accesses.
960  } // Iteration over B accesses.
961 
962  // Remove interleaved store groups with gaps.
963  for (auto *Group : StoreGroups)
964  if (Group->getNumMembers() != Group->getFactor()) {
965  LLVM_DEBUG(
966  dbgs() << "LV: Invalidate candidate interleaved store group due "
967  "to gaps.\n");
968  releaseGroup(Group);
969  }
970  // Remove interleaved groups with gaps (currently only loads) whose memory
971  // accesses may wrap around. We have to revisit the getPtrStride analysis,
972  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
973  // not check wrapping (see documentation there).
974  // FORNOW we use Assume=false;
975  // TODO: Change to Assume=true but making sure we don't exceed the threshold
976  // of runtime SCEV assumptions checks (thereby potentially failing to
977  // vectorize altogether).
978  // Additional optional optimizations:
979  // TODO: If we are peeling the loop and we know that the first pointer doesn't
980  // wrap then we can deduce that all pointers in the group don't wrap.
981  // This means that we can forcefully peel the loop in order to only have to
982  // check the first pointer for no-wrap. When we'll change to use Assume=true
983  // we'll only need at most one runtime check per interleaved group.
984  for (auto *Group : LoadGroups) {
985  // Case 1: A full group. Can Skip the checks; For full groups, if the wide
986  // load would wrap around the address space we would do a memory access at
987  // nullptr even without the transformation.
988  if (Group->getNumMembers() == Group->getFactor())
989  continue;
990 
991  // Case 2: If first and last members of the group don't wrap this implies
992  // that all the pointers in the group don't wrap.
993  // So we check only group member 0 (which is always guaranteed to exist),
994  // and group member Factor - 1; If the latter doesn't exist we rely on
995  // peeling (if it is a non-reveresed accsess -- see Case 3).
996  Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
997  if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
998  /*ShouldCheckWrap=*/true)) {
999  LLVM_DEBUG(
1000  dbgs() << "LV: Invalidate candidate interleaved group due to "
1001  "first group member potentially pointer-wrapping.\n");
1002  releaseGroup(Group);
1003  continue;
1004  }
1005  Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
1006  if (LastMember) {
1007  Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
1008  if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
1009  /*ShouldCheckWrap=*/true)) {
1010  LLVM_DEBUG(
1011  dbgs() << "LV: Invalidate candidate interleaved group due to "
1012  "last group member potentially pointer-wrapping.\n");
1013  releaseGroup(Group);
1014  }
1015  } else {
1016  // Case 3: A non-reversed interleaved load group with gaps: We need
1017  // to execute at least one scalar epilogue iteration. This will ensure
1018  // we don't speculatively access memory out-of-bounds. We only need
1019  // to look for a member at index factor - 1, since every group must have
1020  // a member at index zero.
1021  if (Group->isReverse()) {
1022  LLVM_DEBUG(
1023  dbgs() << "LV: Invalidate candidate interleaved group due to "
1024  "a reverse access with gaps.\n");
1025  releaseGroup(Group);
1026  continue;
1027  }
1028  LLVM_DEBUG(
1029  dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1030  RequiresScalarEpilogue = true;
1031  }
1032  }
1033 }
1034 
1036  // If no group had triggered the requirement to create an epilogue loop,
1037  // there is nothing to do.
1038  if (!requiresScalarEpilogue())
1039  return;
1040 
1041  // Avoid releasing a Group twice.
1043  for (auto &I : InterleaveGroupMap) {
1044  InterleaveGroup<Instruction> *Group = I.second;
1045  if (Group->requiresScalarEpilogue())
1046  DelSet.insert(Group);
1047  }
1048  for (auto *Ptr : DelSet) {
1049  LLVM_DEBUG(
1050  dbgs()
1051  << "LV: Invalidate candidate interleaved group due to gaps that "
1052  "require a scalar epilogue (not allowed under optsize) and cannot "
1053  "be masked (not enabled). \n");
1054  releaseGroup(Ptr);
1055  }
1056 
1057  RequiresScalarEpilogue = false;
1058 }
1059 
1060 template <typename InstT>
1061 void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1062  llvm_unreachable("addMetadata can only be used for Instruction");
1063 }
1064 
1065 namespace llvm {
1066 template <>
1069  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1070  [](std::pair<int, Instruction *> p) { return p.second; });
1071  propagateMetadata(NewInst, VL);
1072 }
1073 }
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode *>> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:244
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
uint64_t CallInst * C
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:711
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:71
void setInsertPos(InstTy *Inst)
Definition: VectorUtils.h:331
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1563
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock *> Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:376
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register...
const T & front() const
Return the first element of the SetVector.
Definition: SetVector.h:123
This class represents lattice values for constants.
Definition: AllocatorList.h:24
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
iterator begin() const
Definition: ArrayRef.h:137
const Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
reverse_iterator rend()
Definition: MapVector.h:77
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value *> VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal, MD_access_group].
The main scalar evolution driver.
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:923
This class represents a function call, abstracting a target machine&#39;s calling convention.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:90
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
This instruction constructs a fixed permutation of two input vectors.
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:705
This class implements a map that also provides access to all stored values in a deterministic order...
Definition: MapVector.h:38
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Metadata node.
Definition: Metadata.h:864
An instruction for reading from memory.
Definition: Instructions.h:168
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Hexagon Common GEP
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2018 maximum semantics.
Definition: APFloat.h:1262
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:189
This is the base class for unary cast operator classes.
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1509
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Intrinsic::ID getIntrinsicForCallSite(ImmutableCallSite ICS, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:347
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:48
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:371
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:353
bool isReverse() const
Definition: VectorUtils.h:268
static Value * concatenateTwoVectors(IRBuilder<> &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2018 minimum semantics.
Definition: APFloat.h:1249
bool empty() const
Definition: MapVector.h:80
RPOIterator endRPO() const
Definition: LoopIterator.h:141
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:158
This node represents multiplication of some number of SCEVs.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:642
const APInt & getAPInt() const
InstTy * getMember(unsigned Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:310
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1575
Constant * createSequentialMask(IRBuilder<> &Builder, unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:142
member_iterator member_begin(iterator I) const
This node represents a polynomial recurrence on the trip count of the specified loop.
op_range operands() const
Definition: Metadata.h:1067
unsigned getNumMembers() const
Definition: VectorUtils.h:271
LLVMContext & getContext() const
Definition: Metadata.h:924
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:27
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:221
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:910
Value * getLoadStorePointerOperand(Value *V)
A helper function that returns the pointer operand of a load or store instruction.
An instruction for storing to memory.
Definition: Instructions.h:321
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction...
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopInfo.cpp:817
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Value * getOperand(unsigned i) const
Definition: User.h:170
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Class to represent pointers.
Definition: DerivedTypes.h:467
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:335
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:304
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:854
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1166
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:423
This instruction inserts a single (scalar) element into a VectorType value.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Value * concatenateVectors(IRBuilder<> &Builder, ArrayRef< Value *> Vecs)
Concatenate a list of vectors.
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
ConstantInt * getInt1(bool V)
Get a constant value representing either true or false.
Definition: IRBuilder.h:282
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
Constant * createReplicatedMask(IRBuilder<> &Builder, unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static unsigned getScalarSizeInBits(Type *Ty)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
member_iterator member_end() const
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:434
unsigned getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition: VectorUtils.h:321
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one...
Constant * createBitMaskForGaps(IRBuilder<> &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
static double log2(double V)
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1415
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:640
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
size_t size() const
Definition: SmallVector.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:58
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:298
unsigned getNumOperands() const
Definition: User.h:192
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set...
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Provides information about what library functions are available for the current target.
iterator end() const
Definition: ArrayRef.h:138
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:381
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:307
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2068
reverse_iterator rbegin()
Definition: MapVector.h:75
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:89
unsigned getFactor() const
Definition: VectorUtils.h:269
APInt getDemandedBits(Instruction *I)
Return the bits demanded from instruction I.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Store the result of a depth first search within basic blocks contained by a single loop...
Definition: LoopIterator.h:98
Class to represent vector types.
Definition: DerivedTypes.h:393
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:56
Class for arbitrary precision integers.
Definition: APInt.h:70
iterator_range< user_iterator > users()
Definition: Value.h:400
static void DFS(BasicBlock *Root, SetVector< BasicBlock *> &Set)
bool isPredicated(MCInstrInfo const &MCII, MCInst const &MCI)
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Constant * createStrideMask(IRBuilder<> &Builder, unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:436
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
This class represents an analyzed expression in the program.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:465
const NodeList & List
Definition: RDFGraph.cpp:210
#define I(x, y, z)
Definition: MD5.cpp:58
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1213
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Type * getResultElementType() const
Definition: Instructions.h:956
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
uint32_t Size
Definition: Profile.cpp:47
OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere...
Definition: STLExtras.h:1268
unsigned getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
const unsigned Kind
Constant * createInterleaveMask(IRBuilder<> &Builder, unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool insertMember(InstTy *Instr, int Index, unsigned NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Definition: VectorUtils.h:278
LLVM Value Representation.
Definition: Value.h:73
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
RPOIterator beginRPO() const
Reverse iterate over the cached postorder blocks.
Definition: LoopIterator.h:137
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
IRTranslator LLVM IR MI
This pass exposes codegen information to IR-level passes.
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1075
#define LLVM_DEBUG(X)
Definition: Debug.h:123
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
Definition: Metadata.cpp:930
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:1079
Type * getElementType() const
Definition: DerivedTypes.h:486
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
std::vector< uint32_t > Metadata
PAL metadata represented as a vector.
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
Definition: LoopInfo.cpp:760
const BasicBlock * getParent() const
Definition: Instruction.h:67
This class represents a constant integer value.
bool requiresScalarEpilogue() const
Returns true if this Group requires a scalar iteration to handle gaps.
Definition: VectorUtils.h:342
gep_type_iterator gep_type_begin(const User *GEP)
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:43
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:522