1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/VectorUtils.h"
14 #include "llvm/ADT/EquivalenceClasses.h"
15 #include "llvm/Analysis/DemandedBits.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/LoopIterator.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/GetElementPtrTypeIterator.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/CommandLine.h"
28 
29 #define DEBUG_TYPE "vectorutils"
30 
31 using namespace llvm;
32 using namespace llvm::PatternMatch;
33 
34 /// Maximum factor for an interleaved memory access.
35 static cl::opt<unsigned> MaxInterleaveGroupFactor(
36     "max-interleave-group-factor", cl::Hidden,
37  cl::desc("Maximum factor for an interleaved access group (default = 8)"),
38  cl::init(8));
39 
40 /// Return true if all of the intrinsic's arguments and return type are scalars
41 /// for the scalar form of the intrinsic, and vectors for the vector form of the
42 /// intrinsic (except operands that are marked as always being scalar by
43 /// hasVectorInstrinsicScalarOpd).
44 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
45   switch (ID) {
46  case Intrinsic::abs: // Begin integer bit-manipulation.
47  case Intrinsic::bswap:
48  case Intrinsic::bitreverse:
49  case Intrinsic::ctpop:
50  case Intrinsic::ctlz:
51  case Intrinsic::cttz:
52  case Intrinsic::fshl:
53  case Intrinsic::fshr:
54  case Intrinsic::smax:
55  case Intrinsic::smin:
56  case Intrinsic::umax:
57  case Intrinsic::umin:
58  case Intrinsic::sadd_sat:
59  case Intrinsic::ssub_sat:
60  case Intrinsic::uadd_sat:
61  case Intrinsic::usub_sat:
62  case Intrinsic::smul_fix:
63  case Intrinsic::smul_fix_sat:
64  case Intrinsic::umul_fix:
65  case Intrinsic::umul_fix_sat:
66  case Intrinsic::sqrt: // Begin floating-point.
67  case Intrinsic::sin:
68  case Intrinsic::cos:
69  case Intrinsic::exp:
70  case Intrinsic::exp2:
71  case Intrinsic::log:
72  case Intrinsic::log10:
73  case Intrinsic::log2:
74  case Intrinsic::fabs:
75  case Intrinsic::minnum:
76  case Intrinsic::maxnum:
77  case Intrinsic::minimum:
78  case Intrinsic::maximum:
79  case Intrinsic::copysign:
80  case Intrinsic::floor:
81  case Intrinsic::ceil:
82  case Intrinsic::trunc:
83  case Intrinsic::rint:
84  case Intrinsic::nearbyint:
85  case Intrinsic::round:
86  case Intrinsic::roundeven:
87  case Intrinsic::pow:
88  case Intrinsic::fma:
89  case Intrinsic::fmuladd:
90  case Intrinsic::powi:
91  case Intrinsic::canonicalize:
92  return true;
93  default:
94  return false;
95  }
96 }
97 
98 /// Identifies if the vector form of the intrinsic has a scalar operand.
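/// For example, matching the cases below: the i32 exponent of llvm.powi
/// (operand 1) and the scale of llvm.smul.fix (operand 2) stay scalar in the
/// vector forms of those intrinsics, so this returns true for those
/// (ID, ScalarOpdIdx) pairs and false otherwise.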
99 bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
100                                         unsigned ScalarOpdIdx) {
101  switch (ID) {
102  case Intrinsic::abs:
103  case Intrinsic::ctlz:
104  case Intrinsic::cttz:
105  case Intrinsic::powi:
106  return (ScalarOpdIdx == 1);
107  case Intrinsic::smul_fix:
108  case Intrinsic::smul_fix_sat:
109  case Intrinsic::umul_fix:
110  case Intrinsic::umul_fix_sat:
111  return (ScalarOpdIdx == 2);
112  default:
113  return false;
114  }
115 }
116 
117 /// Returns intrinsic ID for call.
118 /// For the given call instruction it finds the matching intrinsic and returns
119 /// its ID; if no mapping is found, it returns not_intrinsic.
120 Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
121                                                 const TargetLibraryInfo *TLI) {
122   Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
123   if (ID == Intrinsic::not_intrinsic)
124     return Intrinsic::not_intrinsic;
125 
126  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
127  ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
128  ID == Intrinsic::experimental_noalias_scope_decl ||
129  ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
130  return ID;
131   return Intrinsic::not_intrinsic;
132 }
133 
134 /// Find the operand of the GEP that should be checked for consecutive
135 /// stores. This ignores trailing indices that have no effect on the final
136 /// pointer.
137 unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
138   const DataLayout &DL = Gep->getModule()->getDataLayout();
139  unsigned LastOperand = Gep->getNumOperands() - 1;
140  TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
141 
142  // Walk backwards and try to peel off zeros.
143  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
144  // Find the type we're currently indexing into.
145  gep_type_iterator GEPTI = gep_type_begin(Gep);
146  std::advance(GEPTI, LastOperand - 2);
147 
148  // If it's a type with the same allocation size as the result of the GEP we
149  // can peel off the zero index.
150  if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
151  break;
152  --LastOperand;
153  }
154 
155  return LastOperand;
156 }
157 
158 /// If the argument is a GEP, then returns the operand identified by
159 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
160 /// operand, it returns that instead.
161 Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
162   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
163  if (!GEP)
164  return Ptr;
165 
166  unsigned InductionOperand = getGEPInductionOperand(GEP);
167 
168  // Check that all of the gep indices are uniform except for our induction
169  // operand.
170  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
171  if (i != InductionOperand &&
172  !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
173  return Ptr;
174  return GEP->getOperand(InductionOperand);
175 }
176 
177 /// If a value has only one user that is a CastInst, return it.
178 Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
179   Value *UniqueCast = nullptr;
180  for (User *U : Ptr->users()) {
181  CastInst *CI = dyn_cast<CastInst>(U);
182  if (CI && CI->getType() == Ty) {
183  if (!UniqueCast)
184  UniqueCast = CI;
185  else
186  return nullptr;
187  }
188  }
189  return UniqueCast;
190 }
191 
192 /// Get the stride of a pointer access in a loop. Looks for symbolic
193 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
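/// Illustrative sketch: for a loop access "A[i * Stride]" whose index SCEV is
/// the add-recurrence {0,+,%Stride}, the step recurrence is the loop-invariant
/// value %Stride, which is what gets returned (possibly looking through a
/// unique cast); anything that is not such a symbolic stride yields nullptr.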
194 Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
195   auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
196  if (!PtrTy || PtrTy->isAggregateType())
197  return nullptr;
198 
199   // Try to remove a gep instruction to make the pointer (actually the index
200   // at this point) easier to analyze. If OrigPtr is equal to Ptr we are
201   // analyzing the pointer, otherwise we are analyzing the index.
202  Value *OrigPtr = Ptr;
203 
204  // The size of the pointer access.
205  int64_t PtrAccessSize = 1;
206 
207  Ptr = stripGetElementPtr(Ptr, SE, Lp);
208  const SCEV *V = SE->getSCEV(Ptr);
209 
210  if (Ptr != OrigPtr)
211  // Strip off casts.
212  while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
213  V = C->getOperand();
214 
215  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
216  if (!S)
217  return nullptr;
218 
219  V = S->getStepRecurrence(*SE);
220  if (!V)
221  return nullptr;
222 
223  // Strip off the size of access multiplication if we are still analyzing the
224  // pointer.
225  if (OrigPtr == Ptr) {
226  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
227  if (M->getOperand(0)->getSCEVType() != scConstant)
228  return nullptr;
229 
230  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
231 
232  // Huge step value - give up.
233  if (APStepVal.getBitWidth() > 64)
234  return nullptr;
235 
236  int64_t StepVal = APStepVal.getSExtValue();
237  if (PtrAccessSize != StepVal)
238  return nullptr;
239  V = M->getOperand(1);
240  }
241  }
242 
243  // Strip off casts.
244  Type *StripedOffRecurrenceCast = nullptr;
245  if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
246  StripedOffRecurrenceCast = C->getType();
247  V = C->getOperand();
248  }
249 
250  // Look for the loop invariant symbolic value.
251  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
252  if (!U)
253  return nullptr;
254 
255  Value *Stride = U->getValue();
256  if (!Lp->isLoopInvariant(Stride))
257  return nullptr;
258 
259  // If we have stripped off the recurrence cast we have to make sure that we
260  // return the value that is used in this loop so that we can replace it later.
261  if (StripedOffRecurrenceCast)
262  Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
263 
264  return Stride;
265 }
266 
267 /// Given a vector and an element number, see if the scalar value is
268 /// already around as a register, for example if it were inserted then extracted
269 /// from the vector.
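/// For example, given
///   %v = insertelement <4 x i32> %w, i32 %x, i32 2
/// findScalarElement(%v, 2) returns %x, while findScalarElement(%v, 0)
/// recurses into %w; an out-of-range index on a fixed-length vector yields
/// undef.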
270 Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
271  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
272  VectorType *VTy = cast<VectorType>(V->getType());
273  // For fixed-length vector, return undef for out of range access.
274  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
275  unsigned Width = FVTy->getNumElements();
276  if (EltNo >= Width)
277  return UndefValue::get(FVTy->getElementType());
278  }
279 
280  if (Constant *C = dyn_cast<Constant>(V))
281  return C->getAggregateElement(EltNo);
282 
283  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
284  // If this is an insert to a variable element, we don't know what it is.
285  if (!isa<ConstantInt>(III->getOperand(2)))
286  return nullptr;
287  unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
288 
289  // If this is an insert to the element we are looking for, return the
290  // inserted value.
291  if (EltNo == IIElt)
292  return III->getOperand(1);
293 
294  // Guard against infinite loop on malformed, unreachable IR.
295  if (III == III->getOperand(0))
296  return nullptr;
297 
298  // Otherwise, the insertelement doesn't modify the value, recurse on its
299  // vector input.
300  return findScalarElement(III->getOperand(0), EltNo);
301  }
302 
303  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
304  // Restrict the following transformation to fixed-length vector.
305  if (SVI && isa<FixedVectorType>(SVI->getType())) {
306  unsigned LHSWidth =
307  cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
308  int InEl = SVI->getMaskValue(EltNo);
309  if (InEl < 0)
310  return UndefValue::get(VTy->getElementType());
311  if (InEl < (int)LHSWidth)
312  return findScalarElement(SVI->getOperand(0), InEl);
313  return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
314  }
315 
316  // Extract a value from a vector add operation with a constant zero.
317  // TODO: Use getBinOpIdentity() to generalize this.
318  Value *Val; Constant *C;
319  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
320  if (Constant *Elt = C->getAggregateElement(EltNo))
321  if (Elt->isNullValue())
322  return findScalarElement(Val, EltNo);
323 
324  // Otherwise, we don't know.
325  return nullptr;
326 }
327 
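/// Illustrative examples for the splat-index search below: the shuffle mask
/// <2, undef, 2, 2> yields 2, an all-undef mask yields -1, and a mask with
/// two different defined elements such as <0, 1> also yields -1.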
328 int llvm::getSplatIndex(ArrayRef<int> Mask) {
329   int SplatIndex = -1;
330  for (int M : Mask) {
331  // Ignore invalid (undefined) mask elements.
332  if (M < 0)
333  continue;
334 
335  // There can be only 1 non-negative mask element value if this is a splat.
336  if (SplatIndex != -1 && SplatIndex != M)
337  return -1;
338 
339  // Initialize the splat index to the 1st non-negative mask element.
340  SplatIndex = M;
341  }
342  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
343  return SplatIndex;
344 }
345 
346 /// Get splat value if the input is a splat vector or return nullptr.
347 /// This function is not fully general. It checks only 2 cases:
348 /// the input value is (1) a splat constant vector or (2) a sequence
349 /// of instructions that broadcasts a scalar at element 0.
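/// For example, the constant <4 x i32> <i32 7, i32 7, i32 7, i32 7> returns
/// i32 7, and the broadcast idiom
///   %ins   = insertelement <4 x float> undef, float %f, i32 0
///   %splat = shufflevector <4 x float> %ins, <4 x float> undef,
///                          <4 x i32> zeroinitializer
/// returns %f (illustrative IR matching the pattern below).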
350 Value *llvm::getSplatValue(const Value *V) {
351   if (isa<VectorType>(V->getType()))
352  if (auto *C = dyn_cast<Constant>(V))
353  return C->getSplatValue();
354 
355  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
356  Value *Splat;
357  if (match(V,
358             m_Shuffle(m_InsertElt(m_Value(Splat), m_Value(), m_ZeroInt()),
359                       m_Value(), m_ZeroMask())))
360  return Splat;
361 
362  return nullptr;
363 }
364 
365 bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
366  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
367 
368  if (isa<VectorType>(V->getType())) {
369  if (isa<UndefValue>(V))
370  return true;
371  // FIXME: We can allow undefs, but if Index was specified, we may want to
372  // check that the constant is defined at that index.
373  if (auto *C = dyn_cast<Constant>(V))
374  return C->getSplatValue() != nullptr;
375  }
376 
377  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
378  // FIXME: We can safely allow undefs here. If Index was specified, we will
379  // check that the mask elt is defined at the required index.
380  if (!is_splat(Shuf->getShuffleMask()))
381  return false;
382 
383  // Match any index.
384  if (Index == -1)
385  return true;
386 
387  // Match a specific element. The mask should be defined at and match the
388  // specified index.
389  return Shuf->getMaskValue(Index) == Index;
390  }
391 
392  // The remaining tests are all recursive, so bail out if we hit the limit.
393   if (Depth++ == MaxAnalysisRecursionDepth)
394     return false;
395 
396  // If both operands of a binop are splats, the result is a splat.
397  Value *X, *Y, *Z;
398  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
399  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
400 
401  // If all operands of a select are splats, the result is a splat.
402  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
403  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
404  isSplatValue(Z, Index, Depth);
405 
406  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
407 
408  return false;
409 }
410 
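/// Illustrative example for the narrowing below: with Scale = 2, the
/// wide-element mask <0, 2> becomes the narrow-element mask <0, 1, 4, 5>;
/// undef (-1) elements are simply repeated Scale times.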
411 void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
412                                  SmallVectorImpl<int> &ScaledMask) {
413  assert(Scale > 0 && "Unexpected scaling factor");
414 
415  // Fast-path: if no scaling, then it is just a copy.
416  if (Scale == 1) {
417  ScaledMask.assign(Mask.begin(), Mask.end());
418  return;
419  }
420 
421  ScaledMask.clear();
422  for (int MaskElt : Mask) {
423  if (MaskElt >= 0) {
424  assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
425  "Overflowed 32-bits");
426  }
427  for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
428  ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
429  }
430 }
431 
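/// Illustrative example for the widening below: with Scale = 2, the
/// narrow-element mask <2, 3, 0, 1> maps to the wide-element mask <1, 0>,
/// whereas <0, 2, 1, 3> cannot be widened (the slices are not consecutive)
/// and the function returns false.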
432 bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
433                                 SmallVectorImpl<int> &ScaledMask) {
434  assert(Scale > 0 && "Unexpected scaling factor");
435 
436  // Fast-path: if no scaling, then it is just a copy.
437  if (Scale == 1) {
438  ScaledMask.assign(Mask.begin(), Mask.end());
439  return true;
440  }
441 
442   // We must map the original elements down evenly to a type with fewer elements.
443  int NumElts = Mask.size();
444  if (NumElts % Scale != 0)
445  return false;
446 
447  ScaledMask.clear();
448  ScaledMask.reserve(NumElts / Scale);
449 
450  // Step through the input mask by splitting into Scale-sized slices.
451  do {
452  ArrayRef<int> MaskSlice = Mask.take_front(Scale);
453  assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
454 
455  // The first element of the slice determines how we evaluate this slice.
456  int SliceFront = MaskSlice.front();
457  if (SliceFront < 0) {
458  // Negative values (undef or other "sentinel" values) must be equal across
459  // the entire slice.
460  if (!is_splat(MaskSlice))
461  return false;
462  ScaledMask.push_back(SliceFront);
463  } else {
464  // A positive mask element must be cleanly divisible.
465  if (SliceFront % Scale != 0)
466  return false;
467  // Elements of the slice must be consecutive.
468  for (int i = 1; i < Scale; ++i)
469  if (MaskSlice[i] != SliceFront + i)
470  return false;
471  ScaledMask.push_back(SliceFront / Scale);
472  }
473  Mask = Mask.drop_front(Scale);
474  } while (!Mask.empty());
475 
476  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
477 
478  // All elements of the original mask can be scaled down to map to the elements
479  // of a mask with wider elements.
480  return true;
481 }
482 
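/// Illustrative sketch: if a chain of i32 arithmetic is only ever truncated to
/// i16, DemandedBits reports at most 16 live bits for every value in that
/// connected DAG, so each instruction in the equivalence class is mapped to a
/// minimum width of 16 (widths are rounded up to the next power of two).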
483 MapVector<Instruction *, uint64_t>
484 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
485                                const TargetTransformInfo *TTI) {
486 
487  // DemandedBits will give us every value's live-out bits. But we want
488  // to ensure no extra casts would need to be inserted, so every DAG
489  // of connected values must have the same minimum bitwidth.
490   EquivalenceClasses<Value *> ECs;
491   SmallVector<Value *, 16> Worklist;
492   SmallPtrSet<Value *, 4> Roots;
493   SmallPtrSet<Value *, 16> Visited;
494   DenseMap<Value *, uint64_t> DBits;
495   SmallPtrSet<Instruction *, 4> InstructionSet;
496   MapVector<Instruction *, uint64_t> MinBWs;
497 
498  // Determine the roots. We work bottom-up, from truncs or icmps.
499  bool SeenExtFromIllegalType = false;
500  for (auto *BB : Blocks)
501  for (auto &I : *BB) {
502  InstructionSet.insert(&I);
503 
504  if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
505  !TTI->isTypeLegal(I.getOperand(0)->getType()))
506  SeenExtFromIllegalType = true;
507 
508  // Only deal with non-vector integers up to 64-bits wide.
509  if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
510  !I.getType()->isVectorTy() &&
511  I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
512  // Don't make work for ourselves. If we know the loaded type is legal,
513  // don't add it to the worklist.
514  if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
515  continue;
516 
517  Worklist.push_back(&I);
518  Roots.insert(&I);
519  }
520  }
521  // Early exit.
522  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
523  return MinBWs;
524 
525  // Now proceed breadth-first, unioning values together.
526  while (!Worklist.empty()) {
527  Value *Val = Worklist.pop_back_val();
528  Value *Leader = ECs.getOrInsertLeaderValue(Val);
529 
530  if (Visited.count(Val))
531  continue;
532  Visited.insert(Val);
533 
534  // Non-instructions terminate a chain successfully.
535  if (!isa<Instruction>(Val))
536  continue;
537  Instruction *I = cast<Instruction>(Val);
538 
539  // If we encounter a type that is larger than 64 bits, we can't represent
540  // it so bail out.
541  if (DB.getDemandedBits(I).getBitWidth() > 64)
543 
544  uint64_t V = DB.getDemandedBits(I).getZExtValue();
545  DBits[Leader] |= V;
546  DBits[I] = V;
547 
548  // Casts, loads and instructions outside of our range terminate a chain
549  // successfully.
550  if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
551  !InstructionSet.count(I))
552  continue;
553 
554  // Unsafe casts terminate a chain unsuccessfully. We can't do anything
555  // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
556  // transform anything that relies on them.
557  if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
558  !I->getType()->isIntegerTy()) {
559  DBits[Leader] |= ~0ULL;
560  continue;
561  }
562 
563  // We don't modify the types of PHIs. Reductions will already have been
564  // truncated if possible, and inductions' sizes will have been chosen by
565  // indvars.
566  if (isa<PHINode>(I))
567  continue;
568 
569  if (DBits[Leader] == ~0ULL)
570  // All bits demanded, no point continuing.
571  continue;
572 
573  for (Value *O : cast<User>(I)->operands()) {
574  ECs.unionSets(Leader, O);
575  Worklist.push_back(O);
576  }
577  }
578 
579  // Now we've discovered all values, walk them to see if there are
580  // any users we didn't see. If there are, we can't optimize that
581  // chain.
582  for (auto &I : DBits)
583  for (auto *U : I.first->users())
584  if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
585  DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
586 
587  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
588  uint64_t LeaderDemandedBits = 0;
589  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
590  LeaderDemandedBits |= DBits[M];
591 
592  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
593  llvm::countLeadingZeros(LeaderDemandedBits);
594  // Round up to a power of 2
595  if (!isPowerOf2_64((uint64_t)MinBW))
596  MinBW = NextPowerOf2(MinBW);
597 
598  // We don't modify the types of PHIs. Reductions will already have been
599  // truncated if possible, and inductions' sizes will have been chosen by
600  // indvars.
601  // If we are required to shrink a PHI, abandon this entire equivalence class.
602  bool Abort = false;
603  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
604  if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
605  Abort = true;
606  break;
607  }
608  if (Abort)
609  continue;
610 
611  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
612  if (!isa<Instruction>(M))
613  continue;
614  Type *Ty = M->getType();
615  if (Roots.count(M))
616  Ty = cast<Instruction>(M)->getOperand(0)->getType();
617  if (MinBW < Ty->getScalarSizeInBits())
618  MinBWs[cast<Instruction>(M)] = MinBW;
619  }
620  }
621 
622  return MinBWs;
623 }
624 
625 /// Add all access groups in @p AccGroups to @p List.
626 template <typename ListT>
627 static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
628  // Interpret an access group as a list containing itself.
629  if (AccGroups->getNumOperands() == 0) {
630  assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
631  List.insert(AccGroups);
632  return;
633  }
634 
635  for (auto &AccGroupListOp : AccGroups->operands()) {
636  auto *Item = cast<MDNode>(AccGroupListOp.get());
637  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
638  List.insert(Item);
639  }
640 }
641 
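/// Illustrative example for the union below: uniting the single access group
/// !0 with the list !{!0, !1} yields a list containing !0 and !1; a lone
/// access group is treated as a list containing itself, and duplicates are
/// collapsed.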
642 MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
643  if (!AccGroups1)
644  return AccGroups2;
645  if (!AccGroups2)
646  return AccGroups1;
647  if (AccGroups1 == AccGroups2)
648  return AccGroups1;
649 
650   SmallSetVector<Metadata *, 4> Union;
651   addToAccessGroupList(Union, AccGroups1);
652  addToAccessGroupList(Union, AccGroups2);
653 
654  if (Union.size() == 0)
655  return nullptr;
656  if (Union.size() == 1)
657  return cast<MDNode>(Union.front());
658 
659  LLVMContext &Ctx = AccGroups1->getContext();
660  return MDNode::get(Ctx, Union.getArrayRef());
661 }
662 
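/// Illustrative example for the intersection below: if Inst1 is in access
/// groups !{!0, !1} and Inst2 is in !{!1, !2}, the result is the single group
/// !1; if the two lists share no group, nullptr is returned.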
663 MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
664                                     const Instruction *Inst2) {
665  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
666  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
667 
668  if (!MayAccessMem1 && !MayAccessMem2)
669  return nullptr;
670  if (!MayAccessMem1)
671  return Inst2->getMetadata(LLVMContext::MD_access_group);
672  if (!MayAccessMem2)
673  return Inst1->getMetadata(LLVMContext::MD_access_group);
674 
675  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
676  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
677  if (!MD1 || !MD2)
678  return nullptr;
679  if (MD1 == MD2)
680  return MD1;
681 
682  // Use set for scalable 'contains' check.
683  SmallPtrSet<Metadata *, 4> AccGroupSet2;
684  addToAccessGroupList(AccGroupSet2, MD2);
685 
686  SmallVector<Metadata *, 4> Intersection;
687  if (MD1->getNumOperands() == 0) {
688  assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
689  if (AccGroupSet2.count(MD1))
690  Intersection.push_back(MD1);
691  } else {
692  for (const MDOperand &Node : MD1->operands()) {
693  auto *Item = cast<MDNode>(Node.get());
694  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
695  if (AccGroupSet2.count(Item))
696  Intersection.push_back(Item);
697  }
698  }
699 
700  if (Intersection.size() == 0)
701  return nullptr;
702  if (Intersection.size() == 1)
703  return cast<MDNode>(Intersection.front());
704 
705  LLVMContext &Ctx = Inst1->getContext();
706  return MDNode::get(Ctx, Intersection);
707 }
708 
709 /// \returns \p I after propagating metadata from \p VL.
710 Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
711   Instruction *I0 = cast<Instruction>(VL[0]);
712   SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
713   I0->getAllMetadataOtherThanDebugLoc(Metadata);
714 
715  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
716  LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
717  LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
718  LLVMContext::MD_access_group}) {
719  MDNode *MD = I0->getMetadata(Kind);
720 
721  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
722  const Instruction *IJ = cast<Instruction>(VL[J]);
723  MDNode *IMD = IJ->getMetadata(Kind);
724  switch (Kind) {
725  case LLVMContext::MD_tbaa:
726  MD = MDNode::getMostGenericTBAA(MD, IMD);
727  break;
728  case LLVMContext::MD_alias_scope:
729  MD = MDNode::getMostGenericAliasScope(MD, IMD);
730  break;
731  case LLVMContext::MD_fpmath:
732  MD = MDNode::getMostGenericFPMath(MD, IMD);
733  break;
734  case LLVMContext::MD_noalias:
735  case LLVMContext::MD_nontemporal:
736  case LLVMContext::MD_invariant_load:
737  MD = MDNode::intersect(MD, IMD);
738  break;
739  case LLVMContext::MD_access_group:
740  MD = intersectAccessGroups(Inst, IJ);
741  break;
742  default:
743  llvm_unreachable("unhandled metadata");
744  }
745  }
746 
747  Inst->setMetadata(Kind, MD);
748  }
749 
750  return Inst;
751 }
752 
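/// Illustrative example: for VF = 4 and an interleave group of factor 2 in
/// which only member 0 is present, the mask built below is
/// <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>, one bit per member slot
/// repeated VF times.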
753 Constant *
754 llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
755                            const InterleaveGroup<Instruction> &Group) {
756  // All 1's means mask is not needed.
757  if (Group.getNumMembers() == Group.getFactor())
758  return nullptr;
759 
760  // TODO: support reversed access.
761  assert(!Group.isReverse() && "Reversed group not supported.");
762 
763   SmallVector<Constant *, 16> Mask;
764   for (unsigned i = 0; i < VF; i++)
765  for (unsigned j = 0; j < Group.getFactor(); ++j) {
766  unsigned HasMember = Group.getMember(j) ? 1 : 0;
767  Mask.push_back(Builder.getInt1(HasMember));
768  }
769 
770  return ConstantVector::get(Mask);
771 }
772 
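/// Illustrative example: createReplicatedMask(3, 2) produces
/// <0, 0, 0, 1, 1, 1>, i.e. each of the VF = 2 source lanes is repeated
/// ReplicationFactor = 3 times.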
773 llvm::SmallVector<int, 16>
774 llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
775  SmallVector<int, 16> MaskVec;
776  for (unsigned i = 0; i < VF; i++)
777  for (unsigned j = 0; j < ReplicationFactor; j++)
778  MaskVec.push_back(i);
779 
780  return MaskVec;
781 }
782 
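/// Illustrative example: createInterleaveMask(4, 2) produces
/// <0, 4, 1, 5, 2, 6, 3, 7>, interleaving the lanes of two 4-element vectors.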
783 llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
784                                                       unsigned NumVecs) {
785   SmallVector<int, 16> Mask;
786   for (unsigned i = 0; i < VF; i++)
787  for (unsigned j = 0; j < NumVecs; j++)
788  Mask.push_back(j * VF + i);
789 
790  return Mask;
791 }
792 
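/// Illustrative example: createStrideMask(0, 2, 4) produces <0, 2, 4, 6>,
/// selecting every second lane starting at index 0.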
793 llvm::SmallVector<int, 16>
794 llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
795   SmallVector<int, 16> Mask;
796   for (unsigned i = 0; i < VF; i++)
797  Mask.push_back(Start + i * Stride);
798 
799  return Mask;
800 }
801 
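/// Illustrative example: createSequentialMask(0, 4, 4) produces
/// <0, 1, 2, 3, undef, undef, undef, undef>: four sequential indices padded
/// with four undef (-1) elements.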
802 llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
803                                                       unsigned NumInts,
804  unsigned NumUndefs) {
805   SmallVector<int, 16> Mask;
806   for (unsigned i = 0; i < NumInts; i++)
807  Mask.push_back(Start + i);
808 
809  for (unsigned i = 0; i < NumUndefs; i++)
810  Mask.push_back(-1);
811 
812  return Mask;
813 }
814 
815 /// A helper function for concatenating vectors. This function concatenates two
816 /// vectors having the same element type. If the second vector has fewer
817 /// elements than the first, it is padded with undefs.
818 static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
819                                     Value *V2) {
820  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
821  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
822  assert(VecTy1 && VecTy2 &&
823  VecTy1->getScalarType() == VecTy2->getScalarType() &&
824  "Expect two vectors with the same element type");
825 
826  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
827  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
828   assert(NumElts1 >= NumElts2 && "Unexpected: first vector has fewer elements");
829 
830  if (NumElts1 > NumElts2) {
831  // Extend with UNDEFs.
832  V2 = Builder.CreateShuffleVector(
833  V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
834  }
835 
836  return Builder.CreateShuffleVector(
837  V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
838 }
839 
840 Value *llvm::concatenateVectors(IRBuilderBase &Builder,
841                                 ArrayRef<Value *> Vecs) {
842  unsigned NumVecs = Vecs.size();
843  assert(NumVecs > 1 && "Should be at least two vectors");
844 
845  SmallVector<Value *, 8> ResList;
846  ResList.append(Vecs.begin(), Vecs.end());
847  do {
848  SmallVector<Value *, 8> TmpList;
849  for (unsigned i = 0; i < NumVecs - 1; i += 2) {
850  Value *V0 = ResList[i], *V1 = ResList[i + 1];
851  assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
852  "Only the last vector may have a different type");
853 
854  TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
855  }
856 
857  // Push the last vector if the total number of vectors is odd.
858  if (NumVecs % 2 != 0)
859  TmpList.push_back(ResList[NumVecs - 1]);
860 
861  ResList = TmpList;
862  NumVecs = ResList.size();
863  } while (NumVecs > 1);
864 
865  return ResList[0];
866 }
867 
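/// Illustrative examples for the predicate below: the constant masks
/// <2 x i1> zeroinitializer and <i1 0, i1 undef> are all-zero-or-undef, while
/// <i1 0, i1 1> and any non-constant mask are not.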
868 bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
869   assert(isa<VectorType>(Mask->getType()) &&
870  isa<IntegerType>(Mask->getType()->getScalarType()) &&
871  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
872  1 &&
873  "Mask must be a vector of i1");
874 
875  auto *ConstMask = dyn_cast<Constant>(Mask);
876  if (!ConstMask)
877  return false;
878  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
879  return true;
880  if (isa<ScalableVectorType>(ConstMask->getType()))
881  return false;
882  for (unsigned
883  I = 0,
884  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
885  I != E; ++I) {
886  if (auto *MaskElt = ConstMask->getAggregateElement(I))
887  if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
888  continue;
889  return false;
890  }
891  return true;
892 }
893 
894 
895 bool llvm::maskIsAllOneOrUndef(Value *Mask) {
896   assert(isa<VectorType>(Mask->getType()) &&
897  isa<IntegerType>(Mask->getType()->getScalarType()) &&
898  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
899  1 &&
900  "Mask must be a vector of i1");
901 
902  auto *ConstMask = dyn_cast<Constant>(Mask);
903  if (!ConstMask)
904  return false;
905  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
906  return true;
907  if (isa<ScalableVectorType>(ConstMask->getType()))
908  return false;
909  for (unsigned
910  I = 0,
911  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
912  I != E; ++I) {
913  if (auto *MaskElt = ConstMask->getAggregateElement(I))
914  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
915  continue;
916  return false;
917  }
918  return true;
919 }
920 
921 /// TODO: This is a lot like known bits, but for
922 /// vectors. Is there something we can common this with?
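/// Illustrative example: for the constant mask <i1 1, i1 0, i1 1, i1 0> the
/// returned APInt is 0b0101, i.e. only lanes 0 and 2 may be demanded; for a
/// non-constant mask all lanes are conservatively reported as demanded.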
923 APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
924   assert(isa<FixedVectorType>(Mask->getType()) &&
925  isa<IntegerType>(Mask->getType()->getScalarType()) &&
926  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
927  1 &&
928  "Mask must be a fixed width vector of i1");
929 
930  const unsigned VWidth =
931  cast<FixedVectorType>(Mask->getType())->getNumElements();
932  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
933  if (auto *CV = dyn_cast<ConstantVector>(Mask))
934  for (unsigned i = 0; i < VWidth; i++)
935  if (CV->getAggregateElement(i)->isNullValue())
936  DemandedElts.clearBit(i);
937  return DemandedElts;
938 }
939 
940 bool InterleavedAccessInfo::isStrided(int Stride) {
941  unsigned Factor = std::abs(Stride);
942  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
943 }
944 
945 void InterleavedAccessInfo::collectConstStrideAccesses(
946     MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
947     const ValueToValueMap &Strides) {
948  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
949 
950  // Since it's desired that the load/store instructions be maintained in
951  // "program order" for the interleaved access analysis, we have to visit the
952  // blocks in the loop in reverse postorder (i.e., in a topological order).
953  // Such an ordering will ensure that any load/store that may be executed
954  // before a second load/store will precede the second load/store in
955  // AccessStrideInfo.
956  LoopBlocksDFS DFS(TheLoop);
957  DFS.perform(LI);
958  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
959  for (auto &I : *BB) {
960  auto *LI = dyn_cast<LoadInst>(&I);
961  auto *SI = dyn_cast<StoreInst>(&I);
962  if (!LI && !SI)
963  continue;
964 
965       Value *Ptr = getLoadStorePointerOperand(&I);
966       // We don't check wrapping here because we don't know yet if Ptr will be
967  // part of a full group or a group with gaps. Checking wrapping for all
968  // pointers (even those that end up in groups with no gaps) will be overly
969  // conservative. For full groups, wrapping should be ok since if we would
970  // wrap around the address space we would do a memory access at nullptr
971  // even without the transformation. The wrapping checks are therefore
972  // deferred until after we've formed the interleaved groups.
973  int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
974  /*Assume=*/true, /*ShouldCheckWrap=*/false);
975 
976  const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
977  PointerType *PtrTy = cast<PointerType>(Ptr->getType());
978  uint64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
979  AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
980                                               getLoadStoreAlignment(&I));
981     }
982 }
983 
984 // Analyze interleaved accesses and collect them into interleaved load and
985 // store groups.
986 //
987 // When generating code for an interleaved load group, we effectively hoist all
988 // loads in the group to the location of the first load in program order. When
989 // generating code for an interleaved store group, we sink all stores to the
990 // location of the last store. This code motion can change the order of load
991 // and store instructions and may break dependences.
992 //
993 // The code generation strategy mentioned above ensures that we won't violate
994 // any write-after-read (WAR) dependences.
995 //
996 // E.g., for the WAR dependence: a = A[i]; // (1)
997 // A[i] = b; // (2)
998 //
999 // The store group of (2) is always inserted at or below (2), and the load
1000 // group of (1) is always inserted at or above (1). Thus, the instructions will
1001 // never be reordered. All other dependences are checked to ensure the
1002 // correctness of the instruction reordering.
1003 //
1004 // The algorithm visits all memory accesses in the loop in bottom-up program
1005 // order. Program order is established by traversing the blocks in the loop in
1006 // reverse postorder when collecting the accesses.
1007 //
1008 // We visit the memory accesses in bottom-up order because it can simplify the
1009 // construction of store groups in the presence of write-after-write (WAW)
1010 // dependences.
1011 //
1012 // E.g., for the WAW dependence: A[i] = a; // (1)
1013 // A[i] = b; // (2)
1014 // A[i + 1] = c; // (3)
1015 //
1016 // We will first create a store group with (3) and (2). (1) can't be added to
1017 // this group because it and (2) are dependent. However, (1) can be grouped
1018 // with other accesses that may precede it in program order. Note that a
1019 // bottom-up order does not imply that WAW dependences should not be checked.
1020 void InterleavedAccessInfo::analyzeInterleaving(
1021     bool EnablePredicatedInterleavedMemAccesses) {
1022  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1023  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
1024 
1025  // Holds all accesses with a constant stride.
1026   MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
1027   collectConstStrideAccesses(AccessStrideInfo, Strides);
1028 
1029  if (AccessStrideInfo.empty())
1030  return;
1031 
1032  // Collect the dependences in the loop.
1033  collectDependences();
1034 
1035  // Holds all interleaved store groups temporarily.
1036   SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
1037   // Holds all interleaved load groups temporarily.
1038   SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
1039 
1040  // Search in bottom-up program order for pairs of accesses (A and B) that can
1041  // form interleaved load or store groups. In the algorithm below, access A
1042  // precedes access B in program order. We initialize a group for B in the
1043  // outer loop of the algorithm, and then in the inner loop, we attempt to
1044  // insert each A into B's group if:
1045  //
1046  // 1. A and B have the same stride,
1047  // 2. A and B have the same memory object size, and
1048  // 3. A belongs in B's group according to its distance from B.
1049  //
1050  // Special care is taken to ensure group formation will not break any
1051  // dependences.
1052  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1053  BI != E; ++BI) {
1054  Instruction *B = BI->first;
1055  StrideDescriptor DesB = BI->second;
1056 
1057  // Initialize a group for B if it has an allowable stride. Even if we don't
1058  // create a group for B, we continue with the bottom-up algorithm to ensure
1059  // we don't break any of B's dependences.
1060  InterleaveGroup<Instruction> *Group = nullptr;
1061  if (isStrided(DesB.Stride) &&
1062  (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1063  Group = getInterleaveGroup(B);
1064  if (!Group) {
1065  LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1066  << '\n');
1067  Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1068  }
1069  if (B->mayWriteToMemory())
1070  StoreGroups.insert(Group);
1071  else
1072  LoadGroups.insert(Group);
1073  }
1074 
1075  for (auto AI = std::next(BI); AI != E; ++AI) {
1076  Instruction *A = AI->first;
1077  StrideDescriptor DesA = AI->second;
1078 
1079  // Our code motion strategy implies that we can't have dependences
1080  // between accesses in an interleaved group and other accesses located
1081  // between the first and last member of the group. Note that this also
1082  // means that a group can't have more than one member at a given offset.
1083  // The accesses in a group can have dependences with other accesses, but
1084  // we must ensure we don't extend the boundaries of the group such that
1085  // we encompass those dependent accesses.
1086  //
1087  // For example, assume we have the sequence of accesses shown below in a
1088  // stride-2 loop:
1089  //
1090  // (1, 2) is a group | A[i] = a; // (1)
1091  // | A[i-1] = b; // (2) |
1092  // A[i-3] = c; // (3)
1093  // A[i] = d; // (4) | (2, 4) is not a group
1094  //
1095  // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1096  // but not with (4). If we did, the dependent access (3) would be within
1097  // the boundaries of the (2, 4) group.
1098  if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
1099  // If a dependence exists and A is already in a group, we know that A
1100  // must be a store since A precedes B and WAR dependences are allowed.
1101  // Thus, A would be sunk below B. We release A's group to prevent this
1102  // illegal code motion. A will then be free to form another group with
1103  // instructions that precede it.
1104  if (isInterleaved(A)) {
1105  InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
1106 
1107  LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1108  "dependence between " << *A << " and "<< *B << '\n');
1109 
1110  StoreGroups.remove(StoreGroup);
1111  releaseGroup(StoreGroup);
1112  }
1113 
1114  // If a dependence exists and A is not already in a group (or it was
1115  // and we just released it), B might be hoisted above A (if B is a
1116  // load) or another store might be sunk below A (if B is a store). In
1117  // either case, we can't add additional instructions to B's group. B
1118  // will only form a group with instructions that it precedes.
1119  break;
1120  }
1121 
1122  // At this point, we've checked for illegal code motion. If either A or B
1123  // isn't strided, there's nothing left to do.
1124  if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1125  continue;
1126 
1127  // Ignore A if it's already in a group or isn't the same kind of memory
1128  // operation as B.
1129  // Note that mayReadFromMemory() isn't mutually exclusive to
1130  // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1131  // here, canVectorizeMemory() should have returned false - except for the
1132  // case we asked for optimization remarks.
1133  if (isInterleaved(A) ||
1134  (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1135  (A->mayWriteToMemory() != B->mayWriteToMemory()))
1136  continue;
1137 
1138  // Check rules 1 and 2. Ignore A if its stride or size is different from
1139  // that of B.
1140  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1141  continue;
1142 
1143  // Ignore A if the memory object of A and B don't belong to the same
1144  // address space
1145       if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1146         continue;
1147 
1148  // Calculate the distance from A to B.
1149  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1150  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1151  if (!DistToB)
1152  continue;
1153  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1154 
1155  // Check rule 3. Ignore A if its distance to B is not a multiple of the
1156  // size.
1157  if (DistanceToB % static_cast<int64_t>(DesB.Size))
1158  continue;
1159 
1160  // All members of a predicated interleave-group must have the same predicate,
1161  // and currently must reside in the same BB.
1162  BasicBlock *BlockA = A->getParent();
1163  BasicBlock *BlockB = B->getParent();
1164  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1165  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1166  continue;
1167 
1168  // The index of A is the index of B plus A's distance to B in multiples
1169  // of the size.
1170  int IndexA =
1171  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1172 
1173  // Try to insert A into B's group.
1174  if (Group->insertMember(A, IndexA, DesA.Alignment)) {
1175  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1176  << " into the interleave group with" << *B
1177  << '\n');
1178  InterleaveGroupMap[A] = Group;
1179 
1180  // Set the first load in program order as the insert position.
1181  if (A->mayReadFromMemory())
1182  Group->setInsertPos(A);
1183  }
1184  } // Iteration over A accesses.
1185  } // Iteration over B accesses.
1186 
1187  // Remove interleaved store groups with gaps.
1188  for (auto *Group : StoreGroups)
1189  if (Group->getNumMembers() != Group->getFactor()) {
1190  LLVM_DEBUG(
1191  dbgs() << "LV: Invalidate candidate interleaved store group due "
1192  "to gaps.\n");
1193  releaseGroup(Group);
1194  }
1195  // Remove interleaved groups with gaps (currently only loads) whose memory
1196  // accesses may wrap around. We have to revisit the getPtrStride analysis,
1197  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1198  // not check wrapping (see documentation there).
1199  // FORNOW we use Assume=false;
1200  // TODO: Change to Assume=true but making sure we don't exceed the threshold
1201  // of runtime SCEV assumptions checks (thereby potentially failing to
1202  // vectorize altogether).
1203  // Additional optional optimizations:
1204  // TODO: If we are peeling the loop and we know that the first pointer doesn't
1205  // wrap then we can deduce that all pointers in the group don't wrap.
1206  // This means that we can forcefully peel the loop in order to only have to
1207  // check the first pointer for no-wrap. When we'll change to use Assume=true
1208  // we'll only need at most one runtime check per interleaved group.
1209  for (auto *Group : LoadGroups) {
1210  // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1211  // load would wrap around the address space we would do a memory access at
1212  // nullptr even without the transformation.
1213  if (Group->getNumMembers() == Group->getFactor())
1214  continue;
1215 
1216  // Case 2: If first and last members of the group don't wrap this implies
1217  // that all the pointers in the group don't wrap.
1218  // So we check only group member 0 (which is always guaranteed to exist),
1219  // and group member Factor - 1; If the latter doesn't exist we rely on
1220   // peeling (if it is a non-reversed access -- see Case 3).
1221  Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
1222  if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
1223  /*ShouldCheckWrap=*/true)) {
1224  LLVM_DEBUG(
1225  dbgs() << "LV: Invalidate candidate interleaved group due to "
1226  "first group member potentially pointer-wrapping.\n");
1227  releaseGroup(Group);
1228  continue;
1229  }
1230  Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
1231  if (LastMember) {
1232  Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
1233  if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
1234  /*ShouldCheckWrap=*/true)) {
1235  LLVM_DEBUG(
1236  dbgs() << "LV: Invalidate candidate interleaved group due to "
1237  "last group member potentially pointer-wrapping.\n");
1238  releaseGroup(Group);
1239  }
1240  } else {
1241  // Case 3: A non-reversed interleaved load group with gaps: We need
1242  // to execute at least one scalar epilogue iteration. This will ensure
1243  // we don't speculatively access memory out-of-bounds. We only need
1244  // to look for a member at index factor - 1, since every group must have
1245  // a member at index zero.
1246  if (Group->isReverse()) {
1247  LLVM_DEBUG(
1248  dbgs() << "LV: Invalidate candidate interleaved group due to "
1249  "a reverse access with gaps.\n");
1250  releaseGroup(Group);
1251  continue;
1252  }
1253  LLVM_DEBUG(
1254  dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1255  RequiresScalarEpilogue = true;
1256  }
1257  }
1258 }
1259 
1260 void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1261   // If no group had triggered the requirement to create an epilogue loop,
1262  // there is nothing to do.
1263  if (!requiresScalarEpilogue())
1264  return;
1265 
1266  bool ReleasedGroup = false;
1267  // Release groups requiring scalar epilogues. Note that this also removes them
1268  // from InterleaveGroups.
1269  for (auto *Group : make_early_inc_range(InterleaveGroups)) {
1270  if (!Group->requiresScalarEpilogue())
1271  continue;
1272  LLVM_DEBUG(
1273  dbgs()
1274  << "LV: Invalidate candidate interleaved group due to gaps that "
1275  "require a scalar epilogue (not allowed under optsize) and cannot "
1276  "be masked (not enabled). \n");
1277  releaseGroup(Group);
1278  ReleasedGroup = true;
1279  }
1280  assert(ReleasedGroup && "At least one group must be invalidated, as a "
1281  "scalar epilogue was required");
1282  (void)ReleasedGroup;
1283  RequiresScalarEpilogue = false;
1284 }
1285 
1286 template <typename InstT>
1287 void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1288  llvm_unreachable("addMetadata can only be used for Instruction");
1289 }
1290 
1291 namespace llvm {
1292 template <>
1293 void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1294   SmallVector<Value *, 4> VL;
1295   std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1296  [](std::pair<int, Instruction *> p) { return p.second; });
1297  propagateMetadata(NewInst, VL);
1298 }
1299 }
1300 
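/// Illustrative example (assuming VFABI::_LLVM_ expands to "_LLVM_"):
/// mangleTLIVectorName("vsinf", "sinf", 1, ElementCount::getFixed(4)) yields
/// "_ZGV_LLVM_N4v_sinf(vsinf)"; a scalable VF emits 'x' instead of the lane
/// count.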
1301 std::string VFABI::mangleTLIVectorName(StringRef VectorName,
1302  StringRef ScalarName, unsigned numArgs,
1303  ElementCount VF) {
1304  SmallString<256> Buffer;
1305  llvm::raw_svector_ostream Out(Buffer);
1306  Out << "_ZGV" << VFABI::_LLVM_ << "N";
1307  if (VF.isScalable())
1308  Out << 'x';
1309  else
1310  Out << VF.getFixedValue();
1311  for (unsigned I = 0; I < numArgs; ++I)
1312  Out << "v";
1313  Out << "_" << ScalarName << "(" << VectorName << ")";
1314  return std::string(Out.str());
1315 }
1316 
1317 void VFABI::getVectorVariantNames(
1318     const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
1319  const StringRef S =
1321  .getValueAsString();
1322  if (S.empty())
1323  return;
1324 
1325  SmallVector<StringRef, 8> ListAttr;
1326  S.split(ListAttr, ",");
1327 
1328  for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
1329 #ifndef NDEBUG
1330  LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
1332  assert(Info.hasValue() && "Invalid name for a VFABI variant.");
1333  assert(CI.getModule()->getFunction(Info.getValue().VectorName) &&
1334  "Vector function is missing.");
1335 #endif
1336  VariantMappings.push_back(std::string(S));
1337  }
1338 }
1339 
1340 bool VFShape::hasValidParameterList() const {
1341   for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
1342  ++Pos) {
1343  assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
1344 
1345  switch (Parameters[Pos].ParamKind) {
1346  default: // Nothing to check.
1347  break;
1348     case VFParamKind::OMP_Linear:
1349     case VFParamKind::OMP_LinearRef:
1350     case VFParamKind::OMP_LinearVal:
1351     case VFParamKind::OMP_LinearUVal:
1352       // Compile time linear steps must be non-zero.
1353  if (Parameters[Pos].LinearStepOrPos == 0)
1354  return false;
1355  break;
1356     case VFParamKind::OMP_LinearPos:
1357     case VFParamKind::OMP_LinearValPos:
1358     case VFParamKind::OMP_LinearRefPos:
1359     case VFParamKind::OMP_LinearUValPos:
1360       // The runtime linear step must be referring to some other
1361  // parameters in the signature.
1362  if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
1363  return false;
1364  // The linear step parameter must be marked as uniform.
1365  if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
1366           VFParamKind::OMP_Uniform)
1367         return false;
1368  // The linear step parameter can't point at itself.
1369  if (Parameters[Pos].LinearStepOrPos == int(Pos))
1370  return false;
1371  break;
1372     case VFParamKind::GlobalPredicate:
1373       // The global predicate must be unique; it can be placed anywhere in the
1374       // signature.
1375  for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
1376  if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
1377  return false;
1378  break;
1379  }
1380  }
1381  return true;
1382 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::NextPowerOf2
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:683
llvm::EngineKind::Kind
Kind
Definition: ExecutionEngine.h:524
llvm::EquivalenceClasses::getOrInsertLeaderValue
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
Definition: EquivalenceClasses.h:179
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:63
llvm
Definition: AllocatorList.h:23
llvm::maskIsAllOneOrUndef
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:895
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
llvm::EquivalenceClasses::end
iterator end() const
Definition: EquivalenceClasses.h:147
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::InterleaveGroup::insertMember
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Definition: VectorUtils.h:617
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ElementCount
Definition: TypeSize.h:386
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:120
llvm::PointerType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:653
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::getGEPInductionOperand
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
Definition: VectorUtils.cpp:137
llvm::possiblyDemandedEltsInMask
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
Definition: VectorUtils.cpp:923
llvm::scConstant
@ scConstant
Definition: ScalarEvolutionExpressions.h:41
minimum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For minimum
Definition: README.txt:489
llvm::Instruction::getAllMetadataOtherThanDebugLoc
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * >> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:300
GetElementPtrTypeIterator.h
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:317
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1445
llvm::VFParamKind::GlobalPredicate
@ GlobalPredicate
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:959
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1644
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:58
llvm::CallBase::getAttribute
Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const
Get the attribute of a given kind at a position.
Definition: InstrTypes.h:1593
llvm::getSplatValue
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition: VectorUtils.cpp:350
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::VFABI::getVectorVariantNames
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
Definition: VectorUtils.cpp:1317
concatenateTwoVectors
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
Definition: VectorUtils.cpp:818
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::maskIsAllZeroOrUndef
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:868
llvm::MaxAnalysisRecursionDepth
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:48
ValueTracking.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::computeMinimumValueSizes
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition: VectorUtils.cpp:484
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:34
ScalarEvolution.h
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:411
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
MaxInterleaveGroupFactor
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1582
llvm::getUniqueCastUse
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Definition: VectorUtils.cpp:178
llvm::Optional
Definition: APInt.h:34
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet< Value *, 4 >
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:424
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::VFParamKind::OMP_Uniform
@ OMP_Uniform
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::propagateMetadata
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition: VectorUtils.cpp:710
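A hedged sketch of the typical vectorizer use: after a group of scalar memory instructions is replaced by one wide instruction, the metadata that is valid for the whole group is re-attached. WideLoad and ScalarLoads are hypothetical names.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void reattachGroupMetadata(Instruction *WideLoad,
                                  ArrayRef<Value *> ScalarLoads) {
  // Intersects the listed metadata kinds across ScalarLoads and sets the
  // result on WideLoad; returns WideLoad.
  propagateMetadata(WideLoad, ScalarLoads);
}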
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:174
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::uniteAccessGroups
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
Definition: VectorUtils.cpp:642
llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:292
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2184
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1198
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1100
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1330
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1108
llvm::VFShape::hasValidParameterList
bool hasValidParameterList() const
Sanity check on the Parameters in the VFShape.
Definition: VectorUtils.cpp:1340
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
CommandLine.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1251
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:449
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
Definition: VectorUtils.cpp:365
llvm::VFABI::mangleTLIVectorName
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for vector-function-...
Definition: VectorUtils.cpp:1301
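A sketch of the mangling for a fixed-width TLI mapping; the scalar/vector names here are made up, and the string shown in the comment is my reading of the scheme rather than normative output.
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/Support/TypeSize.h"
#include <string>
using namespace llvm;

static std::string demoMangle() {
  // Map the scalar call "sinf" (one vectorized argument) onto a hypothetical
  // 4-lane library routine "vec_sinf".
  return VFABI::mangleTLIVectorName("vec_sinf", "sinf", /*numArgs=*/1,
                                    ElementCount::getFixed(4));
  // Expected to be of the form "_ZGV_LLVM_N4v_sinf(vec_sinf)".
}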
llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Definition: VectorUtils.cpp:1260
llvm::createInterleaveMask
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition: VectorUtils.cpp:783
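A small worked example of the mask this produces; the demo function is hypothetical.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void demoInterleaveMask() {
  // Interleave two 4-element vectors lane by lane: A0 B0 A1 B1 A2 B2 A3 B3.
  SmallVector<int, 16> Mask = createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  // Mask == {0, 4, 1, 5, 2, 6, 3, 7}
}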
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1423
llvm::ARM_MC::isPredicated
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
Definition: ARMMCTargetDesc.cpp:181
llvm::Instruction::mayReadOrWriteMemory
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:586
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::isTriviallyVectorizable
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:44
llvm::User
Definition: User.h:44
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:194
llvm::VFParamKind::OMP_LinearVal
@ OMP_LinearVal
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of its element size.
Definition: LoopAccessAnalysis.cpp:1017
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1914
llvm::EquivalenceClasses::member_end
member_iterator member_end() const
Definition: EquivalenceClasses.h:157
llvm::VFParamKind::OMP_LinearUValPos
@ OMP_LinearUValPos
llvm::SCEVMulExpr
This node represents multiplication of some number of SCEVs.
Definition: ScalarEvolutionExpressions.h:288
llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition: Instructions.h:2032
llvm::Instruction
Definition: Instruction.h:45
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1632
llvm::MapVector::rend
reverse_iterator rend()
Definition: MapVector.h:76
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
PatternMatch.h
llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition: VectorUtils.h:603
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:662
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:304
LoopIterator.h
llvm::VFParamKind::OMP_LinearValPos
@ OMP_LinearValPos
llvm::SCEVUnknown::getValue
Value * getValue() const
Definition: ScalarEvolutionExpressions.h:543
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2023
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:277
llvm::SmallString< 256 >
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1297
LoopInfo.h
llvm::InterleaveGroup::setInsertPos
void setInsertPos(InstTy *Inst)
Definition: VectorUtils.h:679
llvm::VFABI::_LLVM_
static constexpr const char * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
Definition: VectorUtils.h:140
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:3885
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
llvm::InterleaveGroup::getIndex
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition: VectorUtils.h:669
VectorUtils.h
llvm::cl::opt
Definition: CommandLine.h:1419
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:128
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:491
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MDNode::intersect
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:922
llvm::VFParamKind::OMP_LinearRef
@ OMP_LinearRef
llvm::EquivalenceClasses::begin
iterator begin() const
Definition: EquivalenceClasses.h:146
llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition: VectorUtils.h:604
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::DemandedBits
Definition: DemandedBits.h:40
addToAccessGroupList
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Definition: VectorUtils.cpp:627
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:905
llvm::hasVectorInstrinsicScalarOpd
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:581
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:840
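A hedged usage sketch, assuming Builder is an existing IRBuilder and A, B are fixed-width vector values of the same element type; the wrapper function is hypothetical.
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *concatPair(IRBuilderBase &Builder, Value *A, Value *B) {
  // For two <4 x float> inputs this yields a single <8 x float> value,
  // built from shufflevector instructions emitted through Builder.
  return concatenateVectors(Builder, {A, B});
}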
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:634
llvm::GetElementPtrInst::getResultElementType
Type * getResultElementType() const
Definition: Instructions.h:1007
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
llvm::MDNode::getMostGenericTBAA
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
Definition: TypeBasedAliasAnalysis.cpp:477
IRBuilder.h
llvm::InterleavedAccessInfo::analyzeInterleaving
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
Definition: VectorUtils.cpp:1020
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:72
llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1484
llvm::is_splat
bool is_splat(R &&Range)
Wrapper function around std::equal to detect if all elements in a container are same.
Definition: STLExtras.h:1660
llvm::getLoadStoreAddressSpace
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Definition: Instructions.h:5273
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
llvm::createBitMaskForGaps
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition: VectorUtils.cpp:754
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:98
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:71
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2174
llvm::intersectAccessGroups
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Definition: VectorUtils.cpp:663
llvm::ArrayRef< int >
llvm::APInt::getAllOnesValue
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:568
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:672
llvm::LinearPolySize::getFixedValue
ScalarTy getFixedValue() const
Definition: TypeSize.h:313
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:421
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1512
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:940
llvm::MDNode::getMostGenericAliasScope
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:935
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:95
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1354
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:432
llvm::createReplicatedMask
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition: VectorUtils.cpp:774
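A small worked example of the replication pattern; the demo function is hypothetical.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void demoReplicatedMask() {
  // Repeat each of the VF original lanes ReplicationFactor times.
  SmallVector<int, 16> Mask =
      createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/2);
  // Mask == {0, 0, 0, 1, 1, 1}
}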
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:166
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1526
llvm::MapVector::rbegin
reverse_iterator rbegin()
Definition: MapVector.h:74
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12477
DemandedBits.h
llvm::MDNode::getContext
LLVMContext & getContext() const
Definition: Metadata.h:957
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:79
llvm::VFParamKind::OMP_LinearPos
@ OMP_LinearPos
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2189
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition: VectorUtils.cpp:432
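A small worked example of the widening direction, the inverse of narrowShuffleMaskElts; the demo function is hypothetical.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void demoWidenMask() {
  SmallVector<int, 8> Wide;
  // {2, 3, 0, 1} groups into pairs that each address one whole wide element,
  // so the transform succeeds.
  bool Ok = widenShuffleMaskElts(/*Scale=*/2, {2, 3, 0, 1}, Wide);
  // Ok == true, Wide == {1, 0}. A mask such as {1, 2, 0, 3} would fail.
  (void)Ok;
}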
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1286
llvm::isValidAsAccessGroup
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
Definition: LoopInfo.cpp:1042
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:669
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
llvm::replaceSymbolicStrideSCEV
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition: LoopAccessAnalysis.cpp:143
llvm::TypeSize
Definition: TypeSize.h:417
llvm::getIntrinsicForCallSite
Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
Definition: ValueTracking.cpp:3243
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
llvm::VFParamKind::OMP_LinearRefPos
@ OMP_LinearRefPos
llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM Value.
Definition: ScalarEvolutionExpressions.h:522
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
EquivalenceClasses.h
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:841
llvm::DemandedBits::getDemandedBits
APInt getDemandedBits(Instruction *I)
Return the bits demanded from instruction I.
Definition: DemandedBits.cpp:443
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:478
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
Definition: MathExtras.h:225
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition: VectorUtils.cpp:270
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition: ScalarEvolutionExpressions.h:57
llvm::VFParamKind::OMP_LinearUVal
@ OMP_LinearUVal
ScalarEvolutionExpressions.h
llvm::AttributeList::FunctionIndex
@ FunctionIndex
Definition: Attributes.h:389
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:1986
llvm::SCEVIntegralCastExpr
This is the base class for unary integral cast operator classes.
Definition: ScalarEvolutionExpressions.h:121
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::getSplatIndex
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition: VectorUtils.cpp:328
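A small worked example on two masks; the demo function is hypothetical.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void demoSplatIndex() {
  int A = getSplatIndex({3, -1, 3, 3}); // 3: every defined lane picks element 3
  int B = getSplatIndex({0, 1, 0, 1});  // -1: not a splat of a single element
  (void)A; (void)B;
}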
llvm::MDNode::getMostGenericFPMath
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
Definition: Metadata.cpp:967
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::VFABI::MappingsAttrName
static constexpr const char * MappingsAttrName
Definition: VectorUtils.h:201
llvm::stripGetElementPtr
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
Definition: VectorUtils.cpp:161
TargetTransformInfo.h
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:647
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl< int >
llvm::VFABI::tryDemangleForVFABI
Optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled name in the following format:
Definition: VFABIDemangling.cpp:319
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:397
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5237
llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not already exist.
Definition: EquivalenceClasses.h:217
llvm::createStrideMask
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition: VectorUtils.cpp:794
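A small worked example, e.g. to de-interleave one member of a factor-3 interleave group; the demo function is hypothetical.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void demoStrideMask() {
  // Select every 3rd lane starting at lane 1, for VF = 4 iterations.
  SmallVector<int, 16> Mask =
      createStrideMask(/*Start=*/1, /*Stride=*/3, /*VF=*/4);
  // Mask == {1, 4, 7, 10}
}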
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:411
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:802
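A small worked example showing the shape of the result; the demo function is hypothetical.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

static void demoSequentialMask() {
  // NumInts consecutive indices starting at Start, padded with NumUndefs
  // undef (-1) lanes.
  SmallVector<int, 16> Mask =
      createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2);
  // Mask == {0, 1, 2, 3, -1, -1}
}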
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:496
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1272
llvm::EquivalenceClasses::member_begin
member_iterator member_begin(iterator I) const
Definition: EquivalenceClasses.h:153
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:434
llvm::VFParamKind::OMP_Linear
@ OMP_Linear
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:152
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2179
llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
Definition: Instructions.h:5263
llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:744
llvm::InterleaveGroup::getNumMembers
uint32_t getNumMembers() const
Definition: VectorUtils.h:610
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Definition: VectorUtils.cpp:1287
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38