1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/VectorUtils.h"
14 #include "llvm/ADT/EquivalenceClasses.h"
15 #include "llvm/Analysis/DemandedBits.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/LoopIterator.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/GetElementPtrTypeIterator.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/CommandLine.h"
28 
29 #define DEBUG_TYPE "vectorutils"
30 
31 using namespace llvm;
32 using namespace llvm::PatternMatch;
33 
34 /// Maximum factor for an interleaved memory access.
35 static cl::opt<unsigned> MaxInterleaveGroupFactor(
36     "max-interleave-group-factor", cl::Hidden,
37     cl::desc("Maximum factor for an interleaved access group (default = 8)"),
38     cl::init(8));
39 
40 /// Return true if all of the intrinsic's arguments and return type are scalars
41 /// for the scalar form of the intrinsic, and vectors for the vector form of the
42 /// intrinsic (except operands that are marked as always being scalar by
43 /// hasVectorInstrinsicScalarOpd).
44 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
45   switch (ID) {
46  case Intrinsic::abs: // Begin integer bit-manipulation.
47  case Intrinsic::bswap:
48  case Intrinsic::bitreverse:
49  case Intrinsic::ctpop:
50  case Intrinsic::ctlz:
51  case Intrinsic::cttz:
52  case Intrinsic::fshl:
53  case Intrinsic::fshr:
54  case Intrinsic::smax:
55  case Intrinsic::smin:
56  case Intrinsic::umax:
57  case Intrinsic::umin:
58  case Intrinsic::sadd_sat:
59  case Intrinsic::ssub_sat:
60  case Intrinsic::uadd_sat:
61  case Intrinsic::usub_sat:
62  case Intrinsic::smul_fix:
63  case Intrinsic::smul_fix_sat:
64  case Intrinsic::umul_fix:
65  case Intrinsic::umul_fix_sat:
66  case Intrinsic::sqrt: // Begin floating-point.
67  case Intrinsic::sin:
68  case Intrinsic::cos:
69  case Intrinsic::exp:
70  case Intrinsic::exp2:
71  case Intrinsic::log:
72  case Intrinsic::log10:
73  case Intrinsic::log2:
74  case Intrinsic::fabs:
75  case Intrinsic::minnum:
76  case Intrinsic::maxnum:
77  case Intrinsic::minimum:
78  case Intrinsic::maximum:
79  case Intrinsic::copysign:
80  case Intrinsic::floor:
81  case Intrinsic::ceil:
82  case Intrinsic::trunc:
83  case Intrinsic::rint:
84  case Intrinsic::nearbyint:
85  case Intrinsic::round:
86  case Intrinsic::roundeven:
87  case Intrinsic::pow:
88  case Intrinsic::fma:
89  case Intrinsic::fmuladd:
90  case Intrinsic::powi:
91  case Intrinsic::canonicalize:
92  return true;
93  default:
94  return false;
95  }
96 }
97 
98 /// Identifies if the vector form of the intrinsic has a scalar operand.
99 bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
100                                         unsigned ScalarOpdIdx) {
101  switch (ID) {
102  case Intrinsic::abs:
103  case Intrinsic::ctlz:
104  case Intrinsic::cttz:
105  case Intrinsic::powi:
106  return (ScalarOpdIdx == 1);
107  case Intrinsic::smul_fix:
108  case Intrinsic::smul_fix_sat:
109  case Intrinsic::umul_fix:
110  case Intrinsic::umul_fix_sat:
111  return (ScalarOpdIdx == 2);
112  default:
113  return false;
114  }
115 }
116 
117 bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
118                                                   unsigned ScalarOpdIdx) {
119  switch (ID) {
120  case Intrinsic::powi:
121  return (ScalarOpdIdx == 1);
122  default:
123  return false;
124  }
125 }
126 
127 /// Returns intrinsic ID for call.
128 /// For the input call instruction it finds the matching intrinsic and returns
129 /// its ID; if no mapping is found, it returns not_intrinsic.
130 Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
131                                                 const TargetLibraryInfo *TLI) {
132   Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
133   if (ID == Intrinsic::not_intrinsic)
134     return Intrinsic::not_intrinsic;
135 
136   if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
137       ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
138       ID == Intrinsic::experimental_noalias_scope_decl ||
139       ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
140     return ID;
141   return Intrinsic::not_intrinsic;
142 }
143 
144 /// Find the operand of the GEP that should be checked for consecutive
145 /// stores. This ignores trailing indices that have no effect on the final
146 /// pointer.
147 unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
148   const DataLayout &DL = Gep->getModule()->getDataLayout();
149  unsigned LastOperand = Gep->getNumOperands() - 1;
150  TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
151 
152  // Walk backwards and try to peel off zeros.
153  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
154  // Find the type we're currently indexing into.
155  gep_type_iterator GEPTI = gep_type_begin(Gep);
156  std::advance(GEPTI, LastOperand - 2);
157 
158  // If it's a type with the same allocation size as the result of the GEP we
159  // can peel off the zero index.
160  if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
161  break;
162  --LastOperand;
163  }
164 
165  return LastOperand;
166 }
167 
168 /// If the argument is a GEP, then returns the operand identified by
169 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
170 /// operand, it returns that instead.
171 Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
172   GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
173  if (!GEP)
174  return Ptr;
175 
176  unsigned InductionOperand = getGEPInductionOperand(GEP);
177 
178  // Check that all of the gep indices are uniform except for our induction
179  // operand.
180  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
181  if (i != InductionOperand &&
182  !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
183  return Ptr;
184  return GEP->getOperand(InductionOperand);
185 }
186 
187 /// If a value has only one user that is a CastInst, return it.
188 Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
189   Value *UniqueCast = nullptr;
190  for (User *U : Ptr->users()) {
191  CastInst *CI = dyn_cast<CastInst>(U);
192  if (CI && CI->getType() == Ty) {
193  if (!UniqueCast)
194  UniqueCast = CI;
195  else
196  return nullptr;
197  }
198  }
199  return UniqueCast;
200 }
201 
202 /// Get the stride of a pointer access in a loop. Looks for symbolic
203 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
204 Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
205   auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
206  if (!PtrTy || PtrTy->isAggregateType())
207  return nullptr;
208 
209   // Try to remove a gep instruction to make the pointer (actually the index at
210   // this point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing
211   // the pointer; otherwise, we are analyzing the index.
212  Value *OrigPtr = Ptr;
213 
214  // The size of the pointer access.
215  int64_t PtrAccessSize = 1;
216 
217  Ptr = stripGetElementPtr(Ptr, SE, Lp);
218  const SCEV *V = SE->getSCEV(Ptr);
219 
220  if (Ptr != OrigPtr)
221  // Strip off casts.
222  while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
223  V = C->getOperand();
224 
225  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
226  if (!S)
227  return nullptr;
228 
229  V = S->getStepRecurrence(*SE);
230  if (!V)
231  return nullptr;
232 
233  // Strip off the size of access multiplication if we are still analyzing the
234  // pointer.
235  if (OrigPtr == Ptr) {
236  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
237  if (M->getOperand(0)->getSCEVType() != scConstant)
238  return nullptr;
239 
240  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
241 
242  // Huge step value - give up.
243  if (APStepVal.getBitWidth() > 64)
244  return nullptr;
245 
246  int64_t StepVal = APStepVal.getSExtValue();
247  if (PtrAccessSize != StepVal)
248  return nullptr;
249  V = M->getOperand(1);
250  }
251  }
252 
253  // Strip off casts.
254  Type *StripedOffRecurrenceCast = nullptr;
255  if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
256  StripedOffRecurrenceCast = C->getType();
257  V = C->getOperand();
258  }
259 
260  // Look for the loop invariant symbolic value.
261  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
262  if (!U)
263  return nullptr;
264 
265  Value *Stride = U->getValue();
266  if (!Lp->isLoopInvariant(Stride))
267  return nullptr;
268 
269  // If we have stripped off the recurrence cast we have to make sure that we
270  // return the value that is used in this loop so that we can replace it later.
271  if (StripedOffRecurrenceCast)
272  Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
273 
274  return Stride;
275 }
276 
277 /// Given a vector and an element number, see if the scalar value is
278 /// already around as a register, for example if it were inserted then extracted
279 /// from the vector.
280 Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
281  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
282  VectorType *VTy = cast<VectorType>(V->getType());
283  // For fixed-length vector, return undef for out of range access.
284  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
285  unsigned Width = FVTy->getNumElements();
286  if (EltNo >= Width)
287  return UndefValue::get(FVTy->getElementType());
288  }
289 
290  if (Constant *C = dyn_cast<Constant>(V))
291  return C->getAggregateElement(EltNo);
292 
293  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
294  // If this is an insert to a variable element, we don't know what it is.
295  if (!isa<ConstantInt>(III->getOperand(2)))
296  return nullptr;
297  unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
298 
299  // If this is an insert to the element we are looking for, return the
300  // inserted value.
301  if (EltNo == IIElt)
302  return III->getOperand(1);
303 
304  // Guard against infinite loop on malformed, unreachable IR.
305  if (III == III->getOperand(0))
306  return nullptr;
307 
308  // Otherwise, the insertelement doesn't modify the value, recurse on its
309  // vector input.
310  return findScalarElement(III->getOperand(0), EltNo);
311  }
312 
313  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
314  // Restrict the following transformation to fixed-length vector.
315  if (SVI && isa<FixedVectorType>(SVI->getType())) {
316  unsigned LHSWidth =
317  cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
318  int InEl = SVI->getMaskValue(EltNo);
319  if (InEl < 0)
320  return UndefValue::get(VTy->getElementType());
321  if (InEl < (int)LHSWidth)
322  return findScalarElement(SVI->getOperand(0), InEl);
323  return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
324  }
325 
326  // Extract a value from a vector add operation with a constant zero.
327  // TODO: Use getBinOpIdentity() to generalize this.
328  Value *Val; Constant *C;
329  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
330  if (Constant *Elt = C->getAggregateElement(EltNo))
331  if (Elt->isNullValue())
332  return findScalarElement(Val, EltNo);
333 
334  // Otherwise, we don't know.
335  return nullptr;
336 }
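// Editor's note (illustrative, not part of the upstream source): given IR such
// as
//   %v = insertelement <4 x i32> %w, i32 %x, i32 1
// findScalarElement(%v, 1) returns %x directly, while findScalarElement(%v, 0)
// recurses into %w, since the insert at index 1 cannot affect element 0.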
337 
338 int llvm::getSplatIndex(ArrayRef<int> Mask) {
339   int SplatIndex = -1;
340  for (int M : Mask) {
341  // Ignore invalid (undefined) mask elements.
342  if (M < 0)
343  continue;
344 
345  // There can be only 1 non-negative mask element value if this is a splat.
346  if (SplatIndex != -1 && SplatIndex != M)
347  return -1;
348 
349  // Initialize the splat index to the 1st non-negative mask element.
350  SplatIndex = M;
351  }
352  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
353  return SplatIndex;
354 }
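// Editor's note (illustrative, not part of the upstream source): the mask
// <2, -1, 2, 2> yields a splat index of 2 (undef elements are ignored),
// whereas <0, 1, 0, 1> yields -1 because two distinct indices appear.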
355 
356 /// Get splat value if the input is a splat vector or return nullptr.
357 /// This function is not fully general. It checks only 2 cases:
358 /// the input value is (1) a splat constant vector or (2) a sequence
359 /// of instructions that broadcasts a scalar at element 0.
360 Value *llvm::getSplatValue(const Value *V) {
361   if (isa<VectorType>(V->getType()))
362  if (auto *C = dyn_cast<Constant>(V))
363  return C->getSplatValue();
364 
365  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
366  Value *Splat;
367   if (match(V,
368             m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
369                       m_Value(), m_ZeroMask())))
370  return Splat;
371 
372  return nullptr;
373 }
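// Editor's note (illustrative, not part of the upstream source): the broadcast
// idiom matched above looks like
//   %ins   = insertelement <4 x i32> poison, i32 %s, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
// for which getSplatValue returns %s.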
374 
375 bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
376  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
377 
378  if (isa<VectorType>(V->getType())) {
379  if (isa<UndefValue>(V))
380  return true;
381  // FIXME: We can allow undefs, but if Index was specified, we may want to
382  // check that the constant is defined at that index.
383  if (auto *C = dyn_cast<Constant>(V))
384  return C->getSplatValue() != nullptr;
385  }
386 
387  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
388  // FIXME: We can safely allow undefs here. If Index was specified, we will
389  // check that the mask elt is defined at the required index.
390  if (!is_splat(Shuf->getShuffleMask()))
391  return false;
392 
393  // Match any index.
394  if (Index == -1)
395  return true;
396 
397  // Match a specific element. The mask should be defined at and match the
398  // specified index.
399  return Shuf->getMaskValue(Index) == Index;
400  }
401 
402  // The remaining tests are all recursive, so bail out if we hit the limit.
403   if (Depth++ == MaxAnalysisRecursionDepth)
404     return false;
405 
406  // If both operands of a binop are splats, the result is a splat.
407  Value *X, *Y, *Z;
408  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
409  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
410 
411  // If all operands of a select are splats, the result is a splat.
412  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
413  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
414  isSplatValue(Z, Index, Depth);
415 
416  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
417 
418  return false;
419 }
420 
421 void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
422                                  SmallVectorImpl<int> &ScaledMask) {
423  assert(Scale > 0 && "Unexpected scaling factor");
424 
425  // Fast-path: if no scaling, then it is just a copy.
426  if (Scale == 1) {
427  ScaledMask.assign(Mask.begin(), Mask.end());
428  return;
429  }
430 
431  ScaledMask.clear();
432  for (int MaskElt : Mask) {
433  if (MaskElt >= 0) {
434  assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
435  "Overflowed 32-bits");
436  }
437  for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
438  ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
439  }
440 }
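// Editor's note (illustrative, not part of the upstream source): with
// Scale = 2, the mask <0, 3> becomes <0, 1, 6, 7>; each wide element is
// replaced by Scale consecutive narrow elements, and negative (undef/sentinel)
// elements are simply replicated.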
441 
442 bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
443                                 SmallVectorImpl<int> &ScaledMask) {
444  assert(Scale > 0 && "Unexpected scaling factor");
445 
446  // Fast-path: if no scaling, then it is just a copy.
447  if (Scale == 1) {
448  ScaledMask.assign(Mask.begin(), Mask.end());
449  return true;
450  }
451 
452   // We must map the original elements down evenly to a type with fewer elements.
453  int NumElts = Mask.size();
454  if (NumElts % Scale != 0)
455  return false;
456 
457  ScaledMask.clear();
458  ScaledMask.reserve(NumElts / Scale);
459 
460  // Step through the input mask by splitting into Scale-sized slices.
461  do {
462  ArrayRef<int> MaskSlice = Mask.take_front(Scale);
463  assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
464 
465  // The first element of the slice determines how we evaluate this slice.
466  int SliceFront = MaskSlice.front();
467  if (SliceFront < 0) {
468  // Negative values (undef or other "sentinel" values) must be equal across
469  // the entire slice.
470  if (!is_splat(MaskSlice))
471  return false;
472  ScaledMask.push_back(SliceFront);
473  } else {
474  // A positive mask element must be cleanly divisible.
475  if (SliceFront % Scale != 0)
476  return false;
477  // Elements of the slice must be consecutive.
478  for (int i = 1; i < Scale; ++i)
479  if (MaskSlice[i] != SliceFront + i)
480  return false;
481  ScaledMask.push_back(SliceFront / Scale);
482  }
483  Mask = Mask.drop_front(Scale);
484  } while (!Mask.empty());
485 
486  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
487 
488  // All elements of the original mask can be scaled down to map to the elements
489  // of a mask with wider elements.
490  return true;
491 }
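// Editor's note (illustrative, not part of the upstream source): with
// Scale = 2, <0, 1, 6, 7> widens to <0, 3>, whereas <0, 2, 6, 7> cannot be
// widened (the first slice is not consecutive) and the function returns false.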
492 
493 MapVector<Instruction *, uint64_t>
494 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
495                                const TargetTransformInfo *TTI) {
496 
497   // DemandedBits will give us every value's live-out bits. But we want
498   // to ensure no extra casts would need to be inserted, so every DAG
499   // of connected values must have the same minimum bitwidth.
500   EquivalenceClasses<Value *> ECs;
501   SmallVector<Value *, 16> Worklist;
502   SmallPtrSet<Value *, 4> Roots;
503   SmallPtrSet<Value *, 16> Visited;
504   DenseMap<Value *, uint64_t> DBits;
505   SmallPtrSet<Instruction *, 4> InstructionSet;
506   MapVector<Instruction *, uint64_t> MinBWs;
507 
508  // Determine the roots. We work bottom-up, from truncs or icmps.
509  bool SeenExtFromIllegalType = false;
510  for (auto *BB : Blocks)
511  for (auto &I : *BB) {
512  InstructionSet.insert(&I);
513 
514  if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
515  !TTI->isTypeLegal(I.getOperand(0)->getType()))
516  SeenExtFromIllegalType = true;
517 
518  // Only deal with non-vector integers up to 64-bits wide.
519  if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
520  !I.getType()->isVectorTy() &&
521  I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
522  // Don't make work for ourselves. If we know the loaded type is legal,
523  // don't add it to the worklist.
524  if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
525  continue;
526 
527  Worklist.push_back(&I);
528  Roots.insert(&I);
529  }
530  }
531  // Early exit.
532  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
533  return MinBWs;
534 
535  // Now proceed breadth-first, unioning values together.
536  while (!Worklist.empty()) {
537  Value *Val = Worklist.pop_back_val();
538  Value *Leader = ECs.getOrInsertLeaderValue(Val);
539 
540  if (Visited.count(Val))
541  continue;
542  Visited.insert(Val);
543 
544  // Non-instructions terminate a chain successfully.
545  if (!isa<Instruction>(Val))
546  continue;
547  Instruction *I = cast<Instruction>(Val);
548 
549  // If we encounter a type that is larger than 64 bits, we can't represent
550  // it so bail out.
551     if (DB.getDemandedBits(I).getBitWidth() > 64)
552       return MapVector<Instruction *, uint64_t>();
553 
554     uint64_t V = DB.getDemandedBits(I).getZExtValue();
555     DBits[Leader] |= V;
556  DBits[I] = V;
557 
558  // Casts, loads and instructions outside of our range terminate a chain
559  // successfully.
560  if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
561  !InstructionSet.count(I))
562  continue;
563 
564  // Unsafe casts terminate a chain unsuccessfully. We can't do anything
565  // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
566  // transform anything that relies on them.
567  if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
568  !I->getType()->isIntegerTy()) {
569  DBits[Leader] |= ~0ULL;
570  continue;
571  }
572 
573  // We don't modify the types of PHIs. Reductions will already have been
574  // truncated if possible, and inductions' sizes will have been chosen by
575  // indvars.
576  if (isa<PHINode>(I))
577  continue;
578 
579  if (DBits[Leader] == ~0ULL)
580  // All bits demanded, no point continuing.
581  continue;
582 
583  for (Value *O : cast<User>(I)->operands()) {
584  ECs.unionSets(Leader, O);
585  Worklist.push_back(O);
586  }
587  }
588 
589  // Now we've discovered all values, walk them to see if there are
590  // any users we didn't see. If there are, we can't optimize that
591  // chain.
592  for (auto &I : DBits)
593  for (auto *U : I.first->users())
594  if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
595  DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
596 
597  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
598  uint64_t LeaderDemandedBits = 0;
599  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
600  LeaderDemandedBits |= DBits[M];
601 
602  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
603  llvm::countLeadingZeros(LeaderDemandedBits);
604  // Round up to a power of 2
605  if (!isPowerOf2_64((uint64_t)MinBW))
606  MinBW = NextPowerOf2(MinBW);
607 
608  // We don't modify the types of PHIs. Reductions will already have been
609  // truncated if possible, and inductions' sizes will have been chosen by
610  // indvars.
611  // If we are required to shrink a PHI, abandon this entire equivalence class.
612  bool Abort = false;
613  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
614  if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
615  Abort = true;
616  break;
617  }
618  if (Abort)
619  continue;
620 
621  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
622  if (!isa<Instruction>(M))
623  continue;
624  Type *Ty = M->getType();
625  if (Roots.count(M))
626  Ty = cast<Instruction>(M)->getOperand(0)->getType();
627  if (MinBW < Ty->getScalarSizeInBits())
628  MinBWs[cast<Instruction>(M)] = MinBW;
629  }
630  }
631 
632  return MinBWs;
633 }
634 
635 /// Add all access groups in @p AccGroups to @p List.
636 template <typename ListT>
637 static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
638  // Interpret an access group as a list containing itself.
639  if (AccGroups->getNumOperands() == 0) {
640  assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
641  List.insert(AccGroups);
642  return;
643  }
644 
645  for (auto &AccGroupListOp : AccGroups->operands()) {
646  auto *Item = cast<MDNode>(AccGroupListOp.get());
647  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
648  List.insert(Item);
649  }
650 }
651 
652 MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
653  if (!AccGroups1)
654  return AccGroups2;
655  if (!AccGroups2)
656  return AccGroups1;
657  if (AccGroups1 == AccGroups2)
658  return AccGroups1;
659 
660   SmallSetVector<Metadata *, 4> Union;
661   addToAccessGroupList(Union, AccGroups1);
662  addToAccessGroupList(Union, AccGroups2);
663 
664  if (Union.size() == 0)
665  return nullptr;
666  if (Union.size() == 1)
667  return cast<MDNode>(Union.front());
668 
669  LLVMContext &Ctx = AccGroups1->getContext();
670  return MDNode::get(Ctx, Union.getArrayRef());
671 }
672 
673 MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
674                                     const Instruction *Inst2) {
675  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
676  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
677 
678  if (!MayAccessMem1 && !MayAccessMem2)
679  return nullptr;
680  if (!MayAccessMem1)
681  return Inst2->getMetadata(LLVMContext::MD_access_group);
682  if (!MayAccessMem2)
683  return Inst1->getMetadata(LLVMContext::MD_access_group);
684 
685  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
686  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
687  if (!MD1 || !MD2)
688  return nullptr;
689  if (MD1 == MD2)
690  return MD1;
691 
692  // Use set for scalable 'contains' check.
693  SmallPtrSet<Metadata *, 4> AccGroupSet2;
694  addToAccessGroupList(AccGroupSet2, MD2);
695 
696  SmallVector<Metadata *, 4> Intersection;
697  if (MD1->getNumOperands() == 0) {
698  assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
699  if (AccGroupSet2.count(MD1))
700  Intersection.push_back(MD1);
701  } else {
702  for (const MDOperand &Node : MD1->operands()) {
703  auto *Item = cast<MDNode>(Node.get());
704  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
705  if (AccGroupSet2.count(Item))
706  Intersection.push_back(Item);
707  }
708  }
709 
710  if (Intersection.size() == 0)
711  return nullptr;
712  if (Intersection.size() == 1)
713  return cast<MDNode>(Intersection.front());
714 
715  LLVMContext &Ctx = Inst1->getContext();
716  return MDNode::get(Ctx, Intersection);
717 }
718 
719 /// \returns \p I after propagating metadata from \p VL.
720 Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
721   if (VL.empty())
722     return Inst;
723   Instruction *I0 = cast<Instruction>(VL[0]);
724   SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
725   I0->getAllMetadataOtherThanDebugLoc(Metadata);
726 
727  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
728  LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
729  LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
730  LLVMContext::MD_access_group}) {
731  MDNode *MD = I0->getMetadata(Kind);
732 
733  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
734  const Instruction *IJ = cast<Instruction>(VL[J]);
735  MDNode *IMD = IJ->getMetadata(Kind);
736  switch (Kind) {
737  case LLVMContext::MD_tbaa:
738  MD = MDNode::getMostGenericTBAA(MD, IMD);
739  break;
740  case LLVMContext::MD_alias_scope:
741  MD = MDNode::getMostGenericAliasScope(MD, IMD);
742  break;
743  case LLVMContext::MD_fpmath:
744  MD = MDNode::getMostGenericFPMath(MD, IMD);
745  break;
746  case LLVMContext::MD_noalias:
747  case LLVMContext::MD_nontemporal:
748  case LLVMContext::MD_invariant_load:
749  MD = MDNode::intersect(MD, IMD);
750  break;
751  case LLVMContext::MD_access_group:
752  MD = intersectAccessGroups(Inst, IJ);
753  break;
754  default:
755  llvm_unreachable("unhandled metadata");
756  }
757  }
758 
759  Inst->setMetadata(Kind, MD);
760  }
761 
762  return Inst;
763 }
764 
765 Constant *
766 llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
767                            const InterleaveGroup<Instruction> &Group) {
768  // All 1's means mask is not needed.
769  if (Group.getNumMembers() == Group.getFactor())
770  return nullptr;
771 
772  // TODO: support reversed access.
773  assert(!Group.isReverse() && "Reversed group not supported.");
774 
775   SmallVector<Constant *, 16> Mask;
776   for (unsigned i = 0; i < VF; i++)
777  for (unsigned j = 0; j < Group.getFactor(); ++j) {
778  unsigned HasMember = Group.getMember(j) ? 1 : 0;
779  Mask.push_back(Builder.getInt1(HasMember));
780  }
781 
782  return ConstantVector::get(Mask);
783 }
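// Editor's note (illustrative, not part of the upstream source): for a
// factor-2 group where only member 0 is present and VF = 4, the generated
// gap mask is <1, 0, 1, 0, 1, 0, 1, 0>, i.e. one i1 per member, repeated for
// each of the VF lanes.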
784 
785 llvm::SmallVector<int, 16>
786 llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
787  SmallVector<int, 16> MaskVec;
788  for (unsigned i = 0; i < VF; i++)
789  for (unsigned j = 0; j < ReplicationFactor; j++)
790  MaskVec.push_back(i);
791 
792  return MaskVec;
793 }
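// Editor's note (illustrative, not part of the upstream source):
// createReplicatedMask(3, 2) produces <0, 0, 0, 1, 1, 1>, i.e. each of the
// VF = 2 source elements is repeated ReplicationFactor = 3 times.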
794 
795 llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
796                                                       unsigned NumVecs) {
797   SmallVector<int, 16> Mask;
798   for (unsigned i = 0; i < VF; i++)
799  for (unsigned j = 0; j < NumVecs; j++)
800  Mask.push_back(j * VF + i);
801 
802  return Mask;
803 }
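// Editor's note (illustrative, not part of the upstream source):
// createInterleaveMask(4, 2) produces <0, 4, 1, 5, 2, 6, 3, 7>, interleaving
// the lanes of NumVecs = 2 vectors of VF = 4 elements each.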
804 
805 llvm::SmallVector<int, 16>
806 llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
807   SmallVector<int, 16> Mask;
808   for (unsigned i = 0; i < VF; i++)
809  Mask.push_back(Start + i * Stride);
810 
811  return Mask;
812 }
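// Editor's note (illustrative, not part of the upstream source):
// createStrideMask(0, 2, 4) produces <0, 2, 4, 6>, selecting every second
// element starting at index 0.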
813 
814 llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
815                                                       unsigned NumInts,
816                                                       unsigned NumUndefs) {
817   SmallVector<int, 16> Mask;
818   for (unsigned i = 0; i < NumInts; i++)
819  Mask.push_back(Start + i);
820 
821  for (unsigned i = 0; i < NumUndefs; i++)
822  Mask.push_back(-1);
823 
824  return Mask;
825 }
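// Editor's note (illustrative, not part of the upstream source):
// createSequentialMask(0, 4, 4) produces <0, 1, 2, 3, -1, -1, -1, -1>, where
// the trailing -1 entries denote undef lanes.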
826 
827 /// A helper function for concatenating vectors. This function concatenates two
828 /// vectors having the same element type. If the second vector has fewer
829 /// elements than the first, it is padded with undefs.
830 static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
831                                     Value *V2) {
832  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
833  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
834  assert(VecTy1 && VecTy2 &&
835  VecTy1->getScalarType() == VecTy2->getScalarType() &&
836  "Expect two vectors with the same element type");
837 
838  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
839  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
840  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
841 
842  if (NumElts1 > NumElts2) {
843  // Extend with UNDEFs.
844  V2 = Builder.CreateShuffleVector(
845  V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
846  }
847 
848  return Builder.CreateShuffleVector(
849  V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
850 }
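// Editor's note (illustrative, not part of the upstream source): concatenating
// a <4 x float> with a <2 x float> first widens the second operand to
// <4 x float> using createSequentialMask(0, 2, 2) = <0, 1, -1, -1>, then emits
// a single shuffle over both operands with createSequentialMask(0, 6, 0) =
// <0, 1, 2, 3, 4, 5>, yielding a <6 x float>.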
851 
852 Value *llvm::concatenateVectors(IRBuilderBase &Builder,
853                                 ArrayRef<Value *> Vecs) {
854  unsigned NumVecs = Vecs.size();
855  assert(NumVecs > 1 && "Should be at least two vectors");
856 
857  SmallVector<Value *, 8> ResList;
858  ResList.append(Vecs.begin(), Vecs.end());
859  do {
860  SmallVector<Value *, 8> TmpList;
861  for (unsigned i = 0; i < NumVecs - 1; i += 2) {
862  Value *V0 = ResList[i], *V1 = ResList[i + 1];
863  assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
864  "Only the last vector may have a different type");
865 
866  TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
867  }
868 
869  // Push the last vector if the total number of vectors is odd.
870  if (NumVecs % 2 != 0)
871  TmpList.push_back(ResList[NumVecs - 1]);
872 
873  ResList = TmpList;
874  NumVecs = ResList.size();
875  } while (NumVecs > 1);
876 
877  return ResList[0];
878 }
879 
880 bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
881   assert(isa<VectorType>(Mask->getType()) &&
882  isa<IntegerType>(Mask->getType()->getScalarType()) &&
883  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
884  1 &&
885  "Mask must be a vector of i1");
886 
887  auto *ConstMask = dyn_cast<Constant>(Mask);
888  if (!ConstMask)
889  return false;
890  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
891  return true;
892  if (isa<ScalableVectorType>(ConstMask->getType()))
893  return false;
894  for (unsigned
895  I = 0,
896  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
897  I != E; ++I) {
898  if (auto *MaskElt = ConstMask->getAggregateElement(I))
899  if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
900  continue;
901  return false;
902  }
903  return true;
904 }
905 
906 bool llvm::maskIsAllOneOrUndef(Value *Mask) {
907   assert(isa<VectorType>(Mask->getType()) &&
908  isa<IntegerType>(Mask->getType()->getScalarType()) &&
909  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
910  1 &&
911  "Mask must be a vector of i1");
912 
913  auto *ConstMask = dyn_cast<Constant>(Mask);
914  if (!ConstMask)
915  return false;
916  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
917  return true;
918  if (isa<ScalableVectorType>(ConstMask->getType()))
919  return false;
920  for (unsigned
921  I = 0,
922  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
923  I != E; ++I) {
924  if (auto *MaskElt = ConstMask->getAggregateElement(I))
925  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
926  continue;
927  return false;
928  }
929  return true;
930 }
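// Editor's note (illustrative, not part of the upstream source): for constant
// masks, <i1 0, i1 undef> satisfies maskIsAllZeroOrUndef and <i1 1, i1 undef>
// satisfies maskIsAllOneOrUndef; a non-constant mask makes both predicates
// return false conservatively.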
931 
932 /// TODO: This is a lot like known bits, but for
933 /// vectors. Is there something we can common this with?
934 APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
935   assert(isa<FixedVectorType>(Mask->getType()) &&
936  isa<IntegerType>(Mask->getType()->getScalarType()) &&
937  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
938  1 &&
939  "Mask must be a fixed width vector of i1");
940 
941  const unsigned VWidth =
942  cast<FixedVectorType>(Mask->getType())->getNumElements();
943  APInt DemandedElts = APInt::getAllOnesValue(VWidth);
944  if (auto *CV = dyn_cast<ConstantVector>(Mask))
945  for (unsigned i = 0; i < VWidth; i++)
946  if (CV->getAggregateElement(i)->isNullValue())
947  DemandedElts.clearBit(i);
948  return DemandedElts;
949 }
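// Editor's note (illustrative, not part of the upstream source): for the
// constant mask <i1 1, i1 0, i1 1, i1 undef> the result has bits 0, 2 and 3
// set (only the known-zero lane 1 is cleared); for a non-constant mask every
// lane is conservatively considered demanded.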
950 
951 bool InterleavedAccessInfo::isStrided(int Stride) {
952  unsigned Factor = std::abs(Stride);
953  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
954 }
955 
956 void InterleavedAccessInfo::collectConstStrideAccesses(
957     MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
958     const ValueToValueMap &Strides) {
959  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
960 
961  // Since it's desired that the load/store instructions be maintained in
962  // "program order" for the interleaved access analysis, we have to visit the
963  // blocks in the loop in reverse postorder (i.e., in a topological order).
964  // Such an ordering will ensure that any load/store that may be executed
965  // before a second load/store will precede the second load/store in
966  // AccessStrideInfo.
967  LoopBlocksDFS DFS(TheLoop);
968  DFS.perform(LI);
969  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
970  for (auto &I : *BB) {
971       Value *Ptr = getLoadStorePointerOperand(&I);
972       if (!Ptr)
973  continue;
974  Type *ElementTy = getLoadStoreType(&I);
975 
976  // We don't check wrapping here because we don't know yet if Ptr will be
977  // part of a full group or a group with gaps. Checking wrapping for all
978  // pointers (even those that end up in groups with no gaps) will be overly
979  // conservative. For full groups, wrapping should be ok since if we would
980  // wrap around the address space we would do a memory access at nullptr
981  // even without the transformation. The wrapping checks are therefore
982  // deferred until after we've formed the interleaved groups.
983  int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
984  /*Assume=*/true, /*ShouldCheckWrap=*/false);
985 
986  const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
987  uint64_t Size = DL.getTypeAllocSize(ElementTy);
988       AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
989                                               getLoadStoreAlignment(&I));
990     }
991 }
992 
993 // Analyze interleaved accesses and collect them into interleaved load and
994 // store groups.
995 //
996 // When generating code for an interleaved load group, we effectively hoist all
997 // loads in the group to the location of the first load in program order. When
998 // generating code for an interleaved store group, we sink all stores to the
999 // location of the last store. This code motion can change the order of load
1000 // and store instructions and may break dependences.
1001 //
1002 // The code generation strategy mentioned above ensures that we won't violate
1003 // any write-after-read (WAR) dependences.
1004 //
1005 // E.g., for the WAR dependence: a = A[i]; // (1)
1006 // A[i] = b; // (2)
1007 //
1008 // The store group of (2) is always inserted at or below (2), and the load
1009 // group of (1) is always inserted at or above (1). Thus, the instructions will
1010 // never be reordered. All other dependences are checked to ensure the
1011 // correctness of the instruction reordering.
1012 //
1013 // The algorithm visits all memory accesses in the loop in bottom-up program
1014 // order. Program order is established by traversing the blocks in the loop in
1015 // reverse postorder when collecting the accesses.
1016 //
1017 // We visit the memory accesses in bottom-up order because it can simplify the
1018 // construction of store groups in the presence of write-after-write (WAW)
1019 // dependences.
1020 //
1021 // E.g., for the WAW dependence: A[i] = a; // (1)
1022 // A[i] = b; // (2)
1023 // A[i + 1] = c; // (3)
1024 //
1025 // We will first create a store group with (3) and (2). (1) can't be added to
1026 // this group because it and (2) are dependent. However, (1) can be grouped
1027 // with other accesses that may precede it in program order. Note that a
1028 // bottom-up order does not imply that WAW dependences should not be checked.
1029 void InterleavedAccessInfo::analyzeInterleaving(
1030     bool EnablePredicatedInterleavedMemAccesses) {
1031  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1032  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
1033 
1034  // Holds all accesses with a constant stride.
1035   MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
1036   collectConstStrideAccesses(AccessStrideInfo, Strides);
1037 
1038  if (AccessStrideInfo.empty())
1039  return;
1040 
1041  // Collect the dependences in the loop.
1042  collectDependences();
1043 
1044   // Holds all interleaved store groups temporarily.
1045   SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
1046   // Holds all interleaved load groups temporarily.
1047   SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
1048 
1049  // Search in bottom-up program order for pairs of accesses (A and B) that can
1050  // form interleaved load or store groups. In the algorithm below, access A
1051  // precedes access B in program order. We initialize a group for B in the
1052  // outer loop of the algorithm, and then in the inner loop, we attempt to
1053  // insert each A into B's group if:
1054  //
1055  // 1. A and B have the same stride,
1056  // 2. A and B have the same memory object size, and
1057  // 3. A belongs in B's group according to its distance from B.
1058  //
1059  // Special care is taken to ensure group formation will not break any
1060  // dependences.
1061  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1062  BI != E; ++BI) {
1063  Instruction *B = BI->first;
1064  StrideDescriptor DesB = BI->second;
1065 
1066  // Initialize a group for B if it has an allowable stride. Even if we don't
1067  // create a group for B, we continue with the bottom-up algorithm to ensure
1068  // we don't break any of B's dependences.
1069  InterleaveGroup<Instruction> *Group = nullptr;
1070  if (isStrided(DesB.Stride) &&
1071  (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1072  Group = getInterleaveGroup(B);
1073  if (!Group) {
1074  LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1075  << '\n');
1076  Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1077  }
1078  if (B->mayWriteToMemory())
1079  StoreGroups.insert(Group);
1080  else
1081  LoadGroups.insert(Group);
1082  }
1083 
1084  for (auto AI = std::next(BI); AI != E; ++AI) {
1085  Instruction *A = AI->first;
1086  StrideDescriptor DesA = AI->second;
1087 
1088  // Our code motion strategy implies that we can't have dependences
1089  // between accesses in an interleaved group and other accesses located
1090  // between the first and last member of the group. Note that this also
1091  // means that a group can't have more than one member at a given offset.
1092  // The accesses in a group can have dependences with other accesses, but
1093  // we must ensure we don't extend the boundaries of the group such that
1094  // we encompass those dependent accesses.
1095  //
1096  // For example, assume we have the sequence of accesses shown below in a
1097  // stride-2 loop:
1098  //
1099  // (1, 2) is a group | A[i] = a; // (1)
1100  // | A[i-1] = b; // (2) |
1101  // A[i-3] = c; // (3)
1102  // A[i] = d; // (4) | (2, 4) is not a group
1103  //
1104  // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1105  // but not with (4). If we did, the dependent access (3) would be within
1106  // the boundaries of the (2, 4) group.
1107  if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
1108  // If a dependence exists and A is already in a group, we know that A
1109  // must be a store since A precedes B and WAR dependences are allowed.
1110  // Thus, A would be sunk below B. We release A's group to prevent this
1111  // illegal code motion. A will then be free to form another group with
1112  // instructions that precede it.
1113  if (isInterleaved(A)) {
1114  InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
1115 
1116  LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1117  "dependence between " << *A << " and "<< *B << '\n');
1118 
1119  StoreGroups.remove(StoreGroup);
1120  releaseGroup(StoreGroup);
1121  }
1122 
1123  // If a dependence exists and A is not already in a group (or it was
1124  // and we just released it), B might be hoisted above A (if B is a
1125  // load) or another store might be sunk below A (if B is a store). In
1126  // either case, we can't add additional instructions to B's group. B
1127  // will only form a group with instructions that it precedes.
1128  break;
1129  }
1130 
1131  // At this point, we've checked for illegal code motion. If either A or B
1132  // isn't strided, there's nothing left to do.
1133  if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1134  continue;
1135 
1136  // Ignore A if it's already in a group or isn't the same kind of memory
1137  // operation as B.
1138  // Note that mayReadFromMemory() isn't mutually exclusive to
1139  // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1140  // here, canVectorizeMemory() should have returned false - except for the
1141  // case we asked for optimization remarks.
1142  if (isInterleaved(A) ||
1143  (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1144  (A->mayWriteToMemory() != B->mayWriteToMemory()))
1145  continue;
1146 
1147  // Check rules 1 and 2. Ignore A if its stride or size is different from
1148  // that of B.
1149  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1150  continue;
1151 
1152  // Ignore A if the memory object of A and B don't belong to the same
1153       // address space.
1154       if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1155         continue;
1156 
1157  // Calculate the distance from A to B.
1158  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1159  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1160  if (!DistToB)
1161  continue;
1162  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1163 
1164  // Check rule 3. Ignore A if its distance to B is not a multiple of the
1165  // size.
1166  if (DistanceToB % static_cast<int64_t>(DesB.Size))
1167  continue;
1168 
1169  // All members of a predicated interleave-group must have the same predicate,
1170  // and currently must reside in the same BB.
1171  BasicBlock *BlockA = A->getParent();
1172  BasicBlock *BlockB = B->getParent();
1173  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1174  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1175  continue;
1176 
1177  // The index of A is the index of B plus A's distance to B in multiples
1178  // of the size.
1179  int IndexA =
1180  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1181 
1182  // Try to insert A into B's group.
1183  if (Group->insertMember(A, IndexA, DesA.Alignment)) {
1184  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1185  << " into the interleave group with" << *B
1186  << '\n');
1187  InterleaveGroupMap[A] = Group;
1188 
1189  // Set the first load in program order as the insert position.
1190  if (A->mayReadFromMemory())
1191  Group->setInsertPos(A);
1192  }
1193  } // Iteration over A accesses.
1194  } // Iteration over B accesses.
1195 
1196  // Remove interleaved store groups with gaps.
1197  for (auto *Group : StoreGroups)
1198  if (Group->getNumMembers() != Group->getFactor()) {
1199  LLVM_DEBUG(
1200  dbgs() << "LV: Invalidate candidate interleaved store group due "
1201  "to gaps.\n");
1202  releaseGroup(Group);
1203  }
1204  // Remove interleaved groups with gaps (currently only loads) whose memory
1205  // accesses may wrap around. We have to revisit the getPtrStride analysis,
1206  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1207  // not check wrapping (see documentation there).
1208  // FORNOW we use Assume=false;
1209  // TODO: Change to Assume=true but making sure we don't exceed the threshold
1210  // of runtime SCEV assumptions checks (thereby potentially failing to
1211  // vectorize altogether).
1212  // Additional optional optimizations:
1213  // TODO: If we are peeling the loop and we know that the first pointer doesn't
1214  // wrap then we can deduce that all pointers in the group don't wrap.
1215  // This means that we can forcefully peel the loop in order to only have to
1216  // check the first pointer for no-wrap. When we'll change to use Assume=true
1217  // we'll only need at most one runtime check per interleaved group.
1218  for (auto *Group : LoadGroups) {
1219     // Case 1: A full group. We can skip the checks; for full groups, if the wide
1220  // load would wrap around the address space we would do a memory access at
1221  // nullptr even without the transformation.
1222  if (Group->getNumMembers() == Group->getFactor())
1223  continue;
1224 
1225  // Case 2: If first and last members of the group don't wrap this implies
1226  // that all the pointers in the group don't wrap.
1227  // So we check only group member 0 (which is always guaranteed to exist),
1228  // and group member Factor - 1; If the latter doesn't exist we rely on
1229     // peeling (if it is a non-reversed access -- see Case 3).
1230  Value *FirstMemberPtr = getLoadStorePointerOperand(Group->getMember(0));
1231  if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
1232  /*ShouldCheckWrap=*/true)) {
1233  LLVM_DEBUG(
1234  dbgs() << "LV: Invalidate candidate interleaved group due to "
1235  "first group member potentially pointer-wrapping.\n");
1236  releaseGroup(Group);
1237  continue;
1238  }
1239  Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
1240  if (LastMember) {
1241  Value *LastMemberPtr = getLoadStorePointerOperand(LastMember);
1242  if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
1243  /*ShouldCheckWrap=*/true)) {
1244  LLVM_DEBUG(
1245  dbgs() << "LV: Invalidate candidate interleaved group due to "
1246  "last group member potentially pointer-wrapping.\n");
1247  releaseGroup(Group);
1248  }
1249  } else {
1250  // Case 3: A non-reversed interleaved load group with gaps: We need
1251  // to execute at least one scalar epilogue iteration. This will ensure
1252  // we don't speculatively access memory out-of-bounds. We only need
1253  // to look for a member at index factor - 1, since every group must have
1254  // a member at index zero.
1255  if (Group->isReverse()) {
1256  LLVM_DEBUG(
1257  dbgs() << "LV: Invalidate candidate interleaved group due to "
1258  "a reverse access with gaps.\n");
1259  releaseGroup(Group);
1260  continue;
1261  }
1262  LLVM_DEBUG(
1263  dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1264  RequiresScalarEpilogue = true;
1265  }
1266  }
1267 }
1268 
1269 void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1270   // If no group had triggered the requirement to create an epilogue loop,
1271  // there is nothing to do.
1272  if (!requiresScalarEpilogue())
1273  return;
1274 
1275  bool ReleasedGroup = false;
1276  // Release groups requiring scalar epilogues. Note that this also removes them
1277  // from InterleaveGroups.
1278  for (auto *Group : make_early_inc_range(InterleaveGroups)) {
1279  if (!Group->requiresScalarEpilogue())
1280  continue;
1281  LLVM_DEBUG(
1282  dbgs()
1283  << "LV: Invalidate candidate interleaved group due to gaps that "
1284  "require a scalar epilogue (not allowed under optsize) and cannot "
1285  "be masked (not enabled). \n");
1286  releaseGroup(Group);
1287  ReleasedGroup = true;
1288  }
1289  assert(ReleasedGroup && "At least one group must be invalidated, as a "
1290  "scalar epilogue was required");
1291  (void)ReleasedGroup;
1292  RequiresScalarEpilogue = false;
1293 }
1294 
1295 template <typename InstT>
1296 void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1297  llvm_unreachable("addMetadata can only be used for Instruction");
1298 }
1299 
1300 namespace llvm {
1301 template <>
1302 void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1303   SmallVector<Value *, 4> VL;
1304   std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1305  [](std::pair<int, Instruction *> p) { return p.second; });
1306  propagateMetadata(NewInst, VL);
1307 }
1308 }
1309 
1310 std::string VFABI::mangleTLIVectorName(StringRef VectorName,
1311  StringRef ScalarName, unsigned numArgs,
1312  ElementCount VF) {
1313  SmallString<256> Buffer;
1314  llvm::raw_svector_ostream Out(Buffer);
1315  Out << "_ZGV" << VFABI::_LLVM_ << "N";
1316  if (VF.isScalable())
1317  Out << 'x';
1318  else
1319  Out << VF.getFixedValue();
1320  for (unsigned I = 0; I < numArgs; ++I)
1321  Out << "v";
1322  Out << "_" << ScalarName << "(" << VectorName << ")";
1323  return std::string(Out.str());
1324 }
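// Editor's note (illustrative, not part of the upstream source): assuming
// VFABI::_LLVM_ expands to "_LLVM_", mangleTLIVectorName("vec_foo", "foo",
// /*numArgs=*/2, ElementCount::getFixed(4)) yields
// "_ZGV_LLVM_N4vv_foo(vec_foo)".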
1325 
1326 void VFABI::getVectorVariantNames(
1327     const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
1328   const StringRef S =
1329       CI.getAttribute(AttributeList::FunctionIndex, VFABI::MappingsAttrName)
1330           .getValueAsString();
1331  if (S.empty())
1332  return;
1333 
1334  SmallVector<StringRef, 8> ListAttr;
1335  S.split(ListAttr, ",");
1336 
1337  for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
1338 #ifndef NDEBUG
1339  LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
1340     Optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
1341     assert(Info.hasValue() && "Invalid name for a VFABI variant.");
1342  assert(CI.getModule()->getFunction(Info.getValue().VectorName) &&
1343  "Vector function is missing.");
1344 #endif
1345  VariantMappings.push_back(std::string(S));
1346  }
1347 }
1348 
1349 bool VFShape::hasValidParameterList() const {
1350   for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
1351  ++Pos) {
1352  assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
1353 
1354  switch (Parameters[Pos].ParamKind) {
1355  default: // Nothing to check.
1356     break;
1357   case VFParamKind::OMP_Linear:
1358   case VFParamKind::OMP_LinearRef:
1359   case VFParamKind::OMP_LinearVal:
1360   case VFParamKind::OMP_LinearUVal:
1361     // Compile time linear steps must be non-zero.
1362     if (Parameters[Pos].LinearStepOrPos == 0)
1363       return false;
1364     break;
1365   case VFParamKind::OMP_LinearPos:
1366   case VFParamKind::OMP_LinearRefPos:
1367   case VFParamKind::OMP_LinearValPos:
1368   case VFParamKind::OMP_LinearUValPos:
1369     // The runtime linear step must be referring to some other
1370     // parameters in the signature.
1371     if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
1372       return false;
1373     // The linear step parameter must be marked as uniform.
1374     if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
1375         VFParamKind::OMP_Uniform)
1376       return false;
1377     // The linear step parameter can't point at itself.
1378     if (Parameters[Pos].LinearStepOrPos == int(Pos))
1379       return false;
1380     break;
1381   case VFParamKind::GlobalPredicate:
1382     // The global predicate must be unique. It can be placed anywhere in the
1383     // signature.
1384     for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
1385       if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
1386         return false;
1387     break;
1388  }
1389  }
1390  return true;
1391 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::NextPowerOf2
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:683
llvm::EquivalenceClasses::getOrInsertLeaderValue
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
Definition: EquivalenceClasses.h:179
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::maskIsAllOneOrUndef
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:906
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
llvm::EquivalenceClasses::end
iterator end() const
Definition: EquivalenceClasses.h:147
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::InterleaveGroup::insertMember
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Definition: VectorUtils.h:610
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ElementCount
Definition: TypeSize.h:386
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:130
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::getGEPInductionOperand
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
Definition: VectorUtils.cpp:147
llvm::possiblyDemandedEltsInMask
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
Definition: VectorUtils.cpp:934
llvm::scConstant
@ scConstant
Definition: ScalarEvolutionExpressions.h:41
minimum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For minimum
Definition: README.txt:489
llvm::Instruction::getAllMetadataOtherThanDebugLoc
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * >> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:305
GetElementPtrTypeIterator.h
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:319
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1494
llvm::VFParamKind::GlobalPredicate
@ GlobalPredicate
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1008
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1643
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:58
llvm::CallBase::getAttribute
Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const
Get the attribute of a given kind at a position.
Definition: InstrTypes.h:1587
llvm::getSplatValue
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition: VectorUtils.cpp:360
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::VFABI::getVectorVariantNames
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
Definition: VectorUtils.cpp:1326
concatenateTwoVectors
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
Definition: VectorUtils.cpp:830
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::maskIsAllZeroOrUndef
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:880
llvm::MaxAnalysisRecursionDepth
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:48
ValueTracking.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::computeMinimumValueSizes
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition: VectorUtils.cpp:494
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5340
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:34
ScalarEvolution.h
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:421
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
MaxInterleaveGroupFactor
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1581
llvm::getUniqueCastUse
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Definition: VectorUtils.cpp:188
llvm::Optional
Definition: APInt.h:33
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet< Value *, 4 >
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:421
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::VFParamKind::OMP_Uniform
@ OMP_Uniform
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::propagateMetadata
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition: VectorUtils.cpp:720
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:178
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::uniteAccessGroups
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
Definition: VectorUtils.cpp:652
llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:284
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2178
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1198
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1100
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1108
llvm::VFShape::hasValidParameterList
bool hasValidParameterList() const
Sanity check on the Parameters in the VFShape.
Definition: VectorUtils.cpp:1349
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1292
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:449
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
Definition: VectorUtils.cpp:375
llvm::VFABI::mangleTLIVectorName
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for the vector-function-abi-variant attribute and is specific to the TLI mappings.
Definition: VectorUtils.cpp:1310
llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Definition: VectorUtils.cpp:1269
llvm::createInterleaveMask
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition: VectorUtils.cpp:795
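For illustration (assumed values, not taken from this page), VF = 4 and NumVecs = 2 give a mask that interleaves the lanes of two 4-element vectors:
#include "llvm/Analysis/VectorUtils.h"
// Illustrative: the result is <0, 4, 1, 5, 2, 6, 3, 7>.
static llvm::SmallVector<int, 16> interleaveMaskExample() {
  return llvm::createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
}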
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
llvm::ARM_MC::isPredicated
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
Definition: ARMMCTargetDesc.cpp:181
llvm::Instruction::mayReadOrWriteMemory
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:600
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
getScalarSizeInBits
static unsigned getScalarSizeInBits(Type *Ty)
Definition: SystemZTargetTransformInfo.cpp:367
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::isTriviallyVectorizable
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:44
llvm::User
Definition: User.h:44
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:204
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
llvm::VFParamKind::OMP_LinearVal
@ OMP_LinearVal
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride, return it in units of its element size.
Definition: LoopAccessAnalysis.cpp:1029
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1939
llvm::EquivalenceClasses::member_end
member_iterator member_end() const
Definition: EquivalenceClasses.h:157
llvm::VFParamKind::OMP_LinearUValPos
@ OMP_LinearUValPos
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::SCEVMulExpr
This node represents multiplication of some number of SCEVs.
Definition: ScalarEvolutionExpressions.h:286
llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition: Instructions.h:2058
llvm::Instruction
Definition: Instruction.h:45
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
llvm::MapVector::rend
reverse_iterator rend()
Definition: MapVector.h:76
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1784
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
PatternMatch.h
llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition: VectorUtils.h:600
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:655
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:301
LoopIterator.h
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::VFParamKind::OMP_LinearValPos
@ OMP_LinearValPos
llvm::SCEVUnknown::getValue
Value * getValue() const
Definition: ScalarEvolutionExpressions.h:551
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2049
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
llvm::SmallString< 256 >
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1309
LoopInfo.h
llvm::InterleaveGroup::setInsertPos
void setInsertPos(InstTy *Inst)
Definition: VectorUtils.h:672
llvm::VFABI::_LLVM_
static constexpr const char * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
Definition: VectorUtils.h:132
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4048
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:388
llvm::InterleaveGroup::getIndex
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition: VectorUtils.h:662
VectorUtils.h
llvm::cl::opt
Definition: CommandLine.h:1422
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:179
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MDNode::intersect
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:928
llvm::VFParamKind::OMP_LinearRef
@ OMP_LinearRef
llvm::EquivalenceClasses::begin
iterator begin() const
Definition: EquivalenceClasses.h:146
llvm::hasVectorInstrinsicOverloadedScalarOpd
bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand that has an overloaded type.
Definition: VectorUtils.cpp:117
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition: VectorUtils.h:601
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::DemandedBits
Definition: DemandedBits.h:40
I
#define I(x, y, z)
Definition: MD5.cpp:59
addToAccessGroupList
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Definition: VectorUtils.cpp:637
llvm::GetElementPtrInst
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
Definition: Instructions.h:928
llvm::hasVectorInstrinsicScalarOpd
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:572
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:852
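A minimal sketch, assuming Builder, V1 and V2 already exist (they are not defined on this page) and that V1/V2 are fixed-width vectors of the same element type:
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
// Illustrative: given two <4 x float> values, produce one <8 x float>.
static llvm::Value *concatExample(llvm::IRBuilderBase &Builder,
                                  llvm::Value *V1, llvm::Value *V2) {
  return llvm::concatenateVectors(Builder, {V1, V2});
}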
llvm::GetElementPtrInst::getResultElementType
Type * getResultElementType() const
Definition: Instructions.h:1026
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
llvm::MDNode::getMostGenericTBAA
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
Definition: TypeBasedAliasAnalysis.cpp:477
IRBuilder.h
llvm::InterleavedAccessInfo::analyzeInterleaving
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
Definition: VectorUtils.cpp:1029
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:72
llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1533
llvm::is_splat
bool is_splat(R &&Range)
Wrapper function around std::equal to detect if all elements in a container are the same.
Definition: STLExtras.h:1714
llvm::getLoadStoreAddressSpace
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of a load or store instruction.
Definition: Instructions.h:5331
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
llvm::createBitMaskForGaps
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition: VectorUtils.cpp:766
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:142
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2168
llvm::intersectAccessGroups
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Definition: VectorUtils.cpp:673
llvm::ArrayRef< int >
llvm::APInt::getAllOnesValue
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:567
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:58
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:681
llvm::LinearPolySize::getFixedValue
ScalarTy getFixedValue() const
Definition: TypeSize.h:313
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if possible, or null if not.
Definition: Constants.cpp:420
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1561
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:979
llvm::MDNode::getMostGenericAliasScope
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:941
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:95
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1368
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:430
llvm::createReplicatedMask
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition: VectorUtils.cpp:786
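For illustration (assumed values): ReplicationFactor = 3 and VF = 2 repeat each source lane three times in order.
#include "llvm/Analysis/VectorUtils.h"
// Illustrative: the result is <0, 0, 0, 1, 1, 1>.
static llvm::SmallVector<int, 16> replicatedMaskExample() {
  return llvm::createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/2);
}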
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1525
llvm::MapVector::rbegin
reverse_iterator rbegin()
Definition: MapVector.h:74
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13027
DemandedBits.h
llvm::MDNode::getContext
LLVMContext & getContext() const
Definition: Metadata.h:957
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:79
llvm::VFParamKind::OMP_LinearPos
@ OMP_LinearPos
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2183
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of widened elements.
Definition: VectorUtils.cpp:442
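A hedged sketch of the widening direction (mask values chosen purely for illustration): groups of Scale consecutive narrow indices collapse to one wide index, and the call reports failure when the groups do not line up.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
// Illustrative: with Scale = 2, <4, 5, 0, 1> regroups to <2, 0> and the call
// returns true; a mask such as <0, 2> cannot be regrouped and returns false.
static bool widenMaskExample() {
  llvm::SmallVector<int, 8> Wide;
  return llvm::widenShuffleMaskElts(/*Scale=*/2, /*Mask=*/{4, 5, 0, 1}, Wide);
}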
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1298
llvm::isValidAsAccessGroup
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
Definition: LoopInfo.cpp:1115
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:669
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:153
llvm::replaceSymbolicStrideSCEV
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr, Value *OrigPtr=nullptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition: LoopAccessAnalysis.cpp:143
llvm::TypeSize
Definition: TypeSize.h:417
llvm::getIntrinsicForCallSite
Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
Definition: ValueTracking.cpp:3327
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
llvm::VFParamKind::OMP_LinearRefPos
@ OMP_LinearRefPos
llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM Value.
Definition: ScalarEvolutionExpressions.h:530
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
EquivalenceClasses.h
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:842
llvm::DemandedBits::getDemandedBits
APInt getDemandedBits(Instruction *I)
Return the bits demanded from instruction I.
Definition: DemandedBits.cpp:443
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
Definition: MathExtras.h:225
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register, for example if it were inserted then extracted from the vector.
Definition: VectorUtils.cpp:280
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition: ScalarEvolutionExpressions.h:57
llvm::VFParamKind::OMP_LinearUVal
@ OMP_LinearUVal
ScalarEvolutionExpressions.h
llvm::AttributeList::FunctionIndex
@ FunctionIndex
Definition: Attributes.h:402
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2011
llvm::SCEVIntegralCastExpr
This is the base class for unary integral cast operator classes.
Definition: ScalarEvolutionExpressions.h:121
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::getSplatIndex
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition: VectorUtils.cpp:338
llvm::MDNode::getMostGenericFPMath
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
Definition: Metadata.cpp:973
List
const NodeList & List
Definition: RDFGraph.cpp:201
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::VFABI::MappingsAttrName
static constexpr const char * MappingsAttrName
Definition: VectorUtils.h:193
llvm::stripGetElementPtr
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
Definition: VectorUtils.cpp:171
TargetTransformInfo.h
llvm::raw_svector_ostream
A raw_ostream that writes to a SmallVector or SmallString.
Definition: raw_ostream.h:656
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl< int >
llvm::VFABI::tryDemangleForVFABI
Optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled name in the following format:
Definition: VFABIDemangling.cpp:319
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5295
llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not already exist in the equivalence set.
Definition: EquivalenceClasses.h:217
llvm::createStrideMask
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition: VectorUtils.cpp:806
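For illustration (assumed values): Start = 0, Stride = 2 and VF = 4 select every second lane.
#include "llvm/Analysis/VectorUtils.h"
// Illustrative: the result is <0, 2, 4, 6>.
static llvm::SmallVector<int, 16> strideMaskExample() {
  return llvm::createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
}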
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:414
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:814
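For illustration (assumed values): Start = 0, NumInts = 4 and NumUndefs = 2 yield four sequential indices padded with undef lanes.
#include "llvm/Analysis/VectorUtils.h"
// Illustrative: the result is <0, 1, 2, 3, -1, -1>, where -1 marks an undef lane.
static llvm::SmallVector<int, 16> sequentialMaskExample() {
  return llvm::createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2);
}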
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:496
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1284
llvm::EquivalenceClasses::member_begin
member_iterator member_begin(iterator I) const
Definition: EquivalenceClasses.h:153
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::VFParamKind::OMP_Linear
@ OMP_Linear
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:154
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2173
llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of a load or store instruction.
Definition: Instructions.h:5321
llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:744
llvm::InterleaveGroup::getNumMembers
uint32_t getNumMembers() const
Definition: VectorUtils.h:603
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Definition: VectorUtils.cpp:1296
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38