LLVM  14.0.0git
VectorUtils.cpp
1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/VectorUtils.h"
14 #include "llvm/ADT/EquivalenceClasses.h"
15 #include "llvm/Analysis/DemandedBits.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/LoopIterator.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/GetElementPtrTypeIterator.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/CommandLine.h"
28 
29 #define DEBUG_TYPE "vectorutils"
30 
31 using namespace llvm;
32 using namespace llvm::PatternMatch;
33 
34 /// Maximum factor for an interleaved memory access.
35 static cl::opt<unsigned> MaxInterleaveGroupFactor(
36  "max-interleave-group-factor", cl::Hidden,
37  cl::desc("Maximum factor for an interleaved access group (default = 8)"),
38  cl::init(8));
39 
40 /// Return true if all of the intrinsic's arguments and return type are scalars
41 /// for the scalar form of the intrinsic, and vectors for the vector form of the
42 /// intrinsic (except operands that are marked as always being scalar by
43 /// hasVectorInstrinsicScalarOpd).
44 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
45  switch (ID) {
46  case Intrinsic::abs: // Begin integer bit-manipulation.
47  case Intrinsic::bswap:
48  case Intrinsic::bitreverse:
49  case Intrinsic::ctpop:
50  case Intrinsic::ctlz:
51  case Intrinsic::cttz:
52  case Intrinsic::fshl:
53  case Intrinsic::fshr:
54  case Intrinsic::smax:
55  case Intrinsic::smin:
56  case Intrinsic::umax:
57  case Intrinsic::umin:
58  case Intrinsic::sadd_sat:
59  case Intrinsic::ssub_sat:
60  case Intrinsic::uadd_sat:
61  case Intrinsic::usub_sat:
62  case Intrinsic::smul_fix:
63  case Intrinsic::smul_fix_sat:
64  case Intrinsic::umul_fix:
65  case Intrinsic::umul_fix_sat:
66  case Intrinsic::sqrt: // Begin floating-point.
67  case Intrinsic::sin:
68  case Intrinsic::cos:
69  case Intrinsic::exp:
70  case Intrinsic::exp2:
71  case Intrinsic::log:
72  case Intrinsic::log10:
73  case Intrinsic::log2:
74  case Intrinsic::fabs:
75  case Intrinsic::minnum:
76  case Intrinsic::maxnum:
77  case Intrinsic::minimum:
78  case Intrinsic::maximum:
79  case Intrinsic::copysign:
80  case Intrinsic::floor:
81  case Intrinsic::ceil:
82  case Intrinsic::trunc:
83  case Intrinsic::rint:
84  case Intrinsic::nearbyint:
85  case Intrinsic::round:
86  case Intrinsic::roundeven:
87  case Intrinsic::pow:
88  case Intrinsic::fma:
89  case Intrinsic::fmuladd:
90  case Intrinsic::powi:
91  case Intrinsic::canonicalize:
92  return true;
93  default:
94  return false;
95  }
96 }
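// Illustrative sketch (not part of the original file): for an ID listed above,
// the scalar and vector forms correspond element-wise. For example, a scalar
// call such as
//   %r  = call float @llvm.fma.f32(float %a, float %b, float %c)
// has the vector counterpart
//   %rv = call <4 x float> @llvm.fma.v4f32(<4 x float> %av, <4 x float> %bv,
//                                          <4 x float> %cv)
// so isTriviallyVectorizable(Intrinsic::fma) returns true, while an ID not
// listed above (e.g. Intrinsic::memcpy) returns false.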
97 
98 /// Identifies if the vector form of the intrinsic has a scalar operand.
99 bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,
100  unsigned ScalarOpdIdx) {
101  switch (ID) {
102  case Intrinsic::abs:
103  case Intrinsic::ctlz:
104  case Intrinsic::cttz:
105  case Intrinsic::powi:
106  return (ScalarOpdIdx == 1);
107  case Intrinsic::smul_fix:
108  case Intrinsic::smul_fix_sat:
109  case Intrinsic::umul_fix:
110  case Intrinsic::umul_fix_sat:
111  return (ScalarOpdIdx == 2);
112  default:
113  return false;
114  }
115 }
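// Illustrative sketch (not part of the original file): for llvm.powi the
// exponent stays scalar in the vector form, e.g.
//   %rv = call <4 x float> @llvm.powi.v4f32.i32(<4 x float> %xv, i32 %n)
// so hasVectorInstrinsicScalarOpd(Intrinsic::powi, /*ScalarOpdIdx=*/1) is true
// and hasVectorInstrinsicScalarOpd(Intrinsic::powi, /*ScalarOpdIdx=*/0) is false.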
116 
117 bool llvm::hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID,
118  unsigned ScalarOpdIdx) {
119  switch (ID) {
120  case Intrinsic::powi:
121  return (ScalarOpdIdx == 1);
122  default:
123  return false;
124  }
125 }
126 
127 /// Returns intrinsic ID for call.
128 /// For the input call instruction it finds the mapped intrinsic and returns
129 /// its ID; if no mapping is found, it returns not_intrinsic.
130 Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
131  const TargetLibraryInfo *TLI) {
132  Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
133  if (ID == Intrinsic::not_intrinsic)
134  return Intrinsic::not_intrinsic;
135 
136  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
137  ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
138  ID == Intrinsic::experimental_noalias_scope_decl ||
139  ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
140  return ID;
141  return Intrinsic::not_intrinsic;
142 }
143 
144 /// Find the operand of the GEP that should be checked for consecutive
145 /// stores. This ignores trailing indices that have no effect on the final
146 /// pointer.
147 unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
148  const DataLayout &DL = Gep->getModule()->getDataLayout();
149  unsigned LastOperand = Gep->getNumOperands() - 1;
150  TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
151 
152  // Walk backwards and try to peel off zeros.
153  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
154  // Find the type we're currently indexing into.
155  gep_type_iterator GEPTI = gep_type_begin(Gep);
156  std::advance(GEPTI, LastOperand - 2);
157 
158  // If it's a type with the same allocation size as the result of the GEP we
159  // can peel off the zero index.
160  if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
161  break;
162  --LastOperand;
163  }
164 
165  return LastOperand;
166 }
167 
168 /// If the argument is a GEP, then returns the operand identified by
169 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
170 /// operand, it returns that instead.
171 Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
172  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
173  if (!GEP)
174  return Ptr;
175 
176  unsigned InductionOperand = getGEPInductionOperand(GEP);
177 
178  // Check that all of the gep indices are uniform except for our induction
179  // operand.
180  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
181  if (i != InductionOperand &&
182  !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
183  return Ptr;
184  return GEP->getOperand(InductionOperand);
185 }
186 
187 /// If a value has only one user that is a CastInst, return it.
188 Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
189  Value *UniqueCast = nullptr;
190  for (User *U : Ptr->users()) {
191  CastInst *CI = dyn_cast<CastInst>(U);
192  if (CI && CI->getType() == Ty) {
193  if (!UniqueCast)
194  UniqueCast = CI;
195  else
196  return nullptr;
197  }
198  }
199  return UniqueCast;
200 }
201 
202 /// Get the stride of a pointer access in a loop. Looks for symbolic
203 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
204 Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
205  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
206  if (!PtrTy || PtrTy->isAggregateType())
207  return nullptr;
208 
209  // Try to remove a GEP instruction to make the pointer (actually the index
210  // at this point) easier to analyze. If OrigPtr is equal to Ptr we are
211  // analyzing the pointer, otherwise we are analyzing the index.
212  Value *OrigPtr = Ptr;
213 
214  // The size of the pointer access.
215  int64_t PtrAccessSize = 1;
216 
217  Ptr = stripGetElementPtr(Ptr, SE, Lp);
218  const SCEV *V = SE->getSCEV(Ptr);
219 
220  if (Ptr != OrigPtr)
221  // Strip off casts.
222  while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
223  V = C->getOperand();
224 
225  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
226  if (!S)
227  return nullptr;
228 
229  V = S->getStepRecurrence(*SE);
230  if (!V)
231  return nullptr;
232 
233  // Strip off the size of access multiplication if we are still analyzing the
234  // pointer.
235  if (OrigPtr == Ptr) {
236  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
237  if (M->getOperand(0)->getSCEVType() != scConstant)
238  return nullptr;
239 
240  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
241 
242  // Huge step value - give up.
243  if (APStepVal.getBitWidth() > 64)
244  return nullptr;
245 
246  int64_t StepVal = APStepVal.getSExtValue();
247  if (PtrAccessSize != StepVal)
248  return nullptr;
249  V = M->getOperand(1);
250  }
251  }
252 
253  // Strip off casts.
254  Type *StripedOffRecurrenceCast = nullptr;
255  if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
256  StripedOffRecurrenceCast = C->getType();
257  V = C->getOperand();
258  }
259 
260  // Look for the loop invariant symbolic value.
261  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
262  if (!U)
263  return nullptr;
264 
265  Value *Stride = U->getValue();
266  if (!Lp->isLoopInvariant(Stride))
267  return nullptr;
268 
269  // If we have stripped off the recurrence cast we have to make sure that we
270  // return the value that is used in this loop so that we can replace it later.
271  if (StripedOffRecurrenceCast)
272  Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
273 
274  return Stride;
275 }
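// Illustrative sketch (hypothetical IR, not from the original file): for a
// loop access such as
//   %mul = mul nsw i64 %i, %stride        ; %i is the induction variable
//   %gep = getelementptr inbounds i32, i32* %base, i64 %mul
// getStrideFromPointer(%gep, SE, Lp) is expected to return the loop-invariant
// value %stride, which callers can then use to version the loop on stride == 1.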
276 
277 /// Given a vector and an element number, see if the scalar value is
278 /// already around as a register, for example if it were inserted then extracted
279 /// from the vector.
280 Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
281  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
282  VectorType *VTy = cast<VectorType>(V->getType());
283  // For fixed-length vector, return undef for out of range access.
284  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
285  unsigned Width = FVTy->getNumElements();
286  if (EltNo >= Width)
287  return UndefValue::get(FVTy->getElementType());
288  }
289 
290  if (Constant *C = dyn_cast<Constant>(V))
291  return C->getAggregateElement(EltNo);
292 
293  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
294  // If this is an insert to a variable element, we don't know what it is.
295  if (!isa<ConstantInt>(III->getOperand(2)))
296  return nullptr;
297  unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
298 
299  // If this is an insert to the element we are looking for, return the
300  // inserted value.
301  if (EltNo == IIElt)
302  return III->getOperand(1);
303 
304  // Guard against infinite loop on malformed, unreachable IR.
305  if (III == III->getOperand(0))
306  return nullptr;
307 
308  // Otherwise, the insertelement doesn't modify the value, recurse on its
309  // vector input.
310  return findScalarElement(III->getOperand(0), EltNo);
311  }
312 
313  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
314  // Restrict the following transformation to fixed-length vector.
315  if (SVI && isa<FixedVectorType>(SVI->getType())) {
316  unsigned LHSWidth =
317  cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
318  int InEl = SVI->getMaskValue(EltNo);
319  if (InEl < 0)
320  return UndefValue::get(VTy->getElementType());
321  if (InEl < (int)LHSWidth)
322  return findScalarElement(SVI->getOperand(0), InEl);
323  return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
324  }
325 
326  // Extract a value from a vector add operation with a constant zero.
327  // TODO: Use getBinOpIdentity() to generalize this.
328  Value *Val; Constant *C;
329  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
330  if (Constant *Elt = C->getAggregateElement(EltNo))
331  if (Elt->isNullValue())
332  return findScalarElement(Val, EltNo);
333 
334  // If the vector is a splat then we can trivially find the scalar element.
335  if (isa<ScalableVectorType>(VTy))
336  if (Value *Splat = getSplatValue(V))
337  if (EltNo < VTy->getElementCount().getKnownMinValue())
338  return Splat;
339 
340  // Otherwise, we don't know.
341  return nullptr;
342 }
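// Illustrative sketch (hypothetical IR, not from the original file):
//   %v1 = insertelement <4 x i32> %v0, i32 %x, i32 1
// findScalarElement(%v1, 1) returns %x (the inserted scalar), whereas
// findScalarElement(%v1, 2) recurses into %v0 and may return nullptr if that
// element is not otherwise known.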
343 
344 int llvm::getSplatIndex(ArrayRef<int> Mask) {
345  int SplatIndex = -1;
346  for (int M : Mask) {
347  // Ignore invalid (undefined) mask elements.
348  if (M < 0)
349  continue;
350 
351  // There can be only 1 non-negative mask element value if this is a splat.
352  if (SplatIndex != -1 && SplatIndex != M)
353  return -1;
354 
355  // Initialize the splat index to the 1st non-negative mask element.
356  SplatIndex = M;
357  }
358  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
359  return SplatIndex;
360 }
361 
362 /// Get splat value if the input is a splat vector or return nullptr.
363 /// This function is not fully general. It checks only 2 cases:
364 /// the input value is (1) a splat constant vector or (2) a sequence
365 /// of instructions that broadcasts a scalar at element 0.
366 Value *llvm::getSplatValue(const Value *V) {
367  if (isa<VectorType>(V->getType()))
368  if (auto *C = dyn_cast<Constant>(V))
369  return C->getSplatValue();
370 
371  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
372  Value *Splat;
373  if (match(V,
374  m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
375  m_Value(), m_ZeroMask())))
376  return Splat;
377 
378  return nullptr;
379 }
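// Illustrative sketch (hypothetical IR, not from the original file): the
// broadcast idiom matched above looks like
//   %ins   = insertelement <4 x float> poison, float %s, i32 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
//                          <4 x i32> zeroinitializer
// getSplatValue(%splat) returns %s; for a non-uniform vector it returns nullptr.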
380 
381 bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
382  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
383 
384  if (isa<VectorType>(V->getType())) {
385  if (isa<UndefValue>(V))
386  return true;
387  // FIXME: We can allow undefs, but if Index was specified, we may want to
388  // check that the constant is defined at that index.
389  if (auto *C = dyn_cast<Constant>(V))
390  return C->getSplatValue() != nullptr;
391  }
392 
393  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
394  // FIXME: We can safely allow undefs here. If Index was specified, we will
395  // check that the mask elt is defined at the required index.
396  if (!is_splat(Shuf->getShuffleMask()))
397  return false;
398 
399  // Match any index.
400  if (Index == -1)
401  return true;
402 
403  // Match a specific element. The mask should be defined at and match the
404  // specified index.
405  return Shuf->getMaskValue(Index) == Index;
406  }
407 
408  // The remaining tests are all recursive, so bail out if we hit the limit.
409  if (Depth++ == MaxAnalysisRecursionDepth)
410  return false;
411 
412  // If both operands of a binop are splats, the result is a splat.
413  Value *X, *Y, *Z;
414  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
415  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
416 
417  // If all operands of a select are splats, the result is a splat.
418  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
419  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
420  isSplatValue(Z, Index, Depth);
421 
422  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
423 
424  return false;
425 }
426 
427 void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
428  SmallVectorImpl<int> &ScaledMask) {
429  assert(Scale > 0 && "Unexpected scaling factor");
430 
431  // Fast-path: if no scaling, then it is just a copy.
432  if (Scale == 1) {
433  ScaledMask.assign(Mask.begin(), Mask.end());
434  return;
435  }
436 
437  ScaledMask.clear();
438  for (int MaskElt : Mask) {
439  if (MaskElt >= 0) {
440  assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
441  "Overflowed 32-bits");
442  }
443  for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
444  ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
445  }
446 }
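// Illustrative sketch (not part of the original file): with Scale = 2 the mask
// <1, -1> becomes <2, 3, -1, -1>; each defined element M expands to the run
// M*Scale .. M*Scale+Scale-1, and negative (undef/sentinel) elements are
// simply repeated Scale times.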
447 
448 bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
449  SmallVectorImpl<int> &ScaledMask) {
450  assert(Scale > 0 && "Unexpected scaling factor");
451 
452  // Fast-path: if no scaling, then it is just a copy.
453  if (Scale == 1) {
454  ScaledMask.assign(Mask.begin(), Mask.end());
455  return true;
456  }
457 
458  // We must map the original elements down evenly to a type with fewer elements.
459  int NumElts = Mask.size();
460  if (NumElts % Scale != 0)
461  return false;
462 
463  ScaledMask.clear();
464  ScaledMask.reserve(NumElts / Scale);
465 
466  // Step through the input mask by splitting into Scale-sized slices.
467  do {
468  ArrayRef<int> MaskSlice = Mask.take_front(Scale);
469  assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
470 
471  // The first element of the slice determines how we evaluate this slice.
472  int SliceFront = MaskSlice.front();
473  if (SliceFront < 0) {
474  // Negative values (undef or other "sentinel" values) must be equal across
475  // the entire slice.
476  if (!is_splat(MaskSlice))
477  return false;
478  ScaledMask.push_back(SliceFront);
479  } else {
480  // A positive mask element must be cleanly divisible.
481  if (SliceFront % Scale != 0)
482  return false;
483  // Elements of the slice must be consecutive.
484  for (int i = 1; i < Scale; ++i)
485  if (MaskSlice[i] != SliceFront + i)
486  return false;
487  ScaledMask.push_back(SliceFront / Scale);
488  }
489  Mask = Mask.drop_front(Scale);
490  } while (!Mask.empty());
491 
492  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
493 
494  // All elements of the original mask can be scaled down to map to the elements
495  // of a mask with wider elements.
496  return true;
497 }
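// Illustrative sketch (not part of the original file): with Scale = 2 the mask
// <2, 3, 0, 1> widens to <1, 0>, because each 2-element slice is consecutive
// and starts at a multiple of 2. The mask <0, 2, 1, 3> cannot be widened; the
// function then returns false and ScaledMask should not be used.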
498 
499 MapVector<Instruction *, uint64_t>
500 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
501  const TargetTransformInfo *TTI) {
502 
503  // DemandedBits will give us every value's live-out bits. But we want
504  // to ensure no extra casts would need to be inserted, so every DAG
505  // of connected values must have the same minimum bitwidth.
506  EquivalenceClasses<Value *> ECs;
507  SmallVector<Value *, 16> Worklist;
508  SmallPtrSet<Value *, 4> Roots;
509  SmallPtrSet<Value *, 16> Visited;
510  DenseMap<Value *, uint64_t> DBits;
511  SmallPtrSet<Instruction *, 4> InstructionSet;
512  MapVector<Instruction *, uint64_t> MinBWs;
513 
514  // Determine the roots. We work bottom-up, from truncs or icmps.
515  bool SeenExtFromIllegalType = false;
516  for (auto *BB : Blocks)
517  for (auto &I : *BB) {
518  InstructionSet.insert(&I);
519 
520  if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
521  !TTI->isTypeLegal(I.getOperand(0)->getType()))
522  SeenExtFromIllegalType = true;
523 
524  // Only deal with non-vector integers up to 64-bits wide.
525  if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
526  !I.getType()->isVectorTy() &&
527  I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
528  // Don't make work for ourselves. If we know the loaded type is legal,
529  // don't add it to the worklist.
530  if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
531  continue;
532 
533  Worklist.push_back(&I);
534  Roots.insert(&I);
535  }
536  }
537  // Early exit.
538  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
539  return MinBWs;
540 
541  // Now proceed breadth-first, unioning values together.
542  while (!Worklist.empty()) {
543  Value *Val = Worklist.pop_back_val();
544  Value *Leader = ECs.getOrInsertLeaderValue(Val);
545 
546  if (Visited.count(Val))
547  continue;
548  Visited.insert(Val);
549 
550  // Non-instructions terminate a chain successfully.
551  if (!isa<Instruction>(Val))
552  continue;
553  Instruction *I = cast<Instruction>(Val);
554 
555  // If we encounter a type that is larger than 64 bits, we can't represent
556  // it so bail out.
557  if (DB.getDemandedBits(I).getBitWidth() > 64)
558  return MapVector<Instruction *, uint64_t>();
559 
560  uint64_t V = DB.getDemandedBits(I).getZExtValue();
561  DBits[Leader] |= V;
562  DBits[I] = V;
563 
564  // Casts, loads and instructions outside of our range terminate a chain
565  // successfully.
566  if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
567  !InstructionSet.count(I))
568  continue;
569 
570  // Unsafe casts terminate a chain unsuccessfully. We can't do anything
571  // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
572  // transform anything that relies on them.
573  if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
574  !I->getType()->isIntegerTy()) {
575  DBits[Leader] |= ~0ULL;
576  continue;
577  }
578 
579  // We don't modify the types of PHIs. Reductions will already have been
580  // truncated if possible, and inductions' sizes will have been chosen by
581  // indvars.
582  if (isa<PHINode>(I))
583  continue;
584 
585  if (DBits[Leader] == ~0ULL)
586  // All bits demanded, no point continuing.
587  continue;
588 
589  for (Value *O : cast<User>(I)->operands()) {
590  ECs.unionSets(Leader, O);
591  Worklist.push_back(O);
592  }
593  }
594 
595  // Now we've discovered all values, walk them to see if there are
596  // any users we didn't see. If there are, we can't optimize that
597  // chain.
598  for (auto &I : DBits)
599  for (auto *U : I.first->users())
600  if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
601  DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
602 
603  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
604  uint64_t LeaderDemandedBits = 0;
605  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
606  LeaderDemandedBits |= DBits[M];
607 
608  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
609  llvm::countLeadingZeros(LeaderDemandedBits);
610  // Round up to a power of 2
611  if (!isPowerOf2_64((uint64_t)MinBW))
612  MinBW = NextPowerOf2(MinBW);
613 
614  // We don't modify the types of PHIs. Reductions will already have been
615  // truncated if possible, and inductions' sizes will have been chosen by
616  // indvars.
617  // If we are required to shrink a PHI, abandon this entire equivalence class.
618  bool Abort = false;
619  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
620  if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
621  Abort = true;
622  break;
623  }
624  if (Abort)
625  continue;
626 
627  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
628  if (!isa<Instruction>(M))
629  continue;
630  Type *Ty = M->getType();
631  if (Roots.count(M))
632  Ty = cast<Instruction>(M)->getOperand(0)->getType();
633  if (MinBW < Ty->getScalarSizeInBits())
634  MinBWs[cast<Instruction>(M)] = MinBW;
635  }
636  }
637 
638  return MinBWs;
639 }
640 
641 /// Add all access groups in @p AccGroups to @p List.
642 template <typename ListT>
643 static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
644  // Interpret an access group as a list containing itself.
645  if (AccGroups->getNumOperands() == 0) {
646  assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
647  List.insert(AccGroups);
648  return;
649  }
650 
651  for (auto &AccGroupListOp : AccGroups->operands()) {
652  auto *Item = cast<MDNode>(AccGroupListOp.get());
653  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
654  List.insert(Item);
655  }
656 }
657 
658 MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
659  if (!AccGroups1)
660  return AccGroups2;
661  if (!AccGroups2)
662  return AccGroups1;
663  if (AccGroups1 == AccGroups2)
664  return AccGroups1;
665 
666  SmallSetVector<Metadata *, 4> Union;
667  addToAccessGroupList(Union, AccGroups1);
668  addToAccessGroupList(Union, AccGroups2);
669 
670  if (Union.size() == 0)
671  return nullptr;
672  if (Union.size() == 1)
673  return cast<MDNode>(Union.front());
674 
675  LLVMContext &Ctx = AccGroups1->getContext();
676  return MDNode::get(Ctx, Union.getArrayRef());
677 }
678 
679 MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
680  const Instruction *Inst2) {
681  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
682  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
683 
684  if (!MayAccessMem1 && !MayAccessMem2)
685  return nullptr;
686  if (!MayAccessMem1)
687  return Inst2->getMetadata(LLVMContext::MD_access_group);
688  if (!MayAccessMem2)
689  return Inst1->getMetadata(LLVMContext::MD_access_group);
690 
691  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
692  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
693  if (!MD1 || !MD2)
694  return nullptr;
695  if (MD1 == MD2)
696  return MD1;
697 
698  // Use set for scalable 'contains' check.
699  SmallPtrSet<Metadata *, 4> AccGroupSet2;
700  addToAccessGroupList(AccGroupSet2, MD2);
701 
702  SmallVector<Metadata *, 4> Intersection;
703  if (MD1->getNumOperands() == 0) {
704  assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
705  if (AccGroupSet2.count(MD1))
706  Intersection.push_back(MD1);
707  } else {
708  for (const MDOperand &Node : MD1->operands()) {
709  auto *Item = cast<MDNode>(Node.get());
710  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
711  if (AccGroupSet2.count(Item))
712  Intersection.push_back(Item);
713  }
714  }
715 
716  if (Intersection.size() == 0)
717  return nullptr;
718  if (Intersection.size() == 1)
719  return cast<MDNode>(Intersection.front());
720 
721  LLVMContext &Ctx = Inst1->getContext();
722  return MDNode::get(Ctx, Intersection);
723 }
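// Illustrative sketch (hypothetical metadata, not from the original file): given
//   !0 = distinct !{}                ; access group A
//   !1 = distinct !{}                ; access group B
//   !2 = !{!0, !1}                   ; list {A, B}
// intersecting an instruction carrying !access.group !2 with one carrying
// !access.group !0 yields !0, while intersecting !0 with !1 yields nullptr.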
724 
725 /// \returns \p I after propagating metadata from \p VL.
726 Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
727  if (VL.empty())
728  return Inst;
729  Instruction *I0 = cast<Instruction>(VL[0]);
730  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
731  I0->getAllMetadataOtherThanDebugLoc(Metadata);
732 
733  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
734  LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
735  LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
736  LLVMContext::MD_access_group}) {
737  MDNode *MD = I0->getMetadata(Kind);
738 
739  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
740  const Instruction *IJ = cast<Instruction>(VL[J]);
741  MDNode *IMD = IJ->getMetadata(Kind);
742  switch (Kind) {
743  case LLVMContext::MD_tbaa:
744  MD = MDNode::getMostGenericTBAA(MD, IMD);
745  break;
746  case LLVMContext::MD_alias_scope:
747  MD = MDNode::getMostGenericAliasScope(MD, IMD);
748  break;
749  case LLVMContext::MD_fpmath:
750  MD = MDNode::getMostGenericFPMath(MD, IMD);
751  break;
752  case LLVMContext::MD_noalias:
753  case LLVMContext::MD_nontemporal:
754  case LLVMContext::MD_invariant_load:
755  MD = MDNode::intersect(MD, IMD);
756  break;
757  case LLVMContext::MD_access_group:
758  MD = intersectAccessGroups(Inst, IJ);
759  break;
760  default:
761  llvm_unreachable("unhandled metadata");
762  }
763  }
764 
765  Inst->setMetadata(Kind, MD);
766  }
767 
768  return Inst;
769 }
770 
771 Constant *
772 llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
773  const InterleaveGroup<Instruction> &Group) {
774  // All 1's means mask is not needed.
775  if (Group.getNumMembers() == Group.getFactor())
776  return nullptr;
777 
778  // TODO: support reversed access.
779  assert(!Group.isReverse() && "Reversed group not supported.");
780 
781  SmallVector<Constant *, 16> Mask;
782  for (unsigned i = 0; i < VF; i++)
783  for (unsigned j = 0; j < Group.getFactor(); ++j) {
784  unsigned HasMember = Group.getMember(j) ? 1 : 0;
785  Mask.push_back(Builder.getInt1(HasMember));
786  }
787 
788  return ConstantVector::get(Mask);
789 }
790 
791 llvm::SmallVector<int, 16>
792 llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
793  SmallVector<int, 16> MaskVec;
794  for (unsigned i = 0; i < VF; i++)
795  for (unsigned j = 0; j < ReplicationFactor; j++)
796  MaskVec.push_back(i);
797 
798  return MaskVec;
799 }
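// Illustrative sketch (not part of the original file):
//   createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/2) == <0, 0, 0, 1, 1, 1>
// i.e. each of the VF source lanes is repeated ReplicationFactor times.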
800 
801 llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
802  unsigned NumVecs) {
803  SmallVector<int, 16> Mask;
804  for (unsigned i = 0; i < VF; i++)
805  for (unsigned j = 0; j < NumVecs; j++)
806  Mask.push_back(j * VF + i);
807 
808  return Mask;
809 }
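// Illustrative sketch (not part of the original file):
//   createInterleaveMask(/*VF=*/4, /*NumVecs=*/2) == <0, 4, 1, 5, 2, 6, 3, 7>
// which interleaves lane i of each of the NumVecs concatenated vectors.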
810 
811 llvm::SmallVector<int, 16>
812 llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
813  SmallVector<int, 16> Mask;
814  for (unsigned i = 0; i < VF; i++)
815  Mask.push_back(Start + i * Stride);
816 
817  return Mask;
818 }
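// Illustrative sketch (not part of the original file):
//   createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4) == <0, 2, 4, 6>
// selecting every Stride-th element beginning at Start.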
819 
820 llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
821  unsigned NumInts,
822  unsigned NumUndefs) {
823  SmallVector<int, 16> Mask;
824  for (unsigned i = 0; i < NumInts; i++)
825  Mask.push_back(Start + i);
826 
827  for (unsigned i = 0; i < NumUndefs; i++)
828  Mask.push_back(-1);
829 
830  return Mask;
831 }
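// Illustrative sketch (not part of the original file):
//   createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/4)
//       == <0, 1, 2, 3, -1, -1, -1, -1>
// i.e. NumInts consecutive indices followed by NumUndefs undef (-1) elements.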
832 
833 llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
834  unsigned NumElts) {
835  // Avoid casts in the loop and make sure we have a reasonable number.
836  int NumEltsSigned = NumElts;
837  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
838 
839  // If the mask chooses an element from operand 1, reduce it to choose from the
840  // corresponding element of operand 0. Undef mask elements are unchanged.
841  SmallVector<int, 16> UnaryMask;
842  for (int MaskElt : Mask) {
843  assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
844  int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
845  UnaryMask.push_back(UnaryElt);
846  }
847  return UnaryMask;
848 }
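// Illustrative sketch (not part of the original file): with NumElts = 4 the
// binary shuffle mask <0, 5, 2, 7> becomes the unary mask <0, 1, 2, 3>;
// indices that referred to the second operand are folded back onto the first.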
849 
850 /// A helper function for concatenating vectors. This function concatenates two
851 /// vectors having the same element type. If the second vector has fewer
852 /// elements than the first, it is padded with undefs.
853 static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
854  Value *V2) {
855  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
856  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
857  assert(VecTy1 && VecTy2 &&
858  VecTy1->getScalarType() == VecTy2->getScalarType() &&
859  "Expect two vectors with the same element type");
860 
861  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
862  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
863  assert(NumElts1 >= NumElts2 && "Unexpected: first vector has fewer elements");
864 
865  if (NumElts1 > NumElts2) {
866  // Extend with UNDEFs.
867  V2 = Builder.CreateShuffleVector(
868  V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
869  }
870 
871  return Builder.CreateShuffleVector(
872  V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
873 }
874 
875 Value *llvm::concatenateVectors(IRBuilderBase &Builder,
876  ArrayRef<Value *> Vecs) {
877  unsigned NumVecs = Vecs.size();
878  assert(NumVecs > 1 && "Should be at least two vectors");
879 
880  SmallVector<Value *, 8> ResList;
881  ResList.append(Vecs.begin(), Vecs.end());
882  do {
883  SmallVector<Value *, 8> TmpList;
884  for (unsigned i = 0; i < NumVecs - 1; i += 2) {
885  Value *V0 = ResList[i], *V1 = ResList[i + 1];
886  assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
887  "Only the last vector may have a different type");
888 
889  TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
890  }
891 
892  // Push the last vector if the total number of vectors is odd.
893  if (NumVecs % 2 != 0)
894  TmpList.push_back(ResList[NumVecs - 1]);
895 
896  ResList = TmpList;
897  NumVecs = ResList.size();
898  } while (NumVecs > 1);
899 
900  return ResList[0];
901 }
902 
903 bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
904  assert(isa<VectorType>(Mask->getType()) &&
905  isa<IntegerType>(Mask->getType()->getScalarType()) &&
906  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
907  1 &&
908  "Mask must be a vector of i1");
909 
910  auto *ConstMask = dyn_cast<Constant>(Mask);
911  if (!ConstMask)
912  return false;
913  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
914  return true;
915  if (isa<ScalableVectorType>(ConstMask->getType()))
916  return false;
917  for (unsigned
918  I = 0,
919  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
920  I != E; ++I) {
921  if (auto *MaskElt = ConstMask->getAggregateElement(I))
922  if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
923  continue;
924  return false;
925  }
926  return true;
927 }
928 
929 bool llvm::maskIsAllOneOrUndef(Value *Mask) {
930  assert(isa<VectorType>(Mask->getType()) &&
931  isa<IntegerType>(Mask->getType()->getScalarType()) &&
932  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
933  1 &&
934  "Mask must be a vector of i1");
935 
936  auto *ConstMask = dyn_cast<Constant>(Mask);
937  if (!ConstMask)
938  return false;
939  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
940  return true;
941  if (isa<ScalableVectorType>(ConstMask->getType()))
942  return false;
943  for (unsigned
944  I = 0,
945  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
946  I != E; ++I) {
947  if (auto *MaskElt = ConstMask->getAggregateElement(I))
948  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
949  continue;
950  return false;
951  }
952  return true;
953 }
954 
955 /// TODO: This is a lot like known bits, but for
956 /// vectors. Is there something we can common this with?
957 APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
958  assert(isa<FixedVectorType>(Mask->getType()) &&
959  isa<IntegerType>(Mask->getType()->getScalarType()) &&
960  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
961  1 &&
962  "Mask must be a fixed width vector of i1");
963 
964  const unsigned VWidth =
965  cast<FixedVectorType>(Mask->getType())->getNumElements();
966  APInt DemandedElts = APInt::getAllOnes(VWidth);
967  if (auto *CV = dyn_cast<ConstantVector>(Mask))
968  for (unsigned i = 0; i < VWidth; i++)
969  if (CV->getAggregateElement(i)->isNullValue())
970  DemandedElts.clearBit(i);
971  return DemandedElts;
972 }
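// Illustrative sketch (not part of the original file): for the constant mask
// <i1 1, i1 0, i1 undef, i1 1> the result is an APInt with bits {0, 2, 3} set;
// only lane 1 is known-zero and therefore cannot be demanded.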
973 
974 bool InterleavedAccessInfo::isStrided(int Stride) {
975  unsigned Factor = std::abs(Stride);
976  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
977 }
978 
979 void InterleavedAccessInfo::collectConstStrideAccesses(
980  MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
981  const ValueToValueMap &Strides) {
982  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
983 
984  // Since it's desired that the load/store instructions be maintained in
985  // "program order" for the interleaved access analysis, we have to visit the
986  // blocks in the loop in reverse postorder (i.e., in a topological order).
987  // Such an ordering will ensure that any load/store that may be executed
988  // before a second load/store will precede the second load/store in
989  // AccessStrideInfo.
990  LoopBlocksDFS DFS(TheLoop);
991  DFS.perform(LI);
992  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
993  for (auto &I : *BB) {
994  Value *Ptr = getLoadStorePointerOperand(&I);
995  if (!Ptr)
996  continue;
997  Type *ElementTy = getLoadStoreType(&I);
998 
999  // We don't check wrapping here because we don't know yet if Ptr will be
1000  // part of a full group or a group with gaps. Checking wrapping for all
1001  // pointers (even those that end up in groups with no gaps) will be overly
1002  // conservative. For full groups, wrapping should be ok since if we would
1003  // wrap around the address space we would do a memory access at nullptr
1004  // even without the transformation. The wrapping checks are therefore
1005  // deferred until after we've formed the interleaved groups.
1006  int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
1007  /*Assume=*/true, /*ShouldCheckWrap=*/false);
1008 
1009  const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
1010  uint64_t Size = DL.getTypeAllocSize(ElementTy);
1011  AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1012  getLoadStoreAlignment(&I));
1013  }
1014 }
1015 
1016 // Analyze interleaved accesses and collect them into interleaved load and
1017 // store groups.
1018 //
1019 // When generating code for an interleaved load group, we effectively hoist all
1020 // loads in the group to the location of the first load in program order. When
1021 // generating code for an interleaved store group, we sink all stores to the
1022 // location of the last store. This code motion can change the order of load
1023 // and store instructions and may break dependences.
1024 //
1025 // The code generation strategy mentioned above ensures that we won't violate
1026 // any write-after-read (WAR) dependences.
1027 //
1028 // E.g., for the WAR dependence: a = A[i]; // (1)
1029 // A[i] = b; // (2)
1030 //
1031 // The store group of (2) is always inserted at or below (2), and the load
1032 // group of (1) is always inserted at or above (1). Thus, the instructions will
1033 // never be reordered. All other dependences are checked to ensure the
1034 // correctness of the instruction reordering.
1035 //
1036 // The algorithm visits all memory accesses in the loop in bottom-up program
1037 // order. Program order is established by traversing the blocks in the loop in
1038 // reverse postorder when collecting the accesses.
1039 //
1040 // We visit the memory accesses in bottom-up order because it can simplify the
1041 // construction of store groups in the presence of write-after-write (WAW)
1042 // dependences.
1043 //
1044 // E.g., for the WAW dependence: A[i] = a; // (1)
1045 // A[i] = b; // (2)
1046 // A[i + 1] = c; // (3)
1047 //
1048 // We will first create a store group with (3) and (2). (1) can't be added to
1049 // this group because it and (2) are dependent. However, (1) can be grouped
1050 // with other accesses that may precede it in program order. Note that a
1051 // bottom-up order does not imply that WAW dependences should not be checked.
1052 void InterleavedAccessInfo::analyzeInterleaving(
1053  bool EnablePredicatedInterleavedMemAccesses) {
1054  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1055  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
1056 
1057  // Holds all accesses with a constant stride.
1058  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
1059  collectConstStrideAccesses(AccessStrideInfo, Strides);
1060 
1061  if (AccessStrideInfo.empty())
1062  return;
1063 
1064  // Collect the dependences in the loop.
1065  collectDependences();
1066 
1067  // Holds all interleaved store groups temporarily.
1068  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
1069  // Holds all interleaved load groups temporarily.
1070  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
1071 
1072  // Search in bottom-up program order for pairs of accesses (A and B) that can
1073  // form interleaved load or store groups. In the algorithm below, access A
1074  // precedes access B in program order. We initialize a group for B in the
1075  // outer loop of the algorithm, and then in the inner loop, we attempt to
1076  // insert each A into B's group if:
1077  //
1078  // 1. A and B have the same stride,
1079  // 2. A and B have the same memory object size, and
1080  // 3. A belongs in B's group according to its distance from B.
1081  //
1082  // Special care is taken to ensure group formation will not break any
1083  // dependences.
1084  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1085  BI != E; ++BI) {
1086  Instruction *B = BI->first;
1087  StrideDescriptor DesB = BI->second;
1088 
1089  // Initialize a group for B if it has an allowable stride. Even if we don't
1090  // create a group for B, we continue with the bottom-up algorithm to ensure
1091  // we don't break any of B's dependences.
1092  InterleaveGroup<Instruction> *Group = nullptr;
1093  if (isStrided(DesB.Stride) &&
1094  (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1095  Group = getInterleaveGroup(B);
1096  if (!Group) {
1097  LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1098  << '\n');
1099  Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1100  }
1101  if (B->mayWriteToMemory())
1102  StoreGroups.insert(Group);
1103  else
1104  LoadGroups.insert(Group);
1105  }
1106 
1107  for (auto AI = std::next(BI); AI != E; ++AI) {
1108  Instruction *A = AI->first;
1109  StrideDescriptor DesA = AI->second;
1110 
1111  // Our code motion strategy implies that we can't have dependences
1112  // between accesses in an interleaved group and other accesses located
1113  // between the first and last member of the group. Note that this also
1114  // means that a group can't have more than one member at a given offset.
1115  // The accesses in a group can have dependences with other accesses, but
1116  // we must ensure we don't extend the boundaries of the group such that
1117  // we encompass those dependent accesses.
1118  //
1119  // For example, assume we have the sequence of accesses shown below in a
1120  // stride-2 loop:
1121  //
1122  // (1, 2) is a group | A[i] = a; // (1)
1123  // | A[i-1] = b; // (2) |
1124  // A[i-3] = c; // (3)
1125  // A[i] = d; // (4) | (2, 4) is not a group
1126  //
1127  // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1128  // but not with (4). If we did, the dependent access (3) would be within
1129  // the boundaries of the (2, 4) group.
1130  if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
1131  // If a dependence exists and A is already in a group, we know that A
1132  // must be a store since A precedes B and WAR dependences are allowed.
1133  // Thus, A would be sunk below B. We release A's group to prevent this
1134  // illegal code motion. A will then be free to form another group with
1135  // instructions that precede it.
1136  if (isInterleaved(A)) {
1137  InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
1138 
1139  LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1140  "dependence between " << *A << " and "<< *B << '\n');
1141 
1142  StoreGroups.remove(StoreGroup);
1143  releaseGroup(StoreGroup);
1144  }
1145 
1146  // If a dependence exists and A is not already in a group (or it was
1147  // and we just released it), B might be hoisted above A (if B is a
1148  // load) or another store might be sunk below A (if B is a store). In
1149  // either case, we can't add additional instructions to B's group. B
1150  // will only form a group with instructions that it precedes.
1151  break;
1152  }
1153 
1154  // At this point, we've checked for illegal code motion. If either A or B
1155  // isn't strided, there's nothing left to do.
1156  if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1157  continue;
1158 
1159  // Ignore A if it's already in a group or isn't the same kind of memory
1160  // operation as B.
1161  // Note that mayReadFromMemory() isn't mutually exclusive to
1162  // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1163  // here, canVectorizeMemory() should have returned false - except for the
1164  // case we asked for optimization remarks.
1165  if (isInterleaved(A) ||
1166  (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1167  (A->mayWriteToMemory() != B->mayWriteToMemory()))
1168  continue;
1169 
1170  // Check rules 1 and 2. Ignore A if its stride or size is different from
1171  // that of B.
1172  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1173  continue;
1174 
1175  // Ignore A if the memory objects of A and B don't belong to the same
1176  // address space.
1177  if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1178  continue;
1179 
1180  // Calculate the distance from A to B.
1181  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1182  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1183  if (!DistToB)
1184  continue;
1185  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1186 
1187  // Check rule 3. Ignore A if its distance to B is not a multiple of the
1188  // size.
1189  if (DistanceToB % static_cast<int64_t>(DesB.Size))
1190  continue;
1191 
1192  // All members of a predicated interleave-group must have the same predicate,
1193  // and currently must reside in the same BB.
1194  BasicBlock *BlockA = A->getParent();
1195  BasicBlock *BlockB = B->getParent();
1196  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1197  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1198  continue;
1199 
1200  // The index of A is the index of B plus A's distance to B in multiples
1201  // of the size.
1202  int IndexA =
1203  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1204 
1205  // Try to insert A into B's group.
1206  if (Group->insertMember(A, IndexA, DesA.Alignment)) {
1207  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1208  << " into the interleave group with" << *B
1209  << '\n');
1210  InterleaveGroupMap[A] = Group;
1211 
1212  // Set the first load in program order as the insert position.
1213  if (A->mayReadFromMemory())
1214  Group->setInsertPos(A);
1215  }
1216  } // Iteration over A accesses.
1217  } // Iteration over B accesses.
1218 
1219  auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
1220  int Index,
1221  std::string FirstOrLast) -> bool {
1222  Instruction *Member = Group->getMember(Index);
1223  assert(Member && "Group member does not exist");
1224  Value *MemberPtr = getLoadStorePointerOperand(Member);
1225  Type *AccessTy = getLoadStoreType(Member);
1226  if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
1227  /*Assume=*/false, /*ShouldCheckWrap=*/true))
1228  return false;
1229  LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1230  << FirstOrLast
1231  << " group member potentially pointer-wrapping.\n");
1232  releaseGroup(Group);
1233  return true;
1234  };
1235 
1236  // Remove interleaved groups with gaps whose memory
1237  // accesses may wrap around. We have to revisit the getPtrStride analysis,
1238  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1239  // not check wrapping (see documentation there).
1240  // FORNOW we use Assume=false;
1241  // TODO: Change to Assume=true but making sure we don't exceed the threshold
1242  // of runtime SCEV assumptions checks (thereby potentially failing to
1243  // vectorize altogether).
1244  // Additional optional optimizations:
1245  // TODO: If we are peeling the loop and we know that the first pointer doesn't
1246  // wrap then we can deduce that all pointers in the group don't wrap.
1247  // This means that we can forcefully peel the loop in order to only have to
1248  // check the first pointer for no-wrap. When we'll change to use Assume=true
1249  // we'll only need at most one runtime check per interleaved group.
1250  for (auto *Group : LoadGroups) {
1251  // Case 1: A full group. We can skip the checks; for full groups, if the wide
1252  // load would wrap around the address space we would do a memory access at
1253  // nullptr even without the transformation.
1254  if (Group->getNumMembers() == Group->getFactor())
1255  continue;
1256 
1257  // Case 2: If first and last members of the group don't wrap this implies
1258  // that all the pointers in the group don't wrap.
1259  // So we check only group member 0 (which is always guaranteed to exist),
1260  // and group member Factor - 1; If the latter doesn't exist we rely on
1261  // peeling (if it is a non-reversed access -- see Case 3).
1262  if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1263  continue;
1264  if (Group->getMember(Group->getFactor() - 1))
1265  InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
1266  std::string("last"));
1267  else {
1268  // Case 3: A non-reversed interleaved load group with gaps: We need
1269  // to execute at least one scalar epilogue iteration. This will ensure
1270  // we don't speculatively access memory out-of-bounds. We only need
1271  // to look for a member at index factor - 1, since every group must have
1272  // a member at index zero.
1273  if (Group->isReverse()) {
1274  LLVM_DEBUG(
1275  dbgs() << "LV: Invalidate candidate interleaved group due to "
1276  "a reverse access with gaps.\n");
1277  releaseGroup(Group);
1278  continue;
1279  }
1280  LLVM_DEBUG(
1281  dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1282  RequiresScalarEpilogue = true;
1283  }
1284  }
1285 
1286  for (auto *Group : StoreGroups) {
1287  // Case 1: A full group. We can skip the checks; for full groups, if the wide
1288  // store would wrap around the address space we would do a memory access at
1289  // nullptr even without the transformation.
1290  if (Group->getNumMembers() == Group->getFactor())
1291  continue;
1292 
1293  // Interleave-store-group with gaps is implemented using masked wide store.
1294  // Remove interleaved store groups with gaps if
1295  // masked-interleaved-accesses are not enabled by the target.
1296  if (!EnablePredicatedInterleavedMemAccesses) {
1297  LLVM_DEBUG(
1298  dbgs() << "LV: Invalidate candidate interleaved store group due "
1299  "to gaps.\n");
1300  releaseGroup(Group);
1301  continue;
1302  }
1303 
1304  // Case 2: If first and last members of the group don't wrap this implies
1305  // that all the pointers in the group don't wrap.
1306  // So we check only group member 0 (which is always guaranteed to exist),
1307  // and the last group member. Case 3 (scalar epilog) is not relevant for
1308  // stores with gaps, which are implemented with masked-store (rather than
1309  // speculative access, as in loads).
1310  if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1311  continue;
1312  for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1313  if (Group->getMember(Index)) {
1314  InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
1315  break;
1316  }
1317  }
1318 }
1319 
1320 void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1321  // If no group had triggered the requirement to create an epilogue loop,
1322  // there is nothing to do.
1323  if (!requiresScalarEpilogue())
1324  return;
1325 
1326  bool ReleasedGroup = false;
1327  // Release groups requiring scalar epilogues. Note that this also removes them
1328  // from InterleaveGroups.
1329  for (auto *Group : make_early_inc_range(InterleaveGroups)) {
1330  if (!Group->requiresScalarEpilogue())
1331  continue;
1332  LLVM_DEBUG(
1333  dbgs()
1334  << "LV: Invalidate candidate interleaved group due to gaps that "
1335  "require a scalar epilogue (not allowed under optsize) and cannot "
1336  "be masked (not enabled). \n");
1337  releaseGroup(Group);
1338  ReleasedGroup = true;
1339  }
1340  assert(ReleasedGroup && "At least one group must be invalidated, as a "
1341  "scalar epilogue was required");
1342  (void)ReleasedGroup;
1343  RequiresScalarEpilogue = false;
1344 }
1345 
1346 template <typename InstT>
1347 void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1348  llvm_unreachable("addMetadata can only be used for Instruction");
1349 }
1350 
1351 namespace llvm {
1352 template <>
1353 void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1354  SmallVector<Value *, 4> VL;
1355  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1356  [](std::pair<int, Instruction *> p) { return p.second; });
1357  propagateMetadata(NewInst, VL);
1358 }
1359 }
1360 
1361 std::string VFABI::mangleTLIVectorName(StringRef VectorName,
1362  StringRef ScalarName, unsigned numArgs,
1363  ElementCount VF) {
1364  SmallString<256> Buffer;
1365  llvm::raw_svector_ostream Out(Buffer);
1366  Out << "_ZGV" << VFABI::_LLVM_ << "N";
1367  if (VF.isScalable())
1368  Out << 'x';
1369  else
1370  Out << VF.getFixedValue();
1371  for (unsigned I = 0; I < numArgs; ++I)
1372  Out << "v";
1373  Out << "_" << ScalarName << "(" << VectorName << ")";
1374  return std::string(Out.str());
1375 }
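// Illustrative sketch (hypothetical names, not from the original file):
//   VFABI::mangleTLIVectorName("vsinf", "sinf", /*numArgs=*/1,
//                              ElementCount::getFixed(4))
// produces "_ZGV_LLVM_N4v_sinf(vsinf)": the "_LLVM_" ISA token, 'N' for an
// unmasked variant, the VF, one 'v' per vector argument, then the scalar and
// vector names.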
1376 
1377 void VFABI::getVectorVariantNames(
1378  const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
1380  if (S.empty())
1381  return;
1382 
1383  SmallVector<StringRef, 8> ListAttr;
1384  S.split(ListAttr, ",");
1385 
1386  for (auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
1387 #ifndef NDEBUG
1388  LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
1390  assert(Info.hasValue() && "Invalid name for a VFABI variant.");
1391  assert(CI.getModule()->getFunction(Info.getValue().VectorName) &&
1392  "Vector function is missing.");
1393 #endif
1394  VariantMappings.push_back(std::string(S));
1395  }
1396 }
1397 
1398 bool VFShape::hasValidParameterList() const {
1399  for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
1400  ++Pos) {
1401  assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
1402 
1403  switch (Parameters[Pos].ParamKind) {
1404  default: // Nothing to check.
1405  break;
1406  case VFParamKind::OMP_Linear:
1407  case VFParamKind::OMP_LinearRef:
1408  case VFParamKind::OMP_LinearVal:
1409  case VFParamKind::OMP_LinearUVal:
1410  // Compile time linear steps must be non-zero.
1411  if (Parameters[Pos].LinearStepOrPos == 0)
1412  return false;
1413  break;
1414  case VFParamKind::OMP_LinearPos:
1415  case VFParamKind::OMP_LinearRefPos:
1416  case VFParamKind::OMP_LinearValPos:
1417  case VFParamKind::OMP_LinearUValPos:
1418  // The runtime linear step must be referring to some other
1419  // parameters in the signature.
1420  if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
1421  return false;
1422  // The linear step parameter must be marked as uniform.
1423  if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
1424  VFParamKind::OMP_Uniform)
1425  return false;
1426  // The linear step parameter can't point at itself.
1427  if (Parameters[Pos].LinearStepOrPos == int(Pos))
1428  return false;
1429  break;
1430  case VFParamKind::GlobalPredicate:
1431  // The global predicate must be unique. It can be placed anywhere in the
1432  // signature.
1433  for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
1434  if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
1435  return false;
1436  break;
1437  }
1438  }
1439  return true;
1440 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::NextPowerOf2
uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:683
llvm::EquivalenceClasses::getOrInsertLeaderValue
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
Definition: EquivalenceClasses.h:179
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:64
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::maskIsAllOneOrUndef
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:929
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
llvm::EquivalenceClasses::end
iterator end() const
Definition: EquivalenceClasses.h:147
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::InterleaveGroup::insertMember
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Definition: VectorUtils.h:616
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ElementCount
Definition: TypeSize.h:386
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:130
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::getGEPInductionOperand
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
Definition: VectorUtils.cpp:147
llvm::possiblyDemandedEltsInMask
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
Definition: VectorUtils.cpp:957
llvm::scConstant
@ scConstant
Definition: ScalarEvolutionExpressions.h:41
minimum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For minimum
Definition: README.txt:489
llvm::Instruction::getAllMetadataOtherThanDebugLoc
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * >> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:305
GetElementPtrTypeIterator.h
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:308
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1494
llvm::VFParamKind::GlobalPredicate
@ GlobalPredicate
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1008
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1474
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:58
llvm::getSplatValue
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition: VectorUtils.cpp:366
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::VFABI::getVectorVariantNames
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
Definition: VectorUtils.cpp:1377
concatenateTwoVectors
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
Definition: VectorUtils.cpp:853
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::maskIsAllZeroOrUndef
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:903
llvm::MaxAnalysisRecursionDepth
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:48
ValueTracking.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::computeMinimumValueSizes
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition: VectorUtils.cpp:500
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5344
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
ScalarEvolution.h
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:427
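A worked sketch of the scaling (not part of the generated reference; the helper name is made up):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  SmallVector<int, 16> narrowByTwo() {
    SmallVector<int, 16> Scaled;
    // Each wide index M is replaced by M*Scale .. M*Scale+Scale-1, so the
    // mask <1, 0> scaled by 2 becomes <2, 3, 0, 1>; undef (-1) entries
    // expand to a run of -1.
    narrowShuffleMaskElts(/*Scale=*/2, /*Mask=*/{1, 0}, Scaled);
    return Scaled;
  }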
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
MaxInterleaveGroupFactor
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1029
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1410
llvm::getUniqueCastUse
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Definition: VectorUtils.cpp:188
llvm::createUnaryMask
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition: VectorUtils.cpp:833
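A worked sketch (not part of the generated reference):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  SmallVector<int, 16> foldToUnary() {
    // With NumElts = 4, lanes 4..7 of a binary shuffle select from the second
    // operand; assuming both operands are the same vector, <0, 5, 2, 7> is
    // equivalent to the unary mask <0, 1, 2, 3>.
    return createUnaryMask(/*Mask=*/{0, 5, 2, 7}, /*NumElts=*/4);
  }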
llvm::Optional
Definition: APInt.h:33
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet< Value *, 4 >
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::VFParamKind::OMP_Uniform
@ OMP_Uniform
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::propagateMetadata
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition: VectorUtils.cpp:726
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:178
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::uniteAccessGroups
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
Definition: VectorUtils.cpp:658
llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and located close to each other.
Definition: VectorUtils.h:284
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2128
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1233
llvm::MDNode::operands
op_range operands() const
Definition: Metadata.h:1135
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1336
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1143
llvm::VFShape::hasValidParameterList
bool hasValidParameterList() const
Sanity check on the Parameters in the VFShape.
Definition: VectorUtils.cpp:1398
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
CommandLine.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1335
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:454
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition: VectorUtils.cpp:381
llvm::VFABI::mangleTLIVectorName
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for vector-function-...
Definition: VectorUtils.cpp:1361
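A usage sketch (not part of the generated reference; the names "vsinf"/"sinf" are made up, and the exact mangled string should be checked against the implementation):
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/Support/TypeSize.h"
  using namespace llvm;

  std::string demoMangle() {
    // One scalar argument mapped to a vector argument, fixed VF of 4.
    // The result is expected to be of the form "_ZGV_LLVM_N4v_sinf(vsinf)".
    return VFABI::mangleTLIVectorName("vsinf", "sinf", /*numArgs=*/1,
                                      ElementCount::getFixed(4));
  }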
llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Definition: VectorUtils.cpp:1320
llvm::createInterleaveMask
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition: VectorUtils.cpp:801
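A worked sketch (not part of the generated reference):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  SmallVector<int, 16> interleaveTwoByFour() {
    // For VF = 4 and NumVecs = 2 the mask is <0, 4, 1, 5, 2, 6, 3, 7>:
    // the result alternates lane-by-lane between the two source vectors.
    return createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  }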
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
llvm::ARM_MC::isPredicated
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
Definition: ARMMCTargetDesc.cpp:181
llvm::Instruction::mayReadOrWriteMemory
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:598
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
getScalarSizeInBits
static unsigned getScalarSizeInBits(Type *Ty)
Definition: SystemZTargetTransformInfo.cpp:367
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::isTriviallyVectorizable
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:44
llvm::User
Definition: User.h:44
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:204
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
llvm::VFParamKind::OMP_LinearVal
@ OMP_LinearVal
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:226
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1937
llvm::EquivalenceClasses::member_end
member_iterator member_end() const
Definition: EquivalenceClasses.h:157
llvm::VFParamKind::OMP_LinearUValPos
@ OMP_LinearUValPos
llvm::SCEVMulExpr
This node represents multiplication of some number of SCEVs.
Definition: ScalarEvolutionExpressions.h:286
llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition: Instructions.h:2064
llvm::Instruction
Definition: Instruction.h:45
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1460
llvm::MapVector::rend
reverse_iterator rend()
Definition: MapVector.h:76
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1796
PatternMatch.h
llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition: VectorUtils.h:606
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:661
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:301
LoopIterator.h
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::VFParamKind::OMP_LinearValPos
@ OMP_LinearValPos
llvm::SCEVUnknown::getValue
Value * getValue() const
Definition: ScalarEvolutionExpressions.h:551
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2055
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:282
llvm::SmallString< 256 >
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1307
LoopInfo.h
llvm::InterleaveGroup::setInsertPos
void setInsertPos(InstTy *Inst)
Definition: VectorUtils.h:678
llvm::VFABI::_LLVM_
static constexpr const char * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
Definition: VectorUtils.h:132
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4088
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::InterleaveGroup::getIndex
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition: VectorUtils.h:668
VectorUtils.h
llvm::cl::opt
Definition: CommandLine.h:1432
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:197
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MDNode::intersect
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:928
llvm::VFParamKind::OMP_LinearRef
@ OMP_LinearRef
llvm::EquivalenceClasses::begin
iterator begin() const
Definition: EquivalenceClasses.h:146
llvm::hasVectorInstrinsicOverloadedScalarOpd
bool hasVectorInstrinsicOverloadedScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand that has an overloaded type.
Definition: VectorUtils.cpp:117
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition: VectorUtils.h:607
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::DemandedBits
Definition: DemandedBits.h:40
addToAccessGroupList
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Definition: VectorUtils.cpp:643
llvm::GetElementPtrInst
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
Definition: Instructions.h:928
llvm::hasVectorInstrinsicScalarOpd
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:99
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:576
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:875
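A usage sketch (not part of the generated reference; Builder, V1 and V2 are hypothetical):
  #include "llvm/Analysis/VectorUtils.h"
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  Value *concatPair(IRBuilderBase &Builder, Value *V1, Value *V2) {
    // Two <4 x i32> operands are joined into one <8 x i32> value; the helper
    // emits the necessary shufflevector instructions through Builder.
    return concatenateVectors(Builder, {V1, V2});
  }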
llvm::GetElementPtrInst::getResultElementType
Type * getResultElementType() const
Definition: Instructions.h:1026
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:47
llvm::MDNode::getMostGenericTBAA
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
Definition: TypeBasedAliasAnalysis.cpp:477
IRBuilder.h
llvm::InterleavedAccessInfo::analyzeInterleaving
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
Definition: VectorUtils.cpp:1052
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:72
llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1533
llvm::is_splat
bool is_splat(R &&Range)
Wrapper function around std::equal to detect if all elements in a container are the same.
Definition: STLExtras.h:1718
llvm::getLoadStoreAddressSpace
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Definition: Instructions.h:5335
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::createBitMaskForGaps
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition: VectorUtils.cpp:772
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:142
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2118
llvm::intersectAccessGroups
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Definition: VectorUtils.cpp:679
llvm::CallBase::getFnAttr
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Definition: InstrTypes.h:1599
llvm::ArrayRef< int >
llvm::replaceSymbolicStrideSCEV
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition: LoopAccessAnalysis.cpp:143
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:683
llvm::LinearPolySize::getFixedValue
ScalarTy getFixedValue() const
Definition: TypeSize.h:313
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:420
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1561
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:990
llvm::MDNode::getMostGenericAliasScope
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:941
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:95
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1394
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:430
llvm::createReplicatedMask
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition: VectorUtils.cpp:792
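A worked sketch (not part of the generated reference):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  SmallVector<int, 16> replicateThreeTimes() {
    // For ReplicationFactor = 3 and VF = 2 the mask is <0, 0, 0, 1, 1, 1>:
    // every source lane is repeated ReplicationFactor times.
    return createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/2);
  }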
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1356
llvm::MapVector::rbegin
reverse_iterator rbegin()
Definition: MapVector.h:74
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:12630
DemandedBits.h
llvm::MDNode::getContext
LLVMContext & getContext() const
Definition: Metadata.h:967
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:79
llvm::VFParamKind::OMP_LinearPos
@ OMP_LinearPos
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2133
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition: VectorUtils.cpp:448
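A worked sketch (not part of the generated reference; Wide is a caller-provided output buffer):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  bool widenByTwo(SmallVectorImpl<int> &Wide) {
    // <0, 1, 6, 7> groups into the consecutive pairs (0,1) and (6,7), so it
    // widens to <0, 3> and the call returns true; a mask such as <0, 2, 1, 3>
    // cannot be expressed with wider elements and the call returns false.
    return widenShuffleMaskElts(/*Scale=*/2, /*Mask=*/{0, 1, 6, 7}, Wide);
  }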
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1296
llvm::isValidAsAccessGroup
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
Definition: LoopInfo.cpp:1120
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:669
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:153
llvm::TypeSize
Definition: TypeSize.h:417
llvm::getIntrinsicForCallSite
Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
Definition: ValueTracking.cpp:3349
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:352
llvm::VFParamKind::OMP_LinearRefPos
@ OMP_LinearRefPos
llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
Definition: ScalarEvolutionExpressions.h:530
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
EquivalenceClasses.h
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:842
llvm::DemandedBits::getDemandedBits
APInt getDemandedBits(Instruction *I)
Return the bits demanded from instruction I.
Definition: DemandedBits.cpp:443
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count the number of 0's from the most significant bit to the least, stopping at the first 1.
Definition: MathExtras.h:225
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:413
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition: VectorUtils.cpp:280
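A usage sketch (not part of the generated reference; V is a hypothetical vector value):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  Value *scalarForLaneTwo(Value *V) {
    // If V was built as
    //   %v = insertelement <4 x float> poison, float %x, i32 2
    // this returns %x for EltNo = 2; lanes that cannot be traced back to a
    // scalar yield nullptr.
    return findScalarElement(V, /*EltNo=*/2);
  }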
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition: ScalarEvolutionExpressions.h:57
llvm::VFParamKind::OMP_LinearUVal
@ OMP_LinearUVal
ScalarEvolutionExpressions.h
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2009
llvm::SCEVIntegralCastExpr
This is the base class for unary integral cast operator classes.
Definition: ScalarEvolutionExpressions.h:121
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::getSplatIndex
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition: VectorUtils.cpp:344
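A worked sketch (not part of the generated reference):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  int splatIndexDemo() {
    // Negative (undef) entries are ignored, so <3, -1, 3, 3> yields 3;
    // a mask containing two different non-negative values yields -1.
    return getSplatIndex({3, -1, 3, 3});
  }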
llvm::MDNode::getMostGenericFPMath
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
Definition: Metadata.cpp:973
List
const NodeList & List
Definition: RDFGraph.cpp:201
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::VFABI::MappingsAttrName
static constexpr const char * MappingsAttrName
Definition: VectorUtils.h:193
llvm::stripGetElementPtr
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
Definition: VectorUtils.cpp:171
TargetTransformInfo.h
llvm::raw_svector_ostream
A raw_ostream that writes to a SmallVector or SmallString.
Definition: raw_ostream.h:658
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl< int >
llvm::VFABI::tryDemangleForVFABI
Optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled name in the following format:
Definition: VFABIDemangling.cpp:319
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5299
llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Definition: EquivalenceClasses.h:217
llvm::createStrideMask
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition: VectorUtils.cpp:812
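A worked sketch (not part of the generated reference):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  SmallVector<int, 16> everySecondLane() {
    // For Start = 0, Stride = 2 and VF = 4 the mask is <0, 2, 4, 6>,
    // selecting every second lane of an interleaved wide vector.
    return createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
  }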
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:820
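A worked sketch (not part of the generated reference):
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  SmallVector<int, 16> firstFourThenUndef() {
    // For Start = 0, NumInts = 4 and NumUndefs = 4 the mask is
    // <0, 1, 2, 3, undef, undef, undef, undef>, with undef encoded as -1.
    return createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/4);
  }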
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:624
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:496
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
llvm::EquivalenceClasses::member_begin
member_iterator member_begin(iterator I) const
Definition: EquivalenceClasses.h:153
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::VFParamKind::OMP_Linear
@ OMP_Linear
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:154
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2123
llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
Definition: Instructions.h:5325
llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:753
llvm::InterleaveGroup::getNumMembers
uint32_t getNumMembers() const
Definition: VectorUtils.h:609
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g. alias info) from the instructions in this group to NewInst.
Definition: VectorUtils.cpp:1347
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37