VectorUtils.cpp
1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/VectorUtils.h"
14 #include "llvm/ADT/EquivalenceClasses.h"
15 #include "llvm/Analysis/DemandedBits.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/LoopIterator.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/CommandLine.h"
28 
29 #define DEBUG_TYPE "vectorutils"
30 
31 using namespace llvm;
32 using namespace llvm::PatternMatch;
33 
34 /// Maximum factor for an interleaved memory access.
36  "max-interleave-group-factor", cl::Hidden,
37  cl::desc("Maximum factor for an interleaved access group (default = 8)"),
38  cl::init(8));
39 
40 /// Return true if all of the intrinsic's arguments and return type are scalars
41 /// for the scalar form of the intrinsic, and vectors for the vector form of the
42 /// intrinsic (except operands that are marked as always being scalar by
43 /// isVectorIntrinsicWithScalarOpAtArg).
44 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
45  switch (ID) {
46  case Intrinsic::abs: // Begin integer bit-manipulation.
47  case Intrinsic::bswap:
48  case Intrinsic::bitreverse:
49  case Intrinsic::ctpop:
50  case Intrinsic::ctlz:
51  case Intrinsic::cttz:
52  case Intrinsic::fshl:
53  case Intrinsic::fshr:
54  case Intrinsic::smax:
55  case Intrinsic::smin:
56  case Intrinsic::umax:
57  case Intrinsic::umin:
58  case Intrinsic::sadd_sat:
59  case Intrinsic::ssub_sat:
60  case Intrinsic::uadd_sat:
61  case Intrinsic::usub_sat:
62  case Intrinsic::smul_fix:
63  case Intrinsic::smul_fix_sat:
64  case Intrinsic::umul_fix:
65  case Intrinsic::umul_fix_sat:
66  case Intrinsic::sqrt: // Begin floating-point.
67  case Intrinsic::sin:
68  case Intrinsic::cos:
69  case Intrinsic::exp:
70  case Intrinsic::exp2:
71  case Intrinsic::log:
72  case Intrinsic::log10:
73  case Intrinsic::log2:
74  case Intrinsic::fabs:
75  case Intrinsic::minnum:
76  case Intrinsic::maxnum:
77  case Intrinsic::minimum:
78  case Intrinsic::maximum:
79  case Intrinsic::copysign:
80  case Intrinsic::floor:
81  case Intrinsic::ceil:
82  case Intrinsic::trunc:
83  case Intrinsic::rint:
84  case Intrinsic::nearbyint:
85  case Intrinsic::round:
86  case Intrinsic::roundeven:
87  case Intrinsic::pow:
88  case Intrinsic::fma:
89  case Intrinsic::fmuladd:
90  case Intrinsic::powi:
91  case Intrinsic::canonicalize:
92  case Intrinsic::fptosi_sat:
93  case Intrinsic::fptoui_sat:
94  return true;
95  default:
96  return false;
97  }
98 }
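// Illustrative usage (editorial sketch, not part of the upstream file): the
// intrinsics accepted above are exactly those whose scalar and vector forms
// differ only in type, e.g.
//   isTriviallyVectorizable(Intrinsic::smax);   // true: i32 @llvm.smax.i32
//                                               // widens to <4 x i32> @llvm.smax.v4i32
//   isTriviallyVectorizable(Intrinsic::memcpy); // false: not in the list above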
99 
100 /// Identifies if the vector form of the intrinsic has a scalar operand.
101 bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
102  unsigned ScalarOpdIdx) {
103  switch (ID) {
104  case Intrinsic::abs:
105  case Intrinsic::ctlz:
106  case Intrinsic::cttz:
107  case Intrinsic::powi:
108  return (ScalarOpdIdx == 1);
109  case Intrinsic::smul_fix:
110  case Intrinsic::smul_fix_sat:
111  case Intrinsic::umul_fix:
112  case Intrinsic::umul_fix_sat:
113  return (ScalarOpdIdx == 2);
114  default:
115  return false;
116  }
117 }
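// Illustrative usage (editorial sketch, not part of the upstream file): for
// llvm.powi the exponent stays scalar in the vector form, e.g.
// <4 x float> @llvm.powi.v4f32.i32(<4 x float>, i32), so
//   isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 1); // true
//   isVectorIntrinsicWithScalarOpAtArg(Intrinsic::powi, 0); // false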
118 
119 bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
120  unsigned OpdIdx) {
121  switch (ID) {
122  case Intrinsic::fptosi_sat:
123  case Intrinsic::fptoui_sat:
124  return OpdIdx == 0;
125  case Intrinsic::powi:
126  return OpdIdx == 1;
127  default:
128  return false;
129  }
130 }
131 
132 /// Returns intrinsic ID for call.
133 /// For the input call instruction it finds the mapping intrinsic and returns
134 /// its ID; if no mapping is found, it returns Intrinsic::not_intrinsic.
135 Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
136  const TargetLibraryInfo *TLI) {
137  Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
138  if (ID == Intrinsic::not_intrinsic)
139  return Intrinsic::not_intrinsic;
140 
141  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
142  ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
143  ID == Intrinsic::experimental_noalias_scope_decl ||
144  ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
145  return ID;
146  return Intrinsic::not_intrinsic;
147 }
148 
149 /// Find the operand of the GEP that should be checked for consecutive
150 /// stores. This ignores trailing indices that have no effect on the final
151 /// pointer.
152 unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
153  const DataLayout &DL = Gep->getModule()->getDataLayout();
154  unsigned LastOperand = Gep->getNumOperands() - 1;
155  TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
156 
157  // Walk backwards and try to peel off zeros.
158  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
159  // Find the type we're currently indexing into.
160  gep_type_iterator GEPTI = gep_type_begin(Gep);
161  std::advance(GEPTI, LastOperand - 2);
162 
163  // If it's a type with the same allocation size as the result of the GEP we
164  // can peel off the zero index.
165  if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
166  break;
167  --LastOperand;
168  }
169 
170  return LastOperand;
171 }
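// Illustrative example (editorial sketch, not part of the upstream file): for
//   %p = getelementptr [1 x i32], ptr %base, i64 %i, i64 0
// the trailing zero index does not change the final address (the indexed
// [1 x i32] has the same alloc size as the i32 result element type), so the
// function returns operand index 1 (%i) rather than the literal last operand.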
172 
173 /// If the argument is a GEP, then returns the operand identified by
174 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
175 /// operand, it returns that instead.
176 Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
177  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
178  if (!GEP)
179  return Ptr;
180 
181  unsigned InductionOperand = getGEPInductionOperand(GEP);
182 
183  // Check that all of the gep indices are uniform except for our induction
184  // operand.
185  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
186  if (i != InductionOperand &&
187  !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
188  return Ptr;
189  return GEP->getOperand(InductionOperand);
190 }
191 
192 /// If a value has only one user that is a CastInst, return it.
193 static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
194  Value *UniqueCast = nullptr;
195  for (User *U : Ptr->users()) {
196  CastInst *CI = dyn_cast<CastInst>(U);
197  if (CI && CI->getType() == Ty) {
198  if (!UniqueCast)
199  UniqueCast = CI;
200  else
201  return nullptr;
202  }
203  }
204  return UniqueCast;
205 }
206 
207 /// Get the stride of a pointer access in a loop. Looks for symbolic
208 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
209 Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
210  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
211  if (!PtrTy || PtrTy->isAggregateType())
212  return nullptr;
213 
214  // Try to remove a gep instruction to make the pointer (actually the index at
215  // this point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing
216  // the pointer; otherwise, we are analyzing the index.
217  Value *OrigPtr = Ptr;
218 
219  // The size of the pointer access.
220  int64_t PtrAccessSize = 1;
221 
222  Ptr = stripGetElementPtr(Ptr, SE, Lp);
223  const SCEV *V = SE->getSCEV(Ptr);
224 
225  if (Ptr != OrigPtr)
226  // Strip off casts.
227  while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
228  V = C->getOperand();
229 
230  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
231  if (!S)
232  return nullptr;
233 
234  V = S->getStepRecurrence(*SE);
235  if (!V)
236  return nullptr;
237 
238  // Strip off the size of access multiplication if we are still analyzing the
239  // pointer.
240  if (OrigPtr == Ptr) {
241  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
242  if (M->getOperand(0)->getSCEVType() != scConstant)
243  return nullptr;
244 
245  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
246 
247  // Huge step value - give up.
248  if (APStepVal.getBitWidth() > 64)
249  return nullptr;
250 
251  int64_t StepVal = APStepVal.getSExtValue();
252  if (PtrAccessSize != StepVal)
253  return nullptr;
254  V = M->getOperand(1);
255  }
256  }
257 
258  // Strip off casts.
259  Type *StripedOffRecurrenceCast = nullptr;
260  if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
261  StripedOffRecurrenceCast = C->getType();
262  V = C->getOperand();
263  }
264 
265  // Look for the loop invariant symbolic value.
266  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
267  if (!U)
268  return nullptr;
269 
270  Value *Stride = U->getValue();
271  if (!Lp->isLoopInvariant(Stride))
272  return nullptr;
273 
274  // If we have stripped off the recurrence cast we have to make sure that we
275  // return the value that is used in this loop so that we can replace it later.
276  if (StripedOffRecurrenceCast)
277  Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
278 
279  return Stride;
280 }
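// Illustrative example (editorial sketch, not part of the upstream file): for
// a loop access of the form
//   %mul = mul i64 %iv, %stride            ; %stride loop-invariant
//   %gep = getelementptr i32, ptr %A, i64 %mul
// getStrideFromPointer(%gep, SE, Lp) is expected to return %stride (possibly
// looking through a unique cast); it returns nullptr when the recurrence step
// is not such a symbolic loop-invariant value.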
281 
282 /// Given a vector and an element number, see if the scalar value is
283 /// already around as a register, for example if it were inserted then extracted
284 /// from the vector.
285 Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
286  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
287  VectorType *VTy = cast<VectorType>(V->getType());
288  // For fixed-length vector, return undef for out of range access.
289  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
290  unsigned Width = FVTy->getNumElements();
291  if (EltNo >= Width)
292  return UndefValue::get(FVTy->getElementType());
293  }
294 
295  if (Constant *C = dyn_cast<Constant>(V))
296  return C->getAggregateElement(EltNo);
297 
298  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
299  // If this is an insert to a variable element, we don't know what it is.
300  if (!isa<ConstantInt>(III->getOperand(2)))
301  return nullptr;
302  unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
303 
304  // If this is an insert to the element we are looking for, return the
305  // inserted value.
306  if (EltNo == IIElt)
307  return III->getOperand(1);
308 
309  // Guard against infinite loop on malformed, unreachable IR.
310  if (III == III->getOperand(0))
311  return nullptr;
312 
313  // Otherwise, the insertelement doesn't modify the value, recurse on its
314  // vector input.
315  return findScalarElement(III->getOperand(0), EltNo);
316  }
317 
318  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
319  // Restrict the following transformation to fixed-length vector.
320  if (SVI && isa<FixedVectorType>(SVI->getType())) {
321  unsigned LHSWidth =
322  cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
323  int InEl = SVI->getMaskValue(EltNo);
324  if (InEl < 0)
325  return UndefValue::get(VTy->getElementType());
326  if (InEl < (int)LHSWidth)
327  return findScalarElement(SVI->getOperand(0), InEl);
328  return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
329  }
330 
331  // Extract a value from a vector add operation with a constant zero.
332  // TODO: Use getBinOpIdentity() to generalize this.
333  Value *Val; Constant *C;
334  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
335  if (Constant *Elt = C->getAggregateElement(EltNo))
336  if (Elt->isNullValue())
337  return findScalarElement(Val, EltNo);
338 
339  // If the vector is a splat then we can trivially find the scalar element.
340  if (isa<ScalableVectorType>(VTy))
341  if (Value *Splat = getSplatValue(V))
342  if (EltNo < VTy->getElementCount().getKnownMinValue())
343  return Splat;
344 
345  // Otherwise, we don't know.
346  return nullptr;
347 }
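// Illustrative example (editorial sketch, not part of the upstream file):
//   %v1 = insertelement <4 x i32> %v0, i32 %x, i32 2
//   %e  = extractelement <4 x i32> %v1, i32 2
// findScalarElement(%v1, 2) returns %x, so %e can be replaced by %x; asking
// for a lane the insert did not touch recurses into %v0 instead.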
348 
349 int llvm::getSplatIndex(ArrayRef<int> Mask) {
350  int SplatIndex = -1;
351  for (int M : Mask) {
352  // Ignore invalid (undefined) mask elements.
353  if (M < 0)
354  continue;
355 
356  // There can be only 1 non-negative mask element value if this is a splat.
357  if (SplatIndex != -1 && SplatIndex != M)
358  return -1;
359 
360  // Initialize the splat index to the 1st non-negative mask element.
361  SplatIndex = M;
362  }
363  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
364  return SplatIndex;
365 }
366 
367 /// Get splat value if the input is a splat vector or return nullptr.
368 /// This function is not fully general. It checks only 2 cases:
369 /// the input value is (1) a splat constant vector or (2) a sequence
370 /// of instructions that broadcasts a scalar at element 0.
371 Value *llvm::getSplatValue(const Value *V) {
372  if (isa<VectorType>(V->getType()))
373  if (auto *C = dyn_cast<Constant>(V))
374  return C->getSplatValue();
375 
376  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
377  Value *Splat;
378  if (match(V,
379  m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
380  m_Value(), m_ZeroMask())))
381  return Splat;
382 
383  return nullptr;
384 }
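// Illustrative example (editorial sketch, not part of the upstream file): the
// canonical splat idiom
//   %ins   = insertelement <4 x float> poison, float %f, i64 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
//                          <4 x i32> zeroinitializer
// matches the pattern above, so getSplatValue(%splat) returns %f; splats
// built any other way (e.g. element-by-element inserts) return nullptr.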
385 
386 bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
387  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
388 
389  if (isa<VectorType>(V->getType())) {
390  if (isa<UndefValue>(V))
391  return true;
392  // FIXME: We can allow undefs, but if Index was specified, we may want to
393  // check that the constant is defined at that index.
394  if (auto *C = dyn_cast<Constant>(V))
395  return C->getSplatValue() != nullptr;
396  }
397 
398  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
399  // FIXME: We can safely allow undefs here. If Index was specified, we will
400  // check that the mask elt is defined at the required index.
401  if (!all_equal(Shuf->getShuffleMask()))
402  return false;
403 
404  // Match any index.
405  if (Index == -1)
406  return true;
407 
408  // Match a specific element. The mask should be defined at and match the
409  // specified index.
410  return Shuf->getMaskValue(Index) == Index;
411  }
412 
413  // The remaining tests are all recursive, so bail out if we hit the limit.
414  if (Depth++ == MaxAnalysisRecursionDepth)
415  return false;
416 
417  // If both operands of a binop are splats, the result is a splat.
418  Value *X, *Y, *Z;
419  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
420  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
421 
422  // If all operands of a select are splats, the result is a splat.
423  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
424  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
425  isSplatValue(Z, Index, Depth);
426 
427  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
428 
429  return false;
430 }
431 
432 bool llvm::getShuffleDemandedElts(int SrcWidth, ArrayRef<int> Mask,
433  const APInt &DemandedElts, APInt &DemandedLHS,
434  APInt &DemandedRHS, bool AllowUndefElts) {
435  DemandedLHS = DemandedRHS = APInt::getZero(SrcWidth);
436 
437  // Early out if we don't demand any elements.
438  if (DemandedElts.isZero())
439  return true;
440 
441  // Simple case of a shuffle with zeroinitializer.
442  if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
443  DemandedLHS.setBit(0);
444  return true;
445  }
446 
447  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
448  int M = Mask[I];
449  assert((-1 <= M) && (M < (SrcWidth * 2)) &&
450  "Invalid shuffle mask constant");
451 
452  if (!DemandedElts[I] || (AllowUndefElts && (M < 0)))
453  continue;
454 
455  // For undef elements, we don't know anything about the common state of
456  // the shuffle result.
457  if (M < 0)
458  return false;
459 
460  if (M < SrcWidth)
461  DemandedLHS.setBit(M);
462  else
463  DemandedRHS.setBit(M - SrcWidth);
464  }
465 
466  return true;
467 }
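// Illustrative example (editorial sketch, not part of the upstream file): with
// SrcWidth = 4, Mask = <0, 5, 1, 6> and all four output lanes demanded, lanes
// 0 and 2 read LHS elements 0 and 1 while lanes 1 and 3 read RHS elements 1
// and 2, so the call sets DemandedLHS = 0b0011, DemandedRHS = 0b0110 and
// returns true.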
468 
469 void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
470  SmallVectorImpl<int> &ScaledMask) {
471  assert(Scale > 0 && "Unexpected scaling factor");
472 
473  // Fast-path: if no scaling, then it is just a copy.
474  if (Scale == 1) {
475  ScaledMask.assign(Mask.begin(), Mask.end());
476  return;
477  }
478 
479  ScaledMask.clear();
480  for (int MaskElt : Mask) {
481  if (MaskElt >= 0) {
482  assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
483  "Overflowed 32-bits");
484  }
485  for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
486  ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
487  }
488 }
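// Illustrative example (editorial sketch, not part of the upstream file):
// narrowing by Scale = 2 turns each wide lane into two consecutive narrow
// lanes, e.g. Mask = <1, -1> becomes ScaledMask = <2, 3, -1, -1>, which is the
// mask to use after bitcasting the operands to a type with twice as many
// half-width elements.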
489 
490 bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
491  SmallVectorImpl<int> &ScaledMask) {
492  assert(Scale > 0 && "Unexpected scaling factor");
493 
494  // Fast-path: if no scaling, then it is just a copy.
495  if (Scale == 1) {
496  ScaledMask.assign(Mask.begin(), Mask.end());
497  return true;
498  }
499 
500  // We must map the original elements down evenly to a type with less elements.
501  int NumElts = Mask.size();
502  if (NumElts % Scale != 0)
503  return false;
504 
505  ScaledMask.clear();
506  ScaledMask.reserve(NumElts / Scale);
507 
508  // Step through the input mask by splitting into Scale-sized slices.
509  do {
510  ArrayRef<int> MaskSlice = Mask.take_front(Scale);
511  assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
512 
513  // The first element of the slice determines how we evaluate this slice.
514  int SliceFront = MaskSlice.front();
515  if (SliceFront < 0) {
516  // Negative values (undef or other "sentinel" values) must be equal across
517  // the entire slice.
518  if (!all_equal(MaskSlice))
519  return false;
520  ScaledMask.push_back(SliceFront);
521  } else {
522  // A positive mask element must be cleanly divisible.
523  if (SliceFront % Scale != 0)
524  return false;
525  // Elements of the slice must be consecutive.
526  for (int i = 1; i < Scale; ++i)
527  if (MaskSlice[i] != SliceFront + i)
528  return false;
529  ScaledMask.push_back(SliceFront / Scale);
530  }
531  Mask = Mask.drop_front(Scale);
532  } while (!Mask.empty());
533 
534  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
535 
536  // All elements of the original mask can be scaled down to map to the elements
537  // of a mask with wider elements.
538  return true;
539 }
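// Illustrative example (editorial sketch, not part of the upstream file):
// widening is the inverse of the narrowing above and may fail; with Scale = 2,
//   <2, 3, -1, -1>  ->  <1, -1>   (returns true)
//   <0, 2,  4,  5>  ->  rejected  (returns false: the slice <0, 2> is not a
//                                  run of consecutive elements)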
540 
541 void llvm::getShuffleMaskWithWidestElts(ArrayRef<int> Mask,
542  SmallVectorImpl<int> &ScaledMask) {
543  std::array<SmallVector<int, 16>, 2> TmpMasks;
544  SmallVectorImpl<int> *Output = &TmpMasks[0], *Tmp = &TmpMasks[1];
545  ArrayRef<int> InputMask = Mask;
546  for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) {
547  while (widenShuffleMaskElts(Scale, InputMask, *Output)) {
548  InputMask = *Output;
549  std::swap(Output, Tmp);
550  }
551  }
552  ScaledMask.assign(InputMask.begin(), InputMask.end());
553 }
554 
555 void llvm::processShuffleMasks(
556  ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
557  unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
558  function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
559  function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
560  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
561  // Try to perform better estimation of the permutation.
562  // 1. Split the source/destination vectors into real registers.
563  // 2. Do the mask analysis to identify which real registers are
564  // permuted.
565  int Sz = Mask.size();
566  unsigned SzDest = Sz / NumOfDestRegs;
567  unsigned SzSrc = Sz / NumOfSrcRegs;
568  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
569  auto &RegMasks = Res[I];
570  RegMasks.assign(NumOfSrcRegs, {});
571  // Check that the values in dest registers are in the one src
572  // register.
573  for (unsigned K = 0; K < SzDest; ++K) {
574  int Idx = I * SzDest + K;
575  if (Idx == Sz)
576  break;
577  if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
578  continue;
579  int SrcRegIdx = Mask[Idx] / SzSrc;
580  // Add a cost of PermuteTwoSrc for each new source register permute,
581  // if we have more than one source registers.
582  if (RegMasks[SrcRegIdx].empty())
583  RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
584  RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
585  }
586  }
587  // Process split mask.
588  for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
589  auto &Dest = Res[I];
590  int NumSrcRegs =
591  count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
592  switch (NumSrcRegs) {
593  case 0:
594  // No input vectors were used!
595  NoInputAction();
596  break;
597  case 1: {
598  // Find the only mask with at least single undef mask elem.
599  auto *It =
600  find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
601  unsigned SrcReg = std::distance(Dest.begin(), It);
602  SingleInputAction(*It, SrcReg, I);
603  break;
604  }
605  default: {
606  // The first mask is a permutation of a single register. Since we have >2
607  // input registers to shuffle, we merge the masks for 2 first registers
608  // and generate a shuffle of 2 registers rather than the reordering of the
609  // first register and then shuffle with the second register. Next,
610  // generate the shuffles of the resulting register + the remaining
611  // registers from the list.
612  auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
613  ArrayRef<int> SecondMask) {
614  for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
615  if (SecondMask[Idx] != UndefMaskElem) {
616  assert(FirstMask[Idx] == UndefMaskElem &&
617  "Expected undefined mask element.");
618  FirstMask[Idx] = SecondMask[Idx] + VF;
619  }
620  }
621  };
622  auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
623  for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
624  if (Mask[Idx] != UndefMaskElem)
625  Mask[Idx] = Idx;
626  }
627  };
628  int SecondIdx;
629  do {
630  int FirstIdx = -1;
631  SecondIdx = -1;
632  MutableArrayRef<int> FirstMask, SecondMask;
633  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
634  SmallVectorImpl<int> &RegMask = Dest[I];
635  if (RegMask.empty())
636  continue;
637 
638  if (FirstIdx == SecondIdx) {
639  FirstIdx = I;
640  FirstMask = RegMask;
641  continue;
642  }
643  SecondIdx = I;
644  SecondMask = RegMask;
645  CombineMasks(FirstMask, SecondMask);
646  ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
647  NormalizeMask(FirstMask);
648  RegMask.clear();
649  SecondMask = FirstMask;
650  SecondIdx = FirstIdx;
651  }
652  if (FirstIdx != SecondIdx && SecondIdx >= 0) {
653  CombineMasks(SecondMask, FirstMask);
654  ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
655  Dest[FirstIdx].clear();
656  NormalizeMask(SecondMask);
657  }
658  } while (SecondIdx >= 0);
659  break;
660  }
661  }
662  }
663 }
664 
665 MapVector<Instruction *, uint64_t>
666 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
667  const TargetTransformInfo *TTI) {
668 
669  // DemandedBits will give us every value's live-out bits. But we want
670  // to ensure no extra casts would need to be inserted, so every DAG
671  // of connected values must have the same minimum bitwidth.
672  EquivalenceClasses<Value *> ECs;
673  SmallVector<Value *, 16> Worklist;
674  SmallPtrSet<Value *, 4> Roots;
675  SmallPtrSet<Value *, 16> Visited;
676  DenseMap<Value *, uint64_t> DBits;
677  SmallPtrSet<Instruction *, 4> InstructionSet;
678  MapVector<Instruction *, uint64_t> MinBWs;
679 
680  // Determine the roots. We work bottom-up, from truncs or icmps.
681  bool SeenExtFromIllegalType = false;
682  for (auto *BB : Blocks)
683  for (auto &I : *BB) {
684  InstructionSet.insert(&I);
685 
686  if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
687  !TTI->isTypeLegal(I.getOperand(0)->getType()))
688  SeenExtFromIllegalType = true;
689 
690  // Only deal with non-vector integers up to 64-bits wide.
691  if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
692  !I.getType()->isVectorTy() &&
693  I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
694  // Don't make work for ourselves. If we know the loaded type is legal,
695  // don't add it to the worklist.
696  if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
697  continue;
698 
699  Worklist.push_back(&I);
700  Roots.insert(&I);
701  }
702  }
703  // Early exit.
704  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
705  return MinBWs;
706 
707  // Now proceed breadth-first, unioning values together.
708  while (!Worklist.empty()) {
709  Value *Val = Worklist.pop_back_val();
710  Value *Leader = ECs.getOrInsertLeaderValue(Val);
711 
712  if (!Visited.insert(Val).second)
713  continue;
714 
715  // Non-instructions terminate a chain successfully.
716  if (!isa<Instruction>(Val))
717  continue;
718  Instruction *I = cast<Instruction>(Val);
719 
720  // If we encounter a type that is larger than 64 bits, we can't represent
721  // it so bail out.
722  if (DB.getDemandedBits(I).getBitWidth() > 64)
724 
725  uint64_t V = DB.getDemandedBits(I).getZExtValue();
726  DBits[Leader] |= V;
727  DBits[I] = V;
728 
729  // Casts, loads and instructions outside of our range terminate a chain
730  // successfully.
731  if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
732  !InstructionSet.count(I))
733  continue;
734 
735  // Unsafe casts terminate a chain unsuccessfully. We can't do anything
736  // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
737  // transform anything that relies on them.
738  if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
739  !I->getType()->isIntegerTy()) {
740  DBits[Leader] |= ~0ULL;
741  continue;
742  }
743 
744  // We don't modify the types of PHIs. Reductions will already have been
745  // truncated if possible, and inductions' sizes will have been chosen by
746  // indvars.
747  if (isa<PHINode>(I))
748  continue;
749 
750  if (DBits[Leader] == ~0ULL)
751  // All bits demanded, no point continuing.
752  continue;
753 
754  for (Value *O : cast<User>(I)->operands()) {
755  ECs.unionSets(Leader, O);
756  Worklist.push_back(O);
757  }
758  }
759 
760  // Now we've discovered all values, walk them to see if there are
761  // any users we didn't see. If there are, we can't optimize that
762  // chain.
763  for (auto &I : DBits)
764  for (auto *U : I.first->users())
765  if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
766  DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
767 
768  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
769  uint64_t LeaderDemandedBits = 0;
770  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
771  LeaderDemandedBits |= DBits[M];
772 
773  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
774  llvm::countLeadingZeros(LeaderDemandedBits);
775  // Round up to a power of 2
776  if (!isPowerOf2_64((uint64_t)MinBW))
777  MinBW = NextPowerOf2(MinBW);
778 
779  // We don't modify the types of PHIs. Reductions will already have been
780  // truncated if possible, and inductions' sizes will have been chosen by
781  // indvars.
782  // If we are required to shrink a PHI, abandon this entire equivalence class.
783  bool Abort = false;
784  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
785  if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
786  Abort = true;
787  break;
788  }
789  if (Abort)
790  continue;
791 
792  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
793  if (!isa<Instruction>(M))
794  continue;
795  Type *Ty = M->getType();
796  if (Roots.count(M))
797  Ty = cast<Instruction>(M)->getOperand(0)->getType();
798  if (MinBW < Ty->getScalarSizeInBits())
799  MinBWs[cast<Instruction>(M)] = MinBW;
800  }
801  }
802 
803  return MinBWs;
804 }
805 
806 /// Add all access groups in @p AccGroups to @p List.
807 template <typename ListT>
808 static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
809  // Interpret an access group as a list containing itself.
810  if (AccGroups->getNumOperands() == 0) {
811  assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
812  List.insert(AccGroups);
813  return;
814  }
815 
816  for (const auto &AccGroupListOp : AccGroups->operands()) {
817  auto *Item = cast<MDNode>(AccGroupListOp.get());
818  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
819  List.insert(Item);
820  }
821 }
822 
823 MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
824  if (!AccGroups1)
825  return AccGroups2;
826  if (!AccGroups2)
827  return AccGroups1;
828  if (AccGroups1 == AccGroups2)
829  return AccGroups1;
830 
831  SmallSetVector<Metadata *, 4> Union;
832  addToAccessGroupList(Union, AccGroups1);
833  addToAccessGroupList(Union, AccGroups2);
834 
835  if (Union.size() == 0)
836  return nullptr;
837  if (Union.size() == 1)
838  return cast<MDNode>(Union.front());
839 
840  LLVMContext &Ctx = AccGroups1->getContext();
841  return MDNode::get(Ctx, Union.getArrayRef());
842 }
843 
844 MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
845  const Instruction *Inst2) {
846  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
847  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
848 
849  if (!MayAccessMem1 && !MayAccessMem2)
850  return nullptr;
851  if (!MayAccessMem1)
852  return Inst2->getMetadata(LLVMContext::MD_access_group);
853  if (!MayAccessMem2)
854  return Inst1->getMetadata(LLVMContext::MD_access_group);
855 
856  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
857  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
858  if (!MD1 || !MD2)
859  return nullptr;
860  if (MD1 == MD2)
861  return MD1;
862 
863  // Use set for scalable 'contains' check.
864  SmallPtrSet<Metadata *, 4> AccGroupSet2;
865  addToAccessGroupList(AccGroupSet2, MD2);
866 
867  SmallVector<Metadata *, 4> Intersection;
868  if (MD1->getNumOperands() == 0) {
869  assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
870  if (AccGroupSet2.count(MD1))
871  Intersection.push_back(MD1);
872  } else {
873  for (const MDOperand &Node : MD1->operands()) {
874  auto *Item = cast<MDNode>(Node.get());
875  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
876  if (AccGroupSet2.count(Item))
877  Intersection.push_back(Item);
878  }
879  }
880 
881  if (Intersection.size() == 0)
882  return nullptr;
883  if (Intersection.size() == 1)
884  return cast<MDNode>(Intersection.front());
885 
886  LLVMContext &Ctx = Inst1->getContext();
887  return MDNode::get(Ctx, Intersection);
888 }
889 
890 /// \returns \p I after propagating metadata from \p VL.
891 Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
892  if (VL.empty())
893  return Inst;
894  Instruction *I0 = cast<Instruction>(VL[0]);
895  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
896  I0->getAllMetadataOtherThanDebugLoc(Metadata);
897 
898  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
899  LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
900  LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
901  LLVMContext::MD_access_group}) {
902  MDNode *MD = I0->getMetadata(Kind);
903 
904  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
905  const Instruction *IJ = cast<Instruction>(VL[J]);
906  MDNode *IMD = IJ->getMetadata(Kind);
907  switch (Kind) {
908  case LLVMContext::MD_tbaa:
909  MD = MDNode::getMostGenericTBAA(MD, IMD);
910  break;
911  case LLVMContext::MD_alias_scope:
912  MD = MDNode::getMostGenericAliasScope(MD, IMD);
913  break;
914  case LLVMContext::MD_fpmath:
915  MD = MDNode::getMostGenericFPMath(MD, IMD);
916  break;
917  case LLVMContext::MD_noalias:
918  case LLVMContext::MD_nontemporal:
919  case LLVMContext::MD_invariant_load:
920  MD = MDNode::intersect(MD, IMD);
921  break;
922  case LLVMContext::MD_access_group:
923  MD = intersectAccessGroups(Inst, IJ);
924  break;
925  default:
926  llvm_unreachable("unhandled metadata");
927  }
928  }
929 
930  Inst->setMetadata(Kind, MD);
931  }
932 
933  return Inst;
934 }
935 
936 Constant *
937 llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
938  const InterleaveGroup<Instruction> &Group) {
939  // All 1's means mask is not needed.
940  if (Group.getNumMembers() == Group.getFactor())
941  return nullptr;
942 
943  // TODO: support reversed access.
944  assert(!Group.isReverse() && "Reversed group not supported.");
945 
946  SmallVector<Constant *, 16> Mask;
947  for (unsigned i = 0; i < VF; i++)
948  for (unsigned j = 0; j < Group.getFactor(); ++j) {
949  unsigned HasMember = Group.getMember(j) ? 1 : 0;
950  Mask.push_back(Builder.getInt1(HasMember));
951  }
952 
953  return ConstantVector::get(Mask);
954 }
955 
956 llvm::SmallVector<int, 16>
957 llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
958  SmallVector<int, 16> MaskVec;
959  for (unsigned i = 0; i < VF; i++)
960  for (unsigned j = 0; j < ReplicationFactor; j++)
961  MaskVec.push_back(i);
962 
963  return MaskVec;
964 }
965 
966 llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
967  unsigned NumVecs) {
968  SmallVector<int, 16> Mask;
969  for (unsigned i = 0; i < VF; i++)
970  for (unsigned j = 0; j < NumVecs; j++)
971  Mask.push_back(j * VF + i);
972 
973  return Mask;
974 }
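// Illustrative example (editorial sketch, not part of the upstream file):
// createInterleaveMask(4, 2) yields <0, 4, 1, 5, 2, 6, 3, 7>, i.e. the mask
// that interleaves two concatenated 4-element vectors AAAA and BBBB into
// ABABABAB.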
975 
976 llvm::SmallVector<int, 16>
977 llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
978  SmallVector<int, 16> Mask;
979  for (unsigned i = 0; i < VF; i++)
980  Mask.push_back(Start + i * Stride);
981 
982  return Mask;
983 }
984 
985 llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
986  unsigned NumInts,
987  unsigned NumUndefs) {
988  SmallVector<int, 16> Mask;
989  for (unsigned i = 0; i < NumInts; i++)
990  Mask.push_back(Start + i);
991 
992  for (unsigned i = 0; i < NumUndefs; i++)
993  Mask.push_back(-1);
994 
995  return Mask;
996 }
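// Illustrative example (editorial sketch, not part of the upstream file):
// createSequentialMask(3, 4, 2) yields <3, 4, 5, 6, -1, -1>; the trailing
// undef (-1) lanes provide the padding used, for instance, by
// concatenateTwoVectors below when the second operand is shorter.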
997 
998 llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
999  unsigned NumElts) {
1000  // Avoid casts in the loop and make sure we have a reasonable number.
1001  int NumEltsSigned = NumElts;
1002  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
1003 
1004  // If the mask chooses an element from operand 1, reduce it to choose from the
1005  // corresponding element of operand 0. Undef mask elements are unchanged.
1006  SmallVector<int, 16> UnaryMask;
1007  for (int MaskElt : Mask) {
1008  assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
1009  int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
1010  UnaryMask.push_back(UnaryElt);
1011  }
1012  return UnaryMask;
1013 }
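// Illustrative example (editorial sketch, not part of the upstream file): with
// NumElts = 4 the two-operand mask <0, 5, 2, 7> becomes the unary mask
// <0, 1, 2, 3>, since lanes taken from operand 1 (indices 4..7) are remapped
// onto the corresponding lanes of operand 0.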
1014 
1015 /// A helper function for concatenating vectors. This function concatenates two
1016 /// vectors having the same element type. If the second vector has fewer
1017 /// elements than the first, it is padded with undefs.
1018 static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
1019  Value *V2) {
1020  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
1021  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
1022  assert(VecTy1 && VecTy2 &&
1023  VecTy1->getScalarType() == VecTy2->getScalarType() &&
1024  "Expect two vectors with the same element type");
1025 
1026  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
1027  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
1028  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
1029 
1030  if (NumElts1 > NumElts2) {
1031  // Extend with UNDEFs.
1032  V2 = Builder.CreateShuffleVector(
1033  V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
1034  }
1035 
1036  return Builder.CreateShuffleVector(
1037  V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
1038 }
1039 
1040 Value *llvm::concatenateVectors(IRBuilderBase &Builder,
1041  ArrayRef<Value *> Vecs) {
1042  unsigned NumVecs = Vecs.size();
1043  assert(NumVecs > 1 && "Should be at least two vectors");
1044 
1045  SmallVector<Value *, 8> ResList;
1046  ResList.append(Vecs.begin(), Vecs.end());
1047  do {
1048  SmallVector<Value *, 8> TmpList;
1049  for (unsigned i = 0; i < NumVecs - 1; i += 2) {
1050  Value *V0 = ResList[i], *V1 = ResList[i + 1];
1051  assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
1052  "Only the last vector may have a different type");
1053 
1054  TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
1055  }
1056 
1057  // Push the last vector if the total number of vectors is odd.
1058  if (NumVecs % 2 != 0)
1059  TmpList.push_back(ResList[NumVecs - 1]);
1060 
1061  ResList = TmpList;
1062  NumVecs = ResList.size();
1063  } while (NumVecs > 1);
1064 
1065  return ResList[0];
1066 }
1067 
1068 bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
1069  assert(isa<VectorType>(Mask->getType()) &&
1070  isa<IntegerType>(Mask->getType()->getScalarType()) &&
1071  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1072  1 &&
1073  "Mask must be a vector of i1");
1074 
1075  auto *ConstMask = dyn_cast<Constant>(Mask);
1076  if (!ConstMask)
1077  return false;
1078  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1079  return true;
1080  if (isa<ScalableVectorType>(ConstMask->getType()))
1081  return false;
1082  for (unsigned
1083  I = 0,
1084  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1085  I != E; ++I) {
1086  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1087  if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1088  continue;
1089  return false;
1090  }
1091  return true;
1092 }
1093 
1094 bool llvm::maskIsAllOneOrUndef(Value *Mask) {
1095  assert(isa<VectorType>(Mask->getType()) &&
1096  isa<IntegerType>(Mask->getType()->getScalarType()) &&
1097  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1098  1 &&
1099  "Mask must be a vector of i1");
1100 
1101  auto *ConstMask = dyn_cast<Constant>(Mask);
1102  if (!ConstMask)
1103  return false;
1104  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1105  return true;
1106  if (isa<ScalableVectorType>(ConstMask->getType()))
1107  return false;
1108  for (unsigned
1109  I = 0,
1110  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1111  I != E; ++I) {
1112  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1113  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1114  continue;
1115  return false;
1116  }
1117  return true;
1118 }
1119 
1120 /// TODO: This is a lot like known bits, but for
1121 /// vectors. Is there something we can common this with?
1122 APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
1123  assert(isa<FixedVectorType>(Mask->getType()) &&
1124  isa<IntegerType>(Mask->getType()->getScalarType()) &&
1125  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1126  1 &&
1127  "Mask must be a fixed width vector of i1");
1128 
1129  const unsigned VWidth =
1130  cast<FixedVectorType>(Mask->getType())->getNumElements();
1131  APInt DemandedElts = APInt::getAllOnes(VWidth);
1132  if (auto *CV = dyn_cast<ConstantVector>(Mask))
1133  for (unsigned i = 0; i < VWidth; i++)
1134  if (CV->getAggregateElement(i)->isNullValue())
1135  DemandedElts.clearBit(i);
1136  return DemandedElts;
1137 }
1138 
1139 bool InterleavedAccessInfo::isStrided(int Stride) {
1140  unsigned Factor = std::abs(Stride);
1141  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
1142 }
1143 
1144 void InterleavedAccessInfo::collectConstStrideAccesses(
1145  MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
1146  const ValueToValueMap &Strides) {
1147  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
1148 
1149  // Since it's desired that the load/store instructions be maintained in
1150  // "program order" for the interleaved access analysis, we have to visit the
1151  // blocks in the loop in reverse postorder (i.e., in a topological order).
1152  // Such an ordering will ensure that any load/store that may be executed
1153  // before a second load/store will precede the second load/store in
1154  // AccessStrideInfo.
1155  LoopBlocksDFS DFS(TheLoop);
1156  DFS.perform(LI);
1157  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
1158  for (auto &I : *BB) {
1159  Value *Ptr = getLoadStorePointerOperand(&I);
1160  if (!Ptr)
1161  continue;
1162  Type *ElementTy = getLoadStoreType(&I);
1163 
1164  // Currently, codegen doesn't support cases where the type size doesn't
1165  // match the alloc size. Skip them for now.
1166  uint64_t Size = DL.getTypeAllocSize(ElementTy);
1167  if (Size * 8 != DL.getTypeSizeInBits(ElementTy))
1168  continue;
1169 
1170  // We don't check wrapping here because we don't know yet if Ptr will be
1171  // part of a full group or a group with gaps. Checking wrapping for all
1172  // pointers (even those that end up in groups with no gaps) will be overly
1173  // conservative. For full groups, wrapping should be ok since if we would
1174  // wrap around the address space we would do a memory access at nullptr
1175  // even without the transformation. The wrapping checks are therefore
1176  // deferred until after we've formed the interleaved groups.
1177  int64_t Stride =
1178  getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
1179  /*Assume=*/true, /*ShouldCheckWrap=*/false).value_or(0);
1180 
1181  const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
1182  AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1183  getLoadStoreAlignment(&I));
1184  }
1185 }
1186 
1187 // Analyze interleaved accesses and collect them into interleaved load and
1188 // store groups.
1189 //
1190 // When generating code for an interleaved load group, we effectively hoist all
1191 // loads in the group to the location of the first load in program order. When
1192 // generating code for an interleaved store group, we sink all stores to the
1193 // location of the last store. This code motion can change the order of load
1194 // and store instructions and may break dependences.
1195 //
1196 // The code generation strategy mentioned above ensures that we won't violate
1197 // any write-after-read (WAR) dependences.
1198 //
1199 // E.g., for the WAR dependence: a = A[i]; // (1)
1200 // A[i] = b; // (2)
1201 //
1202 // The store group of (2) is always inserted at or below (2), and the load
1203 // group of (1) is always inserted at or above (1). Thus, the instructions will
1204 // never be reordered. All other dependences are checked to ensure the
1205 // correctness of the instruction reordering.
1206 //
1207 // The algorithm visits all memory accesses in the loop in bottom-up program
1208 // order. Program order is established by traversing the blocks in the loop in
1209 // reverse postorder when collecting the accesses.
1210 //
1211 // We visit the memory accesses in bottom-up order because it can simplify the
1212 // construction of store groups in the presence of write-after-write (WAW)
1213 // dependences.
1214 //
1215 // E.g., for the WAW dependence: A[i] = a; // (1)
1216 // A[i] = b; // (2)
1217 // A[i + 1] = c; // (3)
1218 //
1219 // We will first create a store group with (3) and (2). (1) can't be added to
1220 // this group because it and (2) are dependent. However, (1) can be grouped
1221 // with other accesses that may precede it in program order. Note that a
1222 // bottom-up order does not imply that WAW dependences should not be checked.
1223 void InterleavedAccessInfo::analyzeInterleaving(
1224  bool EnablePredicatedInterleavedMemAccesses) {
1225  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1226  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
1227 
1228  // Holds all accesses with a constant stride.
1229  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
1230  collectConstStrideAccesses(AccessStrideInfo, Strides);
1231 
1232  if (AccessStrideInfo.empty())
1233  return;
1234 
1235  // Collect the dependences in the loop.
1236  collectDependences();
1237 
1238  // Holds all interleaved store groups temporarily.
1239  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
1240  // Holds all interleaved load groups temporarily.
1241  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
1242 
1243  // Search in bottom-up program order for pairs of accesses (A and B) that can
1244  // form interleaved load or store groups. In the algorithm below, access A
1245  // precedes access B in program order. We initialize a group for B in the
1246  // outer loop of the algorithm, and then in the inner loop, we attempt to
1247  // insert each A into B's group if:
1248  //
1249  // 1. A and B have the same stride,
1250  // 2. A and B have the same memory object size, and
1251  // 3. A belongs in B's group according to its distance from B.
1252  //
1253  // Special care is taken to ensure group formation will not break any
1254  // dependences.
1255  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1256  BI != E; ++BI) {
1257  Instruction *B = BI->first;
1258  StrideDescriptor DesB = BI->second;
1259 
1260  // Initialize a group for B if it has an allowable stride. Even if we don't
1261  // create a group for B, we continue with the bottom-up algorithm to ensure
1262  // we don't break any of B's dependences.
1263  InterleaveGroup<Instruction> *Group = nullptr;
1264  if (isStrided(DesB.Stride) &&
1265  (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1266  Group = getInterleaveGroup(B);
1267  if (!Group) {
1268  LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1269  << '\n');
1270  Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1271  }
1272  if (B->mayWriteToMemory())
1273  StoreGroups.insert(Group);
1274  else
1275  LoadGroups.insert(Group);
1276  }
1277 
1278  for (auto AI = std::next(BI); AI != E; ++AI) {
1279  Instruction *A = AI->first;
1280  StrideDescriptor DesA = AI->second;
1281 
1282  // Our code motion strategy implies that we can't have dependences
1283  // between accesses in an interleaved group and other accesses located
1284  // between the first and last member of the group. Note that this also
1285  // means that a group can't have more than one member at a given offset.
1286  // The accesses in a group can have dependences with other accesses, but
1287  // we must ensure we don't extend the boundaries of the group such that
1288  // we encompass those dependent accesses.
1289  //
1290  // For example, assume we have the sequence of accesses shown below in a
1291  // stride-2 loop:
1292  //
1293  // (1, 2) is a group | A[i] = a; // (1)
1294  // | A[i-1] = b; // (2) |
1295  // A[i-3] = c; // (3)
1296  // A[i] = d; // (4) | (2, 4) is not a group
1297  //
1298  // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1299  // but not with (4). If we did, the dependent access (3) would be within
1300  // the boundaries of the (2, 4) group.
1301  if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
1302  // If a dependence exists and A is already in a group, we know that A
1303  // must be a store since A precedes B and WAR dependences are allowed.
1304  // Thus, A would be sunk below B. We release A's group to prevent this
1305  // illegal code motion. A will then be free to form another group with
1306  // instructions that precede it.
1307  if (isInterleaved(A)) {
1308  InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
1309 
1310  LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1311  "dependence between " << *A << " and "<< *B << '\n');
1312 
1313  StoreGroups.remove(StoreGroup);
1314  releaseGroup(StoreGroup);
1315  }
1316 
1317  // If a dependence exists and A is not already in a group (or it was
1318  // and we just released it), B might be hoisted above A (if B is a
1319  // load) or another store might be sunk below A (if B is a store). In
1320  // either case, we can't add additional instructions to B's group. B
1321  // will only form a group with instructions that it precedes.
1322  break;
1323  }
1324 
1325  // At this point, we've checked for illegal code motion. If either A or B
1326  // isn't strided, there's nothing left to do.
1327  if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1328  continue;
1329 
1330  // Ignore A if it's already in a group or isn't the same kind of memory
1331  // operation as B.
1332  // Note that mayReadFromMemory() isn't mutually exclusive to
1333  // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1334  // here, canVectorizeMemory() should have returned false - except for the
1335  // case we asked for optimization remarks.
1336  if (isInterleaved(A) ||
1337  (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1338  (A->mayWriteToMemory() != B->mayWriteToMemory()))
1339  continue;
1340 
1341  // Check rules 1 and 2. Ignore A if its stride or size is different from
1342  // that of B.
1343  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1344  continue;
1345 
1346  // Ignore A if the memory object of A and B don't belong to the same
1347  // address space.
1348  if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1349  continue;
1350 
1351  // Calculate the distance from A to B.
1352  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1353  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1354  if (!DistToB)
1355  continue;
1356  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1357 
1358  // Check rule 3. Ignore A if its distance to B is not a multiple of the
1359  // size.
1360  if (DistanceToB % static_cast<int64_t>(DesB.Size))
1361  continue;
1362 
1363  // All members of a predicated interleave-group must have the same predicate,
1364  // and currently must reside in the same BB.
1365  BasicBlock *BlockA = A->getParent();
1366  BasicBlock *BlockB = B->getParent();
1367  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1368  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1369  continue;
1370 
1371  // The index of A is the index of B plus A's distance to B in multiples
1372  // of the size.
1373  int IndexA =
1374  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1375 
1376  // Try to insert A into B's group.
1377  if (Group->insertMember(A, IndexA, DesA.Alignment)) {
1378  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1379  << " into the interleave group with" << *B
1380  << '\n');
1381  InterleaveGroupMap[A] = Group;
1382 
1383  // Set the first load in program order as the insert position.
1384  if (A->mayReadFromMemory())
1385  Group->setInsertPos(A);
1386  }
1387  } // Iteration over A accesses.
1388  } // Iteration over B accesses.
1389 
1390  auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
1391  int Index,
1392  std::string FirstOrLast) -> bool {
1393  Instruction *Member = Group->getMember(Index);
1394  assert(Member && "Group member does not exist");
1395  Value *MemberPtr = getLoadStorePointerOperand(Member);
1396  Type *AccessTy = getLoadStoreType(Member);
1397  if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
1398  /*Assume=*/false, /*ShouldCheckWrap=*/true).value_or(0))
1399  return false;
1400  LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1401  << FirstOrLast
1402  << " group member potentially pointer-wrapping.\n");
1403  releaseGroup(Group);
1404  return true;
1405  };
1406 
1407  // Remove interleaved groups with gaps whose memory
1408  // accesses may wrap around. We have to revisit the getPtrStride analysis,
1409  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1410  // not check wrapping (see documentation there).
1411  // FORNOW we use Assume=false;
1412  // TODO: Change to Assume=true but making sure we don't exceed the threshold
1413  // of runtime SCEV assumptions checks (thereby potentially failing to
1414  // vectorize altogether).
1415  // Additional optional optimizations:
1416  // TODO: If we are peeling the loop and we know that the first pointer doesn't
1417  // wrap then we can deduce that all pointers in the group don't wrap.
1418  // This means that we can forcefully peel the loop in order to only have to
1419  // check the first pointer for no-wrap. When we'll change to use Assume=true
1420  // we'll only need at most one runtime check per interleaved group.
1421  for (auto *Group : LoadGroups) {
1422  // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1423  // load would wrap around the address space we would do a memory access at
1424  // nullptr even without the transformation.
1425  if (Group->getNumMembers() == Group->getFactor())
1426  continue;
1427 
1428  // Case 2: If first and last members of the group don't wrap this implies
1429  // that all the pointers in the group don't wrap.
1430  // So we check only group member 0 (which is always guaranteed to exist),
1431  // and group member Factor - 1; If the latter doesn't exist we rely on
1432  // peeling (if it is a non-reversed access -- see Case 3).
1433  if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1434  continue;
1435  if (Group->getMember(Group->getFactor() - 1))
1436  InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
1437  std::string("last"));
1438  else {
1439  // Case 3: A non-reversed interleaved load group with gaps: We need
1440  // to execute at least one scalar epilogue iteration. This will ensure
1441  // we don't speculatively access memory out-of-bounds. We only need
1442  // to look for a member at index factor - 1, since every group must have
1443  // a member at index zero.
1444  if (Group->isReverse()) {
1445  LLVM_DEBUG(
1446  dbgs() << "LV: Invalidate candidate interleaved group due to "
1447  "a reverse access with gaps.\n");
1448  releaseGroup(Group);
1449  continue;
1450  }
1451  LLVM_DEBUG(
1452  dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1453  RequiresScalarEpilogue = true;
1454  }
1455  }
1456 
1457  for (auto *Group : StoreGroups) {
1458  // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1459  // store would wrap around the address space we would do a memory access at
1460  // nullptr even without the transformation.
1461  if (Group->getNumMembers() == Group->getFactor())
1462  continue;
1463 
1464  // Interleave-store-group with gaps is implemented using masked wide store.
1465  // Remove interleaved store groups with gaps if
1466  // masked-interleaved-accesses are not enabled by the target.
1467  if (!EnablePredicatedInterleavedMemAccesses) {
1468  LLVM_DEBUG(
1469  dbgs() << "LV: Invalidate candidate interleaved store group due "
1470  "to gaps.\n");
1471  releaseGroup(Group);
1472  continue;
1473  }
1474 
1475  // Case 2: If first and last members of the group don't wrap this implies
1476  // that all the pointers in the group don't wrap.
1477  // So we check only group member 0 (which is always guaranteed to exist),
1478  // and the last group member. Case 3 (scalar epilog) is not relevant for
1479  // stores with gaps, which are implemented with masked-store (rather than
1480  // speculative access, as in loads).
1481  if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1482  continue;
1483  for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1484  if (Group->getMember(Index)) {
1485  InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
1486  break;
1487  }
1488  }
1489 }
1490 
1491 void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1492  // If no group had triggered the requirement to create an epilogue loop,
1493  // there is nothing to do.
1494  if (!requiresScalarEpilogue())
1495  return;
1496 
1497  bool ReleasedGroup = false;
1498  // Release groups requiring scalar epilogues. Note that this also removes them
1499  // from InterleaveGroups.
1500  for (auto *Group : make_early_inc_range(InterleaveGroups)) {
1501  if (!Group->requiresScalarEpilogue())
1502  continue;
1503  LLVM_DEBUG(
1504  dbgs()
1505  << "LV: Invalidate candidate interleaved group due to gaps that "
1506  "require a scalar epilogue (not allowed under optsize) and cannot "
1507  "be masked (not enabled). \n");
1508  releaseGroup(Group);
1509  ReleasedGroup = true;
1510  }
1511  assert(ReleasedGroup && "At least one group must be invalidated, as a "
1512  "scalar epilogue was required");
1513  (void)ReleasedGroup;
1514  RequiresScalarEpilogue = false;
1515 }
1516 
1517 template <typename InstT>
1518 void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1519  llvm_unreachable("addMetadata can only be used for Instruction");
1520 }
1521 
1522 namespace llvm {
1523 template <>
1524 void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1525  SmallVector<Value *, 4> VL;
1526  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1527  [](std::pair<int, Instruction *> p) { return p.second; });
1528  propagateMetadata(NewInst, VL);
1529 }
1530 }
1531 
1532 std::string VFABI::mangleTLIVectorName(StringRef VectorName,
1533  StringRef ScalarName, unsigned numArgs,
1534  ElementCount VF) {
1535  SmallString<256> Buffer;
1536  llvm::raw_svector_ostream Out(Buffer);
1537  Out << "_ZGV" << VFABI::_LLVM_ << "N";
1538  if (VF.isScalable())
1539  Out << 'x';
1540  else
1541  Out << VF.getFixedValue();
1542  for (unsigned I = 0; I < numArgs; ++I)
1543  Out << "v";
1544  Out << "_" << ScalarName << "(" << VectorName << ")";
1545  return std::string(Out.str());
1546 }
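// Illustrative example (editorial sketch, assuming VFABI::_LLVM_ expands to
// "_LLVM_"): mangleTLIVectorName("vsinf", "sinf", /*numArgs=*/1,
// ElementCount::getFixed(4)) produces "_ZGV_LLVM_N4v_sinf(vsinf)"; a scalable
// VF emits 'x' in place of the lane count.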
1547 
1548 void VFABI::getVectorVariantNames(
1549  const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
1550  const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
1551  if (S.empty())
1552  return;
1553 
1554  SmallVector<StringRef, 8> ListAttr;
1555  S.split(ListAttr, ",");
1556 
1557  for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
1558 #ifndef NDEBUG
1559  LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
1560  std::optional<VFInfo> Info =
1561  VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
1562  assert(Info && "Invalid name for a VFABI variant.");
1563  assert(CI.getModule()->getFunction(Info->VectorName) &&
1564  "Vector function is missing.");
1565 #endif
1566  VariantMappings.push_back(std::string(S));
1567  }
1568 }
1569 
1570 bool VFShape::hasValidParameterList() const {
1571  for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
1572  ++Pos) {
1573  assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
1574 
1575  switch (Parameters[Pos].ParamKind) {
1576  default: // Nothing to check.
1577  break;
1578  case VFParamKind::OMP_Linear:
1579  case VFParamKind::OMP_LinearRef:
1580  case VFParamKind::OMP_LinearVal:
1581  case VFParamKind::OMP_LinearUVal:
1582  // Compile time linear steps must be non-zero.
1583  if (Parameters[Pos].LinearStepOrPos == 0)
1584  return false;
1585  break;
1586  case VFParamKind::OMP_LinearPos:
1587  case VFParamKind::OMP_LinearRefPos:
1588  case VFParamKind::OMP_LinearValPos:
1589  case VFParamKind::OMP_LinearUValPos:
1590  // The runtime linear step must be referring to some other
1591  // parameters in the signature.
1592  if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
1593  return false;
1594  // The linear step parameter must be marked as uniform.
1595  if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
1596  VFParamKind::OMP_Uniform)
1597  return false;
1598  // The linear step parameter can't point at itself.
1599  if (Parameters[Pos].LinearStepOrPos == int(Pos))
1600  return false;
1601  break;
1602  case VFParamKind::GlobalPredicate:
1603  // The global predicate must be unique and can be placed anywhere in the
1604  // signature.
1605  for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
1606  if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
1607  return false;
1608  break;
1609  }
1610  }
1611  return true;
1612 }
Definition: PatternMatch.h:979
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1516
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:60
llvm::getSplatValue
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition: VectorUtils.cpp:371
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::VFABI::getVectorVariantNames
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
Definition: VectorUtils.cpp:1548
concatenateTwoVectors
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
Definition: VectorUtils.cpp:1018
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:452
llvm::maskIsAllZeroOrUndef
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:1068
llvm::MaxAnalysisRecursionDepth
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:46
ValueTracking.h
llvm::isVectorIntrinsicWithScalarOpAtArg
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:101
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138
llvm::computeMinimumValueSizes
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition: VectorUtils.cpp:666
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5408
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
ScalarEvolution.h
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:469
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
MaxInterleaveGroupFactor
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
llvm::getUniqueCastUse
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Definition: VectorUtils.cpp:193
llvm::createUnaryMask
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition: VectorUtils.cpp:998
llvm::isVectorIntrinsicWithOverloadTypeAtArg
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx)
Identifies if the vector form of the intrinsic has a operand that has an overloaded type.
Definition: VectorUtils.cpp:119
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:44
llvm::VFParamKind::OMP_Uniform
@ OMP_Uniform
llvm::propagateMetadata
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition: VectorUtils.cpp:891
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:175
llvm::uniteAccessGroups
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
Definition: VectorUtils.cpp:823
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1903
llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:285
p
the resulting code requires compare and branches when and if * p
Definition: README.txt:396
llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2185
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1399
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1455
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1297
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
llvm::AArch64PACKey::DB
@ DB
Definition: AArch64BaseInfo.h:828
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:2005
llvm::VFShape::hasValidParameterList
bool hasValidParameterList() const
Validation check on the Parameters in the VFShape.
Definition: VectorUtils.cpp:1570
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:491
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1735
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition: VectorUtils.cpp:386
llvm::VFABI::mangleTLIVectorName
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for vector-function-...
Definition: VectorUtils.cpp:1532
llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Definition: VectorUtils.cpp:1491
llvm::createInterleaveMask
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition: VectorUtils.cpp:966
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1462
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:27
llvm::ARM_MC::isPredicated
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
Definition: ARMMCTargetDesc.cpp:170
llvm::Instruction::mayReadOrWriteMemory
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:622
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
getScalarSizeInBits
static unsigned getScalarSizeInBits(Type *Ty)
Definition: SystemZTargetTransformInfo.cpp:403
llvm::APInt::isZero
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:366
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::isTriviallyVectorizable
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:44
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:209
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:56
llvm::VFParamKind::OMP_LinearVal
@ OMP_LinearVal
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::MDNode::operands
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1289
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:258
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1945
llvm::NextPowerOf2
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:437
llvm::VFParamKind::OMP_LinearUValPos
@ OMP_LinearUValPos
llvm::dwarf::Index
Index
Definition: Dwarf.h:550
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::SCEVMulExpr
This node represents multiplication of some number of SCEVs.
Definition: ScalarEvolutionExpressions.h:273
llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition: Instructions.h:2072
llvm::EquivalenceClasses::end
iterator end() const
Definition: EquivalenceClasses.h:168
llvm::Instruction
Definition: Instruction.h:41
llvm::MapVector::rend
reverse_iterator rend()
Definition: MapVector.h:77
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1740
llvm::VFABI::tryDemangleForVFABI
std::optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled names in the following format:
Definition: VFABIDemangling.cpp:317
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::EquivalenceClasses::begin
iterator begin() const
Definition: EquivalenceClasses.h:167
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
PatternMatch.h
llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition: VectorUtils.h:638
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:61
llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:693
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:312
LoopIterator.h
llvm::VFParamKind::OMP_LinearValPos
@ OMP_LinearValPos
llvm::SCEVUnknown::getValue
Value * getValue() const
Definition: ScalarEvolutionExpressions.h:580
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2063
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:275
llvm::SmallString< 256 >
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1327
LoopInfo.h
llvm::InterleaveGroup::setInsertPos
void setInsertPos(InstTy *Inst)
Definition: VectorUtils.h:710
llvm::VFABI::_LLVM_
static constexpr const char * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
Definition: VectorUtils.h:132
llvm::all_equal
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition: STLExtras.h:1986
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4534
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::getShuffleDemandedElts
bool getShuffleDemandedElts(int SrcWidth, ArrayRef< int > Mask, const APInt &DemandedElts, APInt &DemandedLHS, APInt &DemandedRHS, bool AllowUndefElts=false)
Transform a shuffle mask's output demanded element mask into demanded element masks for the 2 operand...
Definition: VectorUtils.cpp:432
llvm::InterleaveGroup::getIndex
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition: VectorUtils.h:700
VectorUtils.h
llvm::cl::opt
Definition: CommandLine.h:1410
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:274
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:537
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MDNode::intersect
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1019
llvm::VFParamKind::OMP_LinearRef
@ OMP_LinearRef
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition: VectorUtils.h:639
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:31
llvm::DenseMap
Definition: DenseMap.h:714
llvm::DemandedBits
Definition: DemandedBits.h:40
I
#define I(x, y, z)
Definition: MD5.cpp:58
addToAccessGroupList
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Definition: VectorUtils.cpp:808
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:940
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:721
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:1040
llvm::GetElementPtrInst::getResultElementType
Type * getResultElementType() const
Definition: Instructions.h:1020
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
llvm::MDNode::getMostGenericTBAA
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
Definition: TypeBasedAliasAnalysis.cpp:477
llvm::details::FixedOrScalableQuantity::getFixedValue
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:182
maximum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For maximum
Definition: README.txt:489
IRBuilder.h
llvm::InterleavedAccessInfo::analyzeInterleaving
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
Definition: VectorUtils.cpp:1223
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:70
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:62
llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1523
llvm::getLoadStoreAddressSpace
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
Definition: Instructions.h:5399
llvm::MDNode
Metadata node.
Definition: Metadata.h:943
llvm::createBitMaskForGaps
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition: VectorUtils.cpp:937
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
getType
static M68kRelType getType(unsigned Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel)
Definition: M68kELFObjectWriter.cpp:48
llvm::EquivalenceClasses::getOrInsertLeaderValue
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
Definition: EquivalenceClasses.h:200
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2175
llvm::intersectAccessGroups
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Definition: VectorUtils.cpp:844
llvm::CallBase::getFnAttr
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Definition: InstrTypes.h:1625
llvm::ArrayRef< int >
llvm::replaceSymbolicStrideSCEV
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition: LoopAccessAnalysis.cpp:150
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::EquivalenceClasses::member_begin
member_iterator member_begin(iterator I) const
Definition: EquivalenceClasses.h:174
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:697
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:418
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1551
llvm::processShuffleMasks
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
Definition: VectorUtils.cpp:555
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::MDNode::getMostGenericAliasScope
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1032
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
trunc
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g trunc
Definition: README-FPStack.txt:63
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1356
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:428
llvm::createReplicatedMask
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition: VectorUtils.cpp:957
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:166
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
llvm::MapVector::rbegin
reverse_iterator rbegin()
Definition: MapVector.h:75
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13775
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1762
DemandedBits.h
llvm::MDNode::getContext
LLVMContext & getContext() const
Definition: Metadata.h:1107
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:80
j
return j(j<< 16)
llvm::VFParamKind::OMP_LinearPos
@ OMP_LinearPos
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2190
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition: VectorUtils.cpp:490
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:124
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1316
llvm::isValidAsAccessGroup
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
Definition: LoopInfo.cpp:1122
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:708
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:151
llvm::TypeSize
Definition: TypeSize.h:314
llvm::getIntrinsicForCallSite
Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
Definition: ValueTracking.cpp:3411
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:330
llvm::VFParamKind::OMP_LinearRefPos
@ OMP_LinearRefPos
llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
Definition: ScalarEvolutionExpressions.h:559
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:234
EquivalenceClasses.h
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
powi
This is blocked on not handling X *X *X powi(X, 3)(see note above). The issue is that we end up getting t
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:794
transform
instcombine should handle this transform
Definition: README.txt:262
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:614
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:524
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:439
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition: VectorUtils.cpp:285
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition: ScalarEvolutionExpressions.h:70
llvm::VFParamKind::OMP_LinearUVal
@ OMP_LinearUVal
ScalarEvolutionExpressions.h
llvm::getShuffleMaskWithWidestElts
void getShuffleMaskWithWidestElts(ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Repetitively apply widenShuffleMaskElts() for as long as it succeeds, to get the shuffle mask with wi...
Definition: VectorUtils.cpp:541
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2017
llvm::SCEVIntegralCastExpr
This is the base class for unary integral cast operator classes.
Definition: ScalarEvolutionExpressions.h:124
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::getSplatIndex
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition: VectorUtils.cpp:349
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:81
llvm::MDNode::getMostGenericFPMath
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1064
List
const NodeList & List
Definition: RDFGraph.cpp:199
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:677
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::VFABI::MappingsAttrName
static constexpr const char * MappingsAttrName
Definition: VectorUtils.h:194
llvm::stripGetElementPtr
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
Definition: VectorUtils.cpp:176
TargetTransformInfo.h
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:672
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl< int >
llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Definition: EquivalenceClasses.h:238
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:300
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5363
llvm::createStrideMask
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition: VectorUtils.cpp:977
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1485
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::getPtrStride
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1369
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:411
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:985
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:667
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:293
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1302
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::VFParamKind::OMP_Linear
@ OMP_Linear
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:152
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2180
llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
Definition: Instructions.h:5389
llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:772
llvm::InterleaveGroup::getNumMembers
uint32_t getNumMembers() const
Definition: VectorUtils.h:641
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Definition: VectorUtils.cpp:1518
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39