VectorUtils.cpp
1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines vectorizer utilities.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/VectorUtils.h"
14 #include "llvm/ADT/EquivalenceClasses.h"
15 #include "llvm/Analysis/DemandedBits.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/Analysis/LoopIterator.h"
18 #include "llvm/Analysis/ScalarEvolution.h"
19 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/IR/DerivedTypes.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/PatternMatch.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/CommandLine.h"
28 
29 #define DEBUG_TYPE "vectorutils"
30 
31 using namespace llvm;
32 using namespace llvm::PatternMatch;
33 
34 /// Maximum factor for an interleaved memory access.
35 static cl::opt<unsigned> MaxInterleaveGroupFactor(
36  "max-interleave-group-factor", cl::Hidden,
37  cl::desc("Maximum factor for an interleaved access group (default = 8)"),
38  cl::init(8));
39 
40 /// Return true if all of the intrinsic's arguments and return type are scalars
41 /// for the scalar form of the intrinsic, and vectors for the vector form of the
42 /// intrinsic (except operands that are marked as always being scalar by
43 /// isVectorIntrinsicWithScalarOpAtArg).
44 bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
45  switch (ID) {
46  case Intrinsic::abs: // Begin integer bit-manipulation.
47  case Intrinsic::bswap:
48  case Intrinsic::bitreverse:
49  case Intrinsic::ctpop:
50  case Intrinsic::ctlz:
51  case Intrinsic::cttz:
52  case Intrinsic::fshl:
53  case Intrinsic::fshr:
54  case Intrinsic::smax:
55  case Intrinsic::smin:
56  case Intrinsic::umax:
57  case Intrinsic::umin:
58  case Intrinsic::sadd_sat:
59  case Intrinsic::ssub_sat:
60  case Intrinsic::uadd_sat:
61  case Intrinsic::usub_sat:
62  case Intrinsic::smul_fix:
63  case Intrinsic::smul_fix_sat:
64  case Intrinsic::umul_fix:
65  case Intrinsic::umul_fix_sat:
66  case Intrinsic::sqrt: // Begin floating-point.
67  case Intrinsic::sin:
68  case Intrinsic::cos:
69  case Intrinsic::exp:
70  case Intrinsic::exp2:
71  case Intrinsic::log:
72  case Intrinsic::log10:
73  case Intrinsic::log2:
74  case Intrinsic::fabs:
75  case Intrinsic::minnum:
76  case Intrinsic::maxnum:
77  case Intrinsic::minimum:
78  case Intrinsic::maximum:
79  case Intrinsic::copysign:
80  case Intrinsic::floor:
81  case Intrinsic::ceil:
82  case Intrinsic::trunc:
83  case Intrinsic::rint:
84  case Intrinsic::nearbyint:
85  case Intrinsic::round:
86  case Intrinsic::roundeven:
87  case Intrinsic::pow:
88  case Intrinsic::fma:
89  case Intrinsic::fmuladd:
90  case Intrinsic::powi:
91  case Intrinsic::canonicalize:
92  case Intrinsic::fptosi_sat:
93  case Intrinsic::fptoui_sat:
94  return true;
95  default:
96  return false;
97  }
98 }
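// Illustrative usage sketch (CI, TLI and VF are assumed to be supplied by the
// caller): a vectorizer typically combines this predicate with
// getVectorIntrinsicIDForCall before deciding to widen a call.
//
//   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
//   if (ID != Intrinsic::not_intrinsic && isTriviallyVectorizable(ID))
//     ;  // safe to re-emit the same intrinsic with <VF x Ty> operands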
99 
100 /// Identifies if the vector form of the intrinsic has a scalar operand.
101 bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
102  unsigned ScalarOpdIdx) {
103  switch (ID) {
104  case Intrinsic::abs:
105  case Intrinsic::ctlz:
106  case Intrinsic::cttz:
107  case Intrinsic::powi:
108  return (ScalarOpdIdx == 1);
109  case Intrinsic::smul_fix:
110  case Intrinsic::smul_fix_sat:
111  case Intrinsic::umul_fix:
112  case Intrinsic::umul_fix_sat:
113  return (ScalarOpdIdx == 2);
114  default:
115  return false;
116  }
117 }
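// Illustrative sketch (CI, ID, VecOps and widenToVector are assumed helper
// names): operands flagged by this predicate keep their scalar form when the
// call is widened; all other operands are widened to vectors.
//
//   for (unsigned Idx = 0, E = CI->arg_size(); Idx != E; ++Idx)
//     VecOps.push_back(isVectorIntrinsicWithScalarOpAtArg(ID, Idx)
//                          ? CI->getArgOperand(Idx)            // stays scalar
//                          : widenToVector(CI->getArgOperand(Idx)));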
118 
119 bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
120  unsigned OpdIdx) {
121  switch (ID) {
122  case Intrinsic::fptosi_sat:
123  case Intrinsic::fptoui_sat:
124  return OpdIdx == 0;
125  case Intrinsic::powi:
126  return OpdIdx == 1;
127  default:
128  return false;
129  }
130 }
131 
132 /// Returns intrinsic ID for call.
133 /// For the given call instruction it finds the matching intrinsic and returns
134 /// its ID; if no such mapping exists, it returns Intrinsic::not_intrinsic.
135 Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
136  const TargetLibraryInfo *TLI) {
137  Intrinsic::ID ID = getIntrinsicForCallSite(*CI, TLI);
138  if (ID == Intrinsic::not_intrinsic)
139  return Intrinsic::not_intrinsic;
140 
141  if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
142  ID == Intrinsic::lifetime_end || ID == Intrinsic::assume ||
143  ID == Intrinsic::experimental_noalias_scope_decl ||
144  ID == Intrinsic::sideeffect || ID == Intrinsic::pseudoprobe)
145  return ID;
146  return Intrinsic::not_intrinsic;
147 }
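// Illustrative example (CI and TLI assumed): for a call to @llvm.sqrt.f64 this
// returns Intrinsic::sqrt, while a call with no vectorizable mapping (and none
// of the pass-through IDs above) yields Intrinsic::not_intrinsic.
//
//   Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
//   bool CanWiden = ID != Intrinsic::not_intrinsic;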
148 
149 /// Find the operand of the GEP that should be checked for consecutive
150 /// stores. This ignores trailing indices that have no effect on the final
151 /// pointer.
152 unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
153  const DataLayout &DL = Gep->getModule()->getDataLayout();
154  unsigned LastOperand = Gep->getNumOperands() - 1;
155  TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
156 
157  // Walk backwards and try to peel off zeros.
158  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
159  // Find the type we're currently indexing into.
160  gep_type_iterator GEPTI = gep_type_begin(Gep);
161  std::advance(GEPTI, LastOperand - 2);
162 
163  // If it's a type with the same allocation size as the result of the GEP we
164  // can peel off the zero index.
165  if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
166  break;
167  --LastOperand;
168  }
169 
170  return LastOperand;
171 }
172 
173 /// If the argument is a GEP, then returns the operand identified by
174 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
175 /// operand, it returns that instead.
176 Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
177  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
178  if (!GEP)
179  return Ptr;
180 
181  unsigned InductionOperand = getGEPInductionOperand(GEP);
182 
183  // Check that all of the gep indices are uniform except for our induction
184  // operand.
185  for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
186  if (i != InductionOperand &&
187  !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
188  return Ptr;
189  return GEP->getOperand(InductionOperand);
190 }
191 
192 /// If a value has only one user that is a CastInst, return it.
193 static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
194  Value *UniqueCast = nullptr;
195  for (User *U : Ptr->users()) {
196  CastInst *CI = dyn_cast<CastInst>(U);
197  if (CI && CI->getType() == Ty) {
198  if (!UniqueCast)
199  UniqueCast = CI;
200  else
201  return nullptr;
202  }
203  }
204  return UniqueCast;
205 }
206 
207 /// Get the stride of a pointer access in a loop. Looks for symbolic
208 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
209 Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
210  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
211  if (!PtrTy || PtrTy->isAggregateType())
212  return nullptr;
213 
214  // Try to remove a gep instruction to make the pointer (actually the index at
215  // this point) easier to analyze. If OrigPtr is equal to Ptr we are analyzing
216  // the pointer, otherwise we are analyzing the index.
217  Value *OrigPtr = Ptr;
218 
219  // The size of the pointer access.
220  int64_t PtrAccessSize = 1;
221 
222  Ptr = stripGetElementPtr(Ptr, SE, Lp);
223  const SCEV *V = SE->getSCEV(Ptr);
224 
225  if (Ptr != OrigPtr)
226  // Strip off casts.
227  while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
228  V = C->getOperand();
229 
230  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
231  if (!S)
232  return nullptr;
233 
234  V = S->getStepRecurrence(*SE);
235  if (!V)
236  return nullptr;
237 
238  // Strip off the size of access multiplication if we are still analyzing the
239  // pointer.
240  if (OrigPtr == Ptr) {
241  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
242  if (M->getOperand(0)->getSCEVType() != scConstant)
243  return nullptr;
244 
245  const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
246 
247  // Huge step value - give up.
248  if (APStepVal.getBitWidth() > 64)
249  return nullptr;
250 
251  int64_t StepVal = APStepVal.getSExtValue();
252  if (PtrAccessSize != StepVal)
253  return nullptr;
254  V = M->getOperand(1);
255  }
256  }
257 
258  // Strip off casts.
259  Type *StripedOffRecurrenceCast = nullptr;
260  if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
261  StripedOffRecurrenceCast = C->getType();
262  V = C->getOperand();
263  }
264 
265  // Look for the loop invariant symbolic value.
266  const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
267  if (!U)
268  return nullptr;
269 
270  Value *Stride = U->getValue();
271  if (!Lp->isLoopInvariant(Stride))
272  return nullptr;
273 
274  // If we have stripped off the recurrence cast we have to make sure that we
275  // return the value that is used in this loop so that we can replace it later.
276  if (StripedOffRecurrenceCast)
277  Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
278 
279  return Stride;
280 }
281 
282 /// Given a vector and an element number, see if the scalar value is
283 /// already around as a register, for example if it were inserted then extracted
284 /// from the vector.
285 Value *llvm::findScalarElement(Value *V, unsigned EltNo) {
286  assert(V->getType()->isVectorTy() && "Not looking at a vector?");
287  VectorType *VTy = cast<VectorType>(V->getType());
288  // For a fixed-length vector, return undef for an out-of-range access.
289  if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
290  unsigned Width = FVTy->getNumElements();
291  if (EltNo >= Width)
292  return UndefValue::get(FVTy->getElementType());
293  }
294 
295  if (Constant *C = dyn_cast<Constant>(V))
296  return C->getAggregateElement(EltNo);
297 
298  if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
299  // If this is an insert to a variable element, we don't know what it is.
300  if (!isa<ConstantInt>(III->getOperand(2)))
301  return nullptr;
302  unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
303 
304  // If this is an insert to the element we are looking for, return the
305  // inserted value.
306  if (EltNo == IIElt)
307  return III->getOperand(1);
308 
309  // Guard against infinite loop on malformed, unreachable IR.
310  if (III == III->getOperand(0))
311  return nullptr;
312 
313  // Otherwise, the insertelement doesn't modify the value, recurse on its
314  // vector input.
315  return findScalarElement(III->getOperand(0), EltNo);
316  }
317 
318  ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V);
319  // Restrict the following transformation to fixed-length vectors.
320  if (SVI && isa<FixedVectorType>(SVI->getType())) {
321  unsigned LHSWidth =
322  cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
323  int InEl = SVI->getMaskValue(EltNo);
324  if (InEl < 0)
325  return UndefValue::get(VTy->getElementType());
326  if (InEl < (int)LHSWidth)
327  return findScalarElement(SVI->getOperand(0), InEl);
328  return findScalarElement(SVI->getOperand(1), InEl - LHSWidth);
329  }
330 
331  // Extract a value from a vector add operation with a constant zero.
332  // TODO: Use getBinOpIdentity() to generalize this.
333  Value *Val; Constant *C;
334  if (match(V, m_Add(m_Value(Val), m_Constant(C))))
335  if (Constant *Elt = C->getAggregateElement(EltNo))
336  if (Elt->isNullValue())
337  return findScalarElement(Val, EltNo);
338 
339  // If the vector is a splat then we can trivially find the scalar element.
340  if (isa<ScalableVectorType>(VTy))
341  if (Value *Splat = getSplatValue(V))
342  if (EltNo < VTy->getElementCount().getKnownMinValue())
343  return Splat;
344 
345  // Otherwise, we don't know.
346  return nullptr;
347 }
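// Worked example: for IR such as
//   %v1 = insertelement <4 x i32> %v0, i32 %x, i32 1
// findScalarElement(%v1, 1) returns %x, and findScalarElement(%v1, 2) recurses
// into %v0 (returning nullptr if nothing further is known about it).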
348 
349 int llvm::getSplatIndex(ArrayRef<int> Mask) {
350  int SplatIndex = -1;
351  for (int M : Mask) {
352  // Ignore invalid (undefined) mask elements.
353  if (M < 0)
354  continue;
355 
356  // There can be only 1 non-negative mask element value if this is a splat.
357  if (SplatIndex != -1 && SplatIndex != M)
358  return -1;
359 
360  // Initialize the splat index to the 1st non-negative mask element.
361  SplatIndex = M;
362  }
363  assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?");
364  return SplatIndex;
365 }
366 
367 /// Get splat value if the input is a splat vector or return nullptr.
368 /// This function is not fully general. It checks only 2 cases:
369 /// the input value is (1) a splat constant vector or (2) a sequence
370 /// of instructions that broadcasts a scalar at element 0.
371 Value *llvm::getSplatValue(const Value *V) {
372  if (isa<VectorType>(V->getType()))
373  if (auto *C = dyn_cast<Constant>(V))
374  return C->getSplatValue();
375 
376  // shuf (inselt ?, Splat, 0), ?, <0, undef, 0, ...>
377  Value *Splat;
378  if (match(V,
379  m_Shuffle(m_InsertElt(m_Value(), m_Value(Splat), m_ZeroInt()),
380  m_Value(), m_ZeroMask())))
381  return Splat;
382 
383  return nullptr;
384 }
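// Worked example: the canonical splat idiom
//   %ins   = insertelement <4 x float> poison, float %s, i64 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> zeroinitializer
// matches the insertelement/shufflevector pattern above, so getSplatValue
// returns %s.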
385 
386 bool llvm::isSplatValue(const Value *V, int Index, unsigned Depth) {
387  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
388 
389  if (isa<VectorType>(V->getType())) {
390  if (isa<UndefValue>(V))
391  return true;
392  // FIXME: We can allow undefs, but if Index was specified, we may want to
393  // check that the constant is defined at that index.
394  if (auto *C = dyn_cast<Constant>(V))
395  return C->getSplatValue() != nullptr;
396  }
397 
398  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
399  // FIXME: We can safely allow undefs here. If Index was specified, we will
400  // check that the mask elt is defined at the required index.
401  if (!all_equal(Shuf->getShuffleMask()))
402  return false;
403 
404  // Match any index.
405  if (Index == -1)
406  return true;
407 
408  // Match a specific element. The mask should be defined at and match the
409  // specified index.
410  return Shuf->getMaskValue(Index) == Index;
411  }
412 
413  // The remaining tests are all recursive, so bail out if we hit the limit.
414  if (Depth++ == MaxAnalysisRecursionDepth)
415  return false;
416 
417  // If both operands of a binop are splats, the result is a splat.
418  Value *X, *Y, *Z;
419  if (match(V, m_BinOp(m_Value(X), m_Value(Y))))
420  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth);
421 
422  // If all operands of a select are splats, the result is a splat.
423  if (match(V, m_Select(m_Value(X), m_Value(Y), m_Value(Z))))
424  return isSplatValue(X, Index, Depth) && isSplatValue(Y, Index, Depth) &&
425  isSplatValue(Z, Index, Depth);
426 
427  // TODO: Add support for unary ops (fneg), casts, intrinsics (overflow ops).
428 
429  return false;
430 }
431 
432 void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask,
433  SmallVectorImpl<int> &ScaledMask) {
434  assert(Scale > 0 && "Unexpected scaling factor");
435 
436  // Fast-path: if no scaling, then it is just a copy.
437  if (Scale == 1) {
438  ScaledMask.assign(Mask.begin(), Mask.end());
439  return;
440  }
441 
442  ScaledMask.clear();
443  for (int MaskElt : Mask) {
444  if (MaskElt >= 0) {
445  assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX &&
446  "Overflowed 32-bits");
447  }
448  for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
449  ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
450  }
451 }
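// Worked example: narrowing by Scale = 2 expands every mask element into two
// consecutive narrow-element indices (undefs stay undef):
//   Mask = {0, 2, -1}  ->  ScaledMask = {0, 1, 4, 5, -1, -1}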
452 
453 bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask,
454  SmallVectorImpl<int> &ScaledMask) {
455  assert(Scale > 0 && "Unexpected scaling factor");
456 
457  // Fast-path: if no scaling, then it is just a copy.
458  if (Scale == 1) {
459  ScaledMask.assign(Mask.begin(), Mask.end());
460  return true;
461  }
462 
463  // We must map the original elements down evenly to a type with fewer elements.
464  int NumElts = Mask.size();
465  if (NumElts % Scale != 0)
466  return false;
467 
468  ScaledMask.clear();
469  ScaledMask.reserve(NumElts / Scale);
470 
471  // Step through the input mask by splitting into Scale-sized slices.
472  do {
473  ArrayRef<int> MaskSlice = Mask.take_front(Scale);
474  assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice.");
475 
476  // The first element of the slice determines how we evaluate this slice.
477  int SliceFront = MaskSlice.front();
478  if (SliceFront < 0) {
479  // Negative values (undef or other "sentinel" values) must be equal across
480  // the entire slice.
481  if (!all_equal(MaskSlice))
482  return false;
483  ScaledMask.push_back(SliceFront);
484  } else {
485  // A positive mask element must be cleanly divisible.
486  if (SliceFront % Scale != 0)
487  return false;
488  // Elements of the slice must be consecutive.
489  for (int i = 1; i < Scale; ++i)
490  if (MaskSlice[i] != SliceFront + i)
491  return false;
492  ScaledMask.push_back(SliceFront / Scale);
493  }
494  Mask = Mask.drop_front(Scale);
495  } while (!Mask.empty());
496 
497  assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask");
498 
499  // All elements of the original mask can be scaled down to map to the elements
500  // of a mask with wider elements.
501  return true;
502 }
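// Worked example: widening by Scale = 2 succeeds only when each slice of two
// mask elements is consecutive and starts at a multiple of the scale:
//   {0, 1, 6, 7}  ->  {0, 3}   (returns true)
//   {0, 1, 5, 6}  ->  fails    (returns false; 5 is not a multiple of 2)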
503 
504 void llvm::processShuffleMasks(
505  ArrayRef<int> Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs,
506  unsigned NumOfUsedRegs, function_ref<void()> NoInputAction,
507  function_ref<void(ArrayRef<int>, unsigned, unsigned)> SingleInputAction,
508  function_ref<void(ArrayRef<int>, unsigned, unsigned)> ManyInputsAction) {
509  SmallVector<SmallVector<SmallVector<int>>> Res(NumOfDestRegs);
510  // Try to perform better estimation of the permutation.
511  // 1. Split the source/destination vectors into real registers.
512  // 2. Do the mask analysis to identify which real registers are
513  // permuted.
514  int Sz = Mask.size();
515  unsigned SzDest = Sz / NumOfDestRegs;
516  unsigned SzSrc = Sz / NumOfSrcRegs;
517  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
518  auto &RegMasks = Res[I];
519  RegMasks.assign(NumOfSrcRegs, {});
520  // Check that the values in dest registers are in the one src
521  // register.
522  for (unsigned K = 0; K < SzDest; ++K) {
523  int Idx = I * SzDest + K;
524  if (Idx == Sz)
525  break;
526  if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
527  continue;
528  int SrcRegIdx = Mask[Idx] / SzSrc;
529  // Add a cost of PermuteTwoSrc for each new source register permute,
530  // if we have more than one source registers.
531  if (RegMasks[SrcRegIdx].empty())
532  RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
533  RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
534  }
535  }
536  // Process split mask.
537  for (unsigned I = 0; I < NumOfUsedRegs; ++I) {
538  auto &Dest = Res[I];
539  int NumSrcRegs =
540  count_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
541  switch (NumSrcRegs) {
542  case 0:
543  // No input vectors were used!
544  NoInputAction();
545  break;
546  case 1: {
547  // Find the only non-empty source mask.
548  auto *It =
549  find_if(Dest, [](ArrayRef<int> Mask) { return !Mask.empty(); });
550  unsigned SrcReg = std::distance(Dest.begin(), It);
551  SingleInputAction(*It, SrcReg, I);
552  break;
553  }
554  default: {
555  // The first mask is a permutation of a single register. Since we have >2
556  // input registers to shuffle, we merge the masks for 2 first registers
557  // and generate a shuffle of 2 registers rather than the reordering of the
558  // first register and then shuffle with the second register. Next,
559  // generate the shuffles of the resulting register + the remaining
560  // registers from the list.
561  auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
562  ArrayRef<int> SecondMask) {
563  for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
564  if (SecondMask[Idx] != UndefMaskElem) {
565  assert(FirstMask[Idx] == UndefMaskElem &&
566  "Expected undefined mask element.");
567  FirstMask[Idx] = SecondMask[Idx] + VF;
568  }
569  }
570  };
571  auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
572  for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
573  if (Mask[Idx] != UndefMaskElem)
574  Mask[Idx] = Idx;
575  }
576  };
577  int SecondIdx;
578  do {
579  int FirstIdx = -1;
580  SecondIdx = -1;
581  MutableArrayRef<int> FirstMask, SecondMask;
582  for (unsigned I = 0; I < NumOfDestRegs; ++I) {
583  SmallVectorImpl<int> &RegMask = Dest[I];
584  if (RegMask.empty())
585  continue;
586 
587  if (FirstIdx == SecondIdx) {
588  FirstIdx = I;
589  FirstMask = RegMask;
590  continue;
591  }
592  SecondIdx = I;
593  SecondMask = RegMask;
594  CombineMasks(FirstMask, SecondMask);
595  ManyInputsAction(FirstMask, FirstIdx, SecondIdx);
596  NormalizeMask(FirstMask);
597  RegMask.clear();
598  SecondMask = FirstMask;
599  SecondIdx = FirstIdx;
600  }
601  if (FirstIdx != SecondIdx && SecondIdx >= 0) {
602  CombineMasks(SecondMask, FirstMask);
603  ManyInputsAction(SecondMask, SecondIdx, FirstIdx);
604  Dest[FirstIdx].clear();
605  NormalizeMask(SecondMask);
606  }
607  } while (SecondIdx >= 0);
608  break;
609  }
610  }
611  }
612 }
613 
614 MapVector<Instruction *, uint64_t>
615 llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
616  const TargetTransformInfo *TTI) {
617 
618  // DemandedBits will give us every value's live-out bits. But we want
619  // to ensure no extra casts would need to be inserted, so every DAG
620  // of connected values must have the same minimum bitwidth.
621  EquivalenceClasses<Value *> ECs;
622  SmallVector<Value *, 16> Worklist;
623  SmallPtrSet<Value *, 4> Roots;
624  SmallPtrSet<Value *, 16> Visited;
625  DenseMap<Value *, uint64_t> DBits;
626  SmallPtrSet<Instruction *, 4> InstructionSet;
627  MapVector<Instruction *, uint64_t> MinBWs;
628 
629  // Determine the roots. We work bottom-up, from truncs or icmps.
630  bool SeenExtFromIllegalType = false;
631  for (auto *BB : Blocks)
632  for (auto &I : *BB) {
633  InstructionSet.insert(&I);
634 
635  if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) &&
636  !TTI->isTypeLegal(I.getOperand(0)->getType()))
637  SeenExtFromIllegalType = true;
638 
639  // Only deal with non-vector integers up to 64-bits wide.
640  if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) &&
641  !I.getType()->isVectorTy() &&
642  I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) {
643  // Don't make work for ourselves. If we know the loaded type is legal,
644  // don't add it to the worklist.
645  if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType()))
646  continue;
647 
648  Worklist.push_back(&I);
649  Roots.insert(&I);
650  }
651  }
652  // Early exit.
653  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType))
654  return MinBWs;
655 
656  // Now proceed breadth-first, unioning values together.
657  while (!Worklist.empty()) {
658  Value *Val = Worklist.pop_back_val();
659  Value *Leader = ECs.getOrInsertLeaderValue(Val);
660 
661  if (!Visited.insert(Val).second)
662  continue;
663 
664  // Non-instructions terminate a chain successfully.
665  if (!isa<Instruction>(Val))
666  continue;
667  Instruction *I = cast<Instruction>(Val);
668 
669  // If we encounter a type that is larger than 64 bits, we can't represent
670  // it so bail out.
671  if (DB.getDemandedBits(I).getBitWidth() > 64)
672  return MapVector<Instruction *, uint64_t>();
673 
674  uint64_t V = DB.getDemandedBits(I).getZExtValue();
675  DBits[Leader] |= V;
676  DBits[I] = V;
677 
678  // Casts, loads and instructions outside of our range terminate a chain
679  // successfully.
680  if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) ||
681  !InstructionSet.count(I))
682  continue;
683 
684  // Unsafe casts terminate a chain unsuccessfully. We can't do anything
685  // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to
686  // transform anything that relies on them.
687  if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) ||
688  !I->getType()->isIntegerTy()) {
689  DBits[Leader] |= ~0ULL;
690  continue;
691  }
692 
693  // We don't modify the types of PHIs. Reductions will already have been
694  // truncated if possible, and inductions' sizes will have been chosen by
695  // indvars.
696  if (isa<PHINode>(I))
697  continue;
698 
699  if (DBits[Leader] == ~0ULL)
700  // All bits demanded, no point continuing.
701  continue;
702 
703  for (Value *O : cast<User>(I)->operands()) {
704  ECs.unionSets(Leader, O);
705  Worklist.push_back(O);
706  }
707  }
708 
709  // Now we've discovered all values, walk them to see if there are
710  // any users we didn't see. If there are, we can't optimize that
711  // chain.
712  for (auto &I : DBits)
713  for (auto *U : I.first->users())
714  if (U->getType()->isIntegerTy() && DBits.count(U) == 0)
715  DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL;
716 
717  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
718  uint64_t LeaderDemandedBits = 0;
719  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
720  LeaderDemandedBits |= DBits[M];
721 
722  uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
723  llvm::countLeadingZeros(LeaderDemandedBits);
724  // Round up to a power of 2
725  if (!isPowerOf2_64((uint64_t)MinBW))
726  MinBW = NextPowerOf2(MinBW);
727 
728  // We don't modify the types of PHIs. Reductions will already have been
729  // truncated if possible, and inductions' sizes will have been chosen by
730  // indvars.
731  // If we are required to shrink a PHI, abandon this entire equivalence class.
732  bool Abort = false;
733  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
734  if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) {
735  Abort = true;
736  break;
737  }
738  if (Abort)
739  continue;
740 
741  for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
742  if (!isa<Instruction>(M))
743  continue;
744  Type *Ty = M->getType();
745  if (Roots.count(M))
746  Ty = cast<Instruction>(M)->getOperand(0)->getType();
747  if (MinBW < Ty->getScalarSizeInBits())
748  MinBWs[cast<Instruction>(M)] = MinBW;
749  }
750  }
751 
752  return MinBWs;
753 }
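// Usage sketch (TheLoop, DB and TTI are assumed to come from the caller): a
// vectorizer can hand the loop's blocks to this analysis and later emit
// narrower operations for any instruction found in the returned map.
//
//   MapVector<Instruction *, uint64_t> MinBWs =
//       computeMinimumValueSizes(TheLoop->getBlocks(), DB, &TTI);
//   // MinBWs[I], when present, is the number of bits I can be evaluated in.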
754 
755 /// Add all access groups in @p AccGroups to @p List.
756 template <typename ListT>
757 static void addToAccessGroupList(ListT &List, MDNode *AccGroups) {
758  // Interpret an access group as a list containing itself.
759  if (AccGroups->getNumOperands() == 0) {
760  assert(isValidAsAccessGroup(AccGroups) && "Node must be an access group");
761  List.insert(AccGroups);
762  return;
763  }
764 
765  for (const auto &AccGroupListOp : AccGroups->operands()) {
766  auto *Item = cast<MDNode>(AccGroupListOp.get());
767  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
768  List.insert(Item);
769  }
770 }
771 
772 MDNode *llvm::uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2) {
773  if (!AccGroups1)
774  return AccGroups2;
775  if (!AccGroups2)
776  return AccGroups1;
777  if (AccGroups1 == AccGroups2)
778  return AccGroups1;
779 
780  SmallSetVector<Metadata *, 4> Union;
781  addToAccessGroupList(Union, AccGroups1);
782  addToAccessGroupList(Union, AccGroups2);
783 
784  if (Union.size() == 0)
785  return nullptr;
786  if (Union.size() == 1)
787  return cast<MDNode>(Union.front());
788 
789  LLVMContext &Ctx = AccGroups1->getContext();
790  return MDNode::get(Ctx, Union.getArrayRef());
791 }
792 
793 MDNode *llvm::intersectAccessGroups(const Instruction *Inst1,
794  const Instruction *Inst2) {
795  bool MayAccessMem1 = Inst1->mayReadOrWriteMemory();
796  bool MayAccessMem2 = Inst2->mayReadOrWriteMemory();
797 
798  if (!MayAccessMem1 && !MayAccessMem2)
799  return nullptr;
800  if (!MayAccessMem1)
801  return Inst2->getMetadata(LLVMContext::MD_access_group);
802  if (!MayAccessMem2)
803  return Inst1->getMetadata(LLVMContext::MD_access_group);
804 
805  MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group);
806  MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group);
807  if (!MD1 || !MD2)
808  return nullptr;
809  if (MD1 == MD2)
810  return MD1;
811 
812  // Use set for scalable 'contains' check.
813  SmallPtrSet<Metadata *, 4> AccGroupSet2;
814  addToAccessGroupList(AccGroupSet2, MD2);
815 
816  SmallVector<Metadata *, 4> Intersection;
817  if (MD1->getNumOperands() == 0) {
818  assert(isValidAsAccessGroup(MD1) && "Node must be an access group");
819  if (AccGroupSet2.count(MD1))
820  Intersection.push_back(MD1);
821  } else {
822  for (const MDOperand &Node : MD1->operands()) {
823  auto *Item = cast<MDNode>(Node.get());
824  assert(isValidAsAccessGroup(Item) && "List item must be an access group");
825  if (AccGroupSet2.count(Item))
826  Intersection.push_back(Item);
827  }
828  }
829 
830  if (Intersection.size() == 0)
831  return nullptr;
832  if (Intersection.size() == 1)
833  return cast<MDNode>(Intersection.front());
834 
835  LLVMContext &Ctx = Inst1->getContext();
836  return MDNode::get(Ctx, Intersection);
837 }
838 
839 /// \returns \p I after propagating metadata from \p VL.
840 Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
841  if (VL.empty())
842  return Inst;
843  Instruction *I0 = cast<Instruction>(VL[0]);
844  SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
845  I0->getAllMetadataOtherThanDebugLoc(Metadata);
846 
847  for (auto Kind : {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
848  LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
849  LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load,
850  LLVMContext::MD_access_group}) {
851  MDNode *MD = I0->getMetadata(Kind);
852 
853  for (int J = 1, E = VL.size(); MD && J != E; ++J) {
854  const Instruction *IJ = cast<Instruction>(VL[J]);
855  MDNode *IMD = IJ->getMetadata(Kind);
856  switch (Kind) {
857  case LLVMContext::MD_tbaa:
858  MD = MDNode::getMostGenericTBAA(MD, IMD);
859  break;
860  case LLVMContext::MD_alias_scope:
861  MD = MDNode::getMostGenericAliasScope(MD, IMD);
862  break;
863  case LLVMContext::MD_fpmath:
864  MD = MDNode::getMostGenericFPMath(MD, IMD);
865  break;
866  case LLVMContext::MD_noalias:
867  case LLVMContext::MD_nontemporal:
868  case LLVMContext::MD_invariant_load:
869  MD = MDNode::intersect(MD, IMD);
870  break;
871  case LLVMContext::MD_access_group:
872  MD = intersectAccessGroups(Inst, IJ);
873  break;
874  default:
875  llvm_unreachable("unhandled metadata");
876  }
877  }
878 
879  Inst->setMetadata(Kind, MD);
880  }
881 
882  return Inst;
883 }
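// Usage sketch (VL is the bundle of scalar instructions being replaced and
// WideLoad is an assumed replacement instruction): only metadata that is valid
// for every member of VL survives on the new instruction.
//
//   propagateMetadata(WideLoad, VL);   // intersects tbaa, noalias, access
//                                      // groups, etc. across all of VL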
884 
885 Constant *
886 llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF,
887  const InterleaveGroup<Instruction> &Group) {
888  // All 1's means mask is not needed.
889  if (Group.getNumMembers() == Group.getFactor())
890  return nullptr;
891 
892  // TODO: support reversed access.
893  assert(!Group.isReverse() && "Reversed group not supported.");
894 
895  SmallVector<Constant *, 16> Mask;
896  for (unsigned i = 0; i < VF; i++)
897  for (unsigned j = 0; j < Group.getFactor(); ++j) {
898  unsigned HasMember = Group.getMember(j) ? 1 : 0;
899  Mask.push_back(Builder.getInt1(HasMember));
900  }
901 
902  return ConstantVector::get(Mask);
903 }
904 
905 llvm::SmallVector<int, 16>
906 llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) {
907  SmallVector<int, 16> MaskVec;
908  for (unsigned i = 0; i < VF; i++)
909  for (unsigned j = 0; j < ReplicationFactor; j++)
910  MaskVec.push_back(i);
911 
912  return MaskVec;
913 }
914 
915 llvm::SmallVector<int, 16> llvm::createInterleaveMask(unsigned VF,
916  unsigned NumVecs) {
917  SmallVector<int, 16> Mask;
918  for (unsigned i = 0; i < VF; i++)
919  for (unsigned j = 0; j < NumVecs; j++)
920  Mask.push_back(j * VF + i);
921 
922  return Mask;
923 }
924 
925 llvm::SmallVector<int, 16>
926 llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) {
927  SmallVector<int, 16> Mask;
928  for (unsigned i = 0; i < VF; i++)
929  Mask.push_back(Start + i * Stride);
930 
931  return Mask;
932 }
933 
934 llvm::SmallVector<int, 16> llvm::createSequentialMask(unsigned Start,
935  unsigned NumInts,
936  unsigned NumUndefs) {
937  SmallVector<int, 16> Mask;
938  for (unsigned i = 0; i < NumInts; i++)
939  Mask.push_back(Start + i);
940 
941  for (unsigned i = 0; i < NumUndefs; i++)
942  Mask.push_back(-1);
943 
944  return Mask;
945 }
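// Worked examples for the mask helpers above:
//   createReplicatedMask(3, 2)    -> {0, 0, 0, 1, 1, 1}
//   createInterleaveMask(4, 2)    -> {0, 4, 1, 5, 2, 6, 3, 7}
//   createStrideMask(0, 2, 4)     -> {0, 2, 4, 6}
//   createSequentialMask(2, 3, 2) -> {2, 3, 4, -1, -1}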
946 
947 llvm::SmallVector<int, 16> llvm::createUnaryMask(ArrayRef<int> Mask,
948  unsigned NumElts) {
949  // Avoid casts in the loop and make sure we have a reasonable number.
950  int NumEltsSigned = NumElts;
951  assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count");
952 
953  // If the mask chooses an element from operand 1, reduce it to choose from the
954  // corresponding element of operand 0. Undef mask elements are unchanged.
955  SmallVector<int, 16> UnaryMask;
956  for (int MaskElt : Mask) {
957  assert((MaskElt < NumEltsSigned * 2) && "Expected valid shuffle mask");
958  int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt;
959  UnaryMask.push_back(UnaryElt);
960  }
961  return UnaryMask;
962 }
963 
964 /// A helper function for concatenating vectors. This function concatenates two
965 /// vectors having the same element type. If the second vector has fewer
966 /// elements than the first, it is padded with undefs.
967 static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1,
968  Value *V2) {
969  VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType());
970  VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType());
971  assert(VecTy1 && VecTy2 &&
972  VecTy1->getScalarType() == VecTy2->getScalarType() &&
973  "Expect two vectors with the same element type");
974 
975  unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements();
976  unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements();
977  assert(NumElts1 >= NumElts2 && "Unexpect the first vector has less elements");
978 
979  if (NumElts1 > NumElts2) {
980  // Extend with UNDEFs.
981  V2 = Builder.CreateShuffleVector(
982  V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2));
983  }
984 
985  return Builder.CreateShuffleVector(
986  V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0));
987 }
988 
989 Value *llvm::concatenateVectors(IRBuilderBase &Builder,
990  ArrayRef<Value *> Vecs) {
991  unsigned NumVecs = Vecs.size();
992  assert(NumVecs > 1 && "Should be at least two vectors");
993 
994  SmallVector<Value *, 8> ResList;
995  ResList.append(Vecs.begin(), Vecs.end());
996  do {
997  SmallVector<Value *, 8> TmpList;
998  for (unsigned i = 0; i < NumVecs - 1; i += 2) {
999  Value *V0 = ResList[i], *V1 = ResList[i + 1];
1000  assert((V0->getType() == V1->getType() || i == NumVecs - 2) &&
1001  "Only the last vector may have a different type");
1002 
1003  TmpList.push_back(concatenateTwoVectors(Builder, V0, V1));
1004  }
1005 
1006  // Push the last vector if the total number of vectors is odd.
1007  if (NumVecs % 2 != 0)
1008  TmpList.push_back(ResList[NumVecs - 1]);
1009 
1010  ResList = TmpList;
1011  NumVecs = ResList.size();
1012  } while (NumVecs > 1);
1013 
1014  return ResList[0];
1015 }
1016 
1017 bool llvm::maskIsAllZeroOrUndef(Value *Mask) {
1018  assert(isa<VectorType>(Mask->getType()) &&
1019  isa<IntegerType>(Mask->getType()->getScalarType()) &&
1020  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1021  1 &&
1022  "Mask must be a vector of i1");
1023 
1024  auto *ConstMask = dyn_cast<Constant>(Mask);
1025  if (!ConstMask)
1026  return false;
1027  if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask))
1028  return true;
1029  if (isa<ScalableVectorType>(ConstMask->getType()))
1030  return false;
1031  for (unsigned
1032  I = 0,
1033  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1034  I != E; ++I) {
1035  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1036  if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt))
1037  continue;
1038  return false;
1039  }
1040  return true;
1041 }
1042 
1043 bool llvm::maskIsAllOneOrUndef(Value *Mask) {
1044  assert(isa<VectorType>(Mask->getType()) &&
1045  isa<IntegerType>(Mask->getType()->getScalarType()) &&
1046  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1047  1 &&
1048  "Mask must be a vector of i1");
1049 
1050  auto *ConstMask = dyn_cast<Constant>(Mask);
1051  if (!ConstMask)
1052  return false;
1053  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1054  return true;
1055  if (isa<ScalableVectorType>(ConstMask->getType()))
1056  return false;
1057  for (unsigned
1058  I = 0,
1059  E = cast<FixedVectorType>(ConstMask->getType())->getNumElements();
1060  I != E; ++I) {
1061  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1062  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1063  continue;
1064  return false;
1065  }
1066  return true;
1067 }
1068 
1069 /// TODO: This is a lot like known bits, but for
1070 /// vectors. Is there something we can common this with?
1071 APInt llvm::possiblyDemandedEltsInMask(Value *Mask) {
1072  assert(isa<FixedVectorType>(Mask->getType()) &&
1073  isa<IntegerType>(Mask->getType()->getScalarType()) &&
1074  cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() ==
1075  1 &&
1076  "Mask must be a fixed width vector of i1");
1077 
1078  const unsigned VWidth =
1079  cast<FixedVectorType>(Mask->getType())->getNumElements();
1080  APInt DemandedElts = APInt::getAllOnes(VWidth);
1081  if (auto *CV = dyn_cast<ConstantVector>(Mask))
1082  for (unsigned i = 0; i < VWidth; i++)
1083  if (CV->getAggregateElement(i)->isNullValue())
1084  DemandedElts.clearBit(i);
1085  return DemandedElts;
1086 }
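// Worked example: for the constant mask <4 x i1> <i1 1, i1 0, i1 1, i1 0>
//   maskIsAllZeroOrUndef(Mask)       -> false
//   maskIsAllOneOrUndef(Mask)        -> false
//   possiblyDemandedEltsInMask(Mask) -> APInt 0b0101 (lanes 0 and 2 demanded)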
1087 
1088 bool InterleavedAccessInfo::isStrided(int Stride) {
1089  unsigned Factor = std::abs(Stride);
1090  return Factor >= 2 && Factor <= MaxInterleaveGroupFactor;
1091 }
1092 
1093 void InterleavedAccessInfo::collectConstStrideAccesses(
1094  MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
1095  const ValueToValueMap &Strides) {
1096  auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
1097 
1098  // Since it's desired that the load/store instructions be maintained in
1099  // "program order" for the interleaved access analysis, we have to visit the
1100  // blocks in the loop in reverse postorder (i.e., in a topological order).
1101  // Such an ordering will ensure that any load/store that may be executed
1102  // before a second load/store will precede the second load/store in
1103  // AccessStrideInfo.
1104  LoopBlocksDFS DFS(TheLoop);
1105  DFS.perform(LI);
1106  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
1107  for (auto &I : *BB) {
1108  Value *Ptr = getLoadStorePointerOperand(&I);
1109  if (!Ptr)
1110  continue;
1111  Type *ElementTy = getLoadStoreType(&I);
1112 
1113  // We don't check wrapping here because we don't know yet if Ptr will be
1114  // part of a full group or a group with gaps. Checking wrapping for all
1115  // pointers (even those that end up in groups with no gaps) will be overly
1116  // conservative. For full groups, wrapping should be ok since if we would
1117  // wrap around the address space we would do a memory access at nullptr
1118  // even without the transformation. The wrapping checks are therefore
1119  // deferred until after we've formed the interleaved groups.
1120  int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, Strides,
1121  /*Assume=*/true, /*ShouldCheckWrap=*/false);
1122 
1123  const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
1124  uint64_t Size = DL.getTypeAllocSize(ElementTy);
1125  AccessStrideInfo[&I] = StrideDescriptor(Stride, Scev, Size,
1126  getLoadStoreAlignment(&I));
1127  }
1128 }
1129 
1130 // Analyze interleaved accesses and collect them into interleaved load and
1131 // store groups.
1132 //
1133 // When generating code for an interleaved load group, we effectively hoist all
1134 // loads in the group to the location of the first load in program order. When
1135 // generating code for an interleaved store group, we sink all stores to the
1136 // location of the last store. This code motion can change the order of load
1137 // and store instructions and may break dependences.
1138 //
1139 // The code generation strategy mentioned above ensures that we won't violate
1140 // any write-after-read (WAR) dependences.
1141 //
1142 // E.g., for the WAR dependence: a = A[i]; // (1)
1143 // A[i] = b; // (2)
1144 //
1145 // The store group of (2) is always inserted at or below (2), and the load
1146 // group of (1) is always inserted at or above (1). Thus, the instructions will
1147 // never be reordered. All other dependences are checked to ensure the
1148 // correctness of the instruction reordering.
1149 //
1150 // The algorithm visits all memory accesses in the loop in bottom-up program
1151 // order. Program order is established by traversing the blocks in the loop in
1152 // reverse postorder when collecting the accesses.
1153 //
1154 // We visit the memory accesses in bottom-up order because it can simplify the
1155 // construction of store groups in the presence of write-after-write (WAW)
1156 // dependences.
1157 //
1158 // E.g., for the WAW dependence: A[i] = a; // (1)
1159 // A[i] = b; // (2)
1160 // A[i + 1] = c; // (3)
1161 //
1162 // We will first create a store group with (3) and (2). (1) can't be added to
1163 // this group because it and (2) are dependent. However, (1) can be grouped
1164 // with other accesses that may precede it in program order. Note that a
1165 // bottom-up order does not imply that WAW dependences should not be checked.
1166 void InterleavedAccessInfo::analyzeInterleaving(
1167  bool EnablePredicatedInterleavedMemAccesses) {
1168  LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
1169  const ValueToValueMap &Strides = LAI->getSymbolicStrides();
1170 
1171  // Holds all accesses with a constant stride.
1172  MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
1173  collectConstStrideAccesses(AccessStrideInfo, Strides);
1174 
1175  if (AccessStrideInfo.empty())
1176  return;
1177 
1178  // Collect the dependences in the loop.
1179  collectDependences();
1180 
1181  // Holds all interleaved store groups temporarily.
1182  SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
1183  // Holds all interleaved load groups temporarily.
1184  SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
1185 
1186  // Search in bottom-up program order for pairs of accesses (A and B) that can
1187  // form interleaved load or store groups. In the algorithm below, access A
1188  // precedes access B in program order. We initialize a group for B in the
1189  // outer loop of the algorithm, and then in the inner loop, we attempt to
1190  // insert each A into B's group if:
1191  //
1192  // 1. A and B have the same stride,
1193  // 2. A and B have the same memory object size, and
1194  // 3. A belongs in B's group according to its distance from B.
1195  //
1196  // Special care is taken to ensure group formation will not break any
1197  // dependences.
1198  for (auto BI = AccessStrideInfo.rbegin(), E = AccessStrideInfo.rend();
1199  BI != E; ++BI) {
1200  Instruction *B = BI->first;
1201  StrideDescriptor DesB = BI->second;
1202 
1203  // Initialize a group for B if it has an allowable stride. Even if we don't
1204  // create a group for B, we continue with the bottom-up algorithm to ensure
1205  // we don't break any of B's dependences.
1206  InterleaveGroup<Instruction> *Group = nullptr;
1207  if (isStrided(DesB.Stride) &&
1208  (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
1209  Group = getInterleaveGroup(B);
1210  if (!Group) {
1211  LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
1212  << '\n');
1213  Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
1214  }
1215  if (B->mayWriteToMemory())
1216  StoreGroups.insert(Group);
1217  else
1218  LoadGroups.insert(Group);
1219  }
1220 
1221  for (auto AI = std::next(BI); AI != E; ++AI) {
1222  Instruction *A = AI->first;
1223  StrideDescriptor DesA = AI->second;
1224 
1225  // Our code motion strategy implies that we can't have dependences
1226  // between accesses in an interleaved group and other accesses located
1227  // between the first and last member of the group. Note that this also
1228  // means that a group can't have more than one member at a given offset.
1229  // The accesses in a group can have dependences with other accesses, but
1230  // we must ensure we don't extend the boundaries of the group such that
1231  // we encompass those dependent accesses.
1232  //
1233  // For example, assume we have the sequence of accesses shown below in a
1234  // stride-2 loop:
1235  //
1236  // (1, 2) is a group | A[i] = a; // (1)
1237  // | A[i-1] = b; // (2) |
1238  // A[i-3] = c; // (3)
1239  // A[i] = d; // (4) | (2, 4) is not a group
1240  //
1241  // Because accesses (2) and (3) are dependent, we can group (2) with (1)
1242  // but not with (4). If we did, the dependent access (3) would be within
1243  // the boundaries of the (2, 4) group.
1244  if (!canReorderMemAccessesForInterleavedGroups(&*AI, &*BI)) {
1245  // If a dependence exists and A is already in a group, we know that A
1246  // must be a store since A precedes B and WAR dependences are allowed.
1247  // Thus, A would be sunk below B. We release A's group to prevent this
1248  // illegal code motion. A will then be free to form another group with
1249  // instructions that precede it.
1250  if (isInterleaved(A)) {
1251  InterleaveGroup<Instruction> *StoreGroup = getInterleaveGroup(A);
1252 
1253  LLVM_DEBUG(dbgs() << "LV: Invalidated store group due to "
1254  "dependence between " << *A << " and "<< *B << '\n');
1255 
1256  StoreGroups.remove(StoreGroup);
1257  releaseGroup(StoreGroup);
1258  }
1259 
1260  // If a dependence exists and A is not already in a group (or it was
1261  // and we just released it), B might be hoisted above A (if B is a
1262  // load) or another store might be sunk below A (if B is a store). In
1263  // either case, we can't add additional instructions to B's group. B
1264  // will only form a group with instructions that it precedes.
1265  break;
1266  }
1267 
1268  // At this point, we've checked for illegal code motion. If either A or B
1269  // isn't strided, there's nothing left to do.
1270  if (!isStrided(DesA.Stride) || !isStrided(DesB.Stride))
1271  continue;
1272 
1273  // Ignore A if it's already in a group or isn't the same kind of memory
1274  // operation as B.
1275  // Note that mayReadFromMemory() isn't mutually exclusive to
1276  // mayWriteToMemory in the case of atomic loads. We shouldn't see those
1277  // here, canVectorizeMemory() should have returned false - except for the
1278  // case we asked for optimization remarks.
1279  if (isInterleaved(A) ||
1280  (A->mayReadFromMemory() != B->mayReadFromMemory()) ||
1281  (A->mayWriteToMemory() != B->mayWriteToMemory()))
1282  continue;
1283 
1284  // Check rules 1 and 2. Ignore A if its stride or size is different from
1285  // that of B.
1286  if (DesA.Stride != DesB.Stride || DesA.Size != DesB.Size)
1287  continue;
1288 
1289  // Ignore A if the memory object of A and B don't belong to the same
1290  // address space.
1291  if (getLoadStoreAddressSpace(A) != getLoadStoreAddressSpace(B))
1292  continue;
1293 
1294  // Calculate the distance from A to B.
1295  const SCEVConstant *DistToB = dyn_cast<SCEVConstant>(
1296  PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev));
1297  if (!DistToB)
1298  continue;
1299  int64_t DistanceToB = DistToB->getAPInt().getSExtValue();
1300 
1301  // Check rule 3. Ignore A if its distance to B is not a multiple of the
1302  // size.
1303  if (DistanceToB % static_cast<int64_t>(DesB.Size))
1304  continue;
1305 
1306  // All members of a predicated interleave-group must have the same predicate,
1307  // and currently must reside in the same BB.
1308  BasicBlock *BlockA = A->getParent();
1309  BasicBlock *BlockB = B->getParent();
1310  if ((isPredicated(BlockA) || isPredicated(BlockB)) &&
1311  (!EnablePredicatedInterleavedMemAccesses || BlockA != BlockB))
1312  continue;
1313 
1314  // The index of A is the index of B plus A's distance to B in multiples
1315  // of the size.
1316  int IndexA =
1317  Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
1318 
1319  // Try to insert A into B's group.
1320  if (Group->insertMember(A, IndexA, DesA.Alignment)) {
1321  LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
1322  << " into the interleave group with" << *B
1323  << '\n');
1324  InterleaveGroupMap[A] = Group;
1325 
1326  // Set the first load in program order as the insert position.
1327  if (A->mayReadFromMemory())
1328  Group->setInsertPos(A);
1329  }
1330  } // Iteration over A accesses.
1331  } // Iteration over B accesses.
1332 
1333  auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup<Instruction> *Group,
1334  int Index,
1335  std::string FirstOrLast) -> bool {
1336  Instruction *Member = Group->getMember(Index);
1337  assert(Member && "Group member does not exist");
1338  Value *MemberPtr = getLoadStorePointerOperand(Member);
1339  Type *AccessTy = getLoadStoreType(Member);
1340  if (getPtrStride(PSE, AccessTy, MemberPtr, TheLoop, Strides,
1341  /*Assume=*/false, /*ShouldCheckWrap=*/true))
1342  return false;
1343  LLVM_DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
1344  << FirstOrLast
1345  << " group member potentially pointer-wrapping.\n");
1346  releaseGroup(Group);
1347  return true;
1348  };
1349 
1350  // Remove interleaved groups with gaps whose memory
1351  // accesses may wrap around. We have to revisit the getPtrStride analysis,
1352  // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
1353  // not check wrapping (see documentation there).
1354  // FORNOW we use Assume=false;
1355  // TODO: Change to Assume=true but making sure we don't exceed the threshold
1356  // of runtime SCEV assumptions checks (thereby potentially failing to
1357  // vectorize altogether).
1358  // Additional optional optimizations:
1359  // TODO: If we are peeling the loop and we know that the first pointer doesn't
1360  // wrap then we can deduce that all pointers in the group don't wrap.
1361  // This means that we can forcefully peel the loop in order to only have to
1362  // check the first pointer for no-wrap. When we'll change to use Assume=true
1363  // we'll only need at most one runtime check per interleaved group.
1364  for (auto *Group : LoadGroups) {
1365  // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1366  // load would wrap around the address space we would do a memory access at
1367  // nullptr even without the transformation.
1368  if (Group->getNumMembers() == Group->getFactor())
1369  continue;
1370 
1371  // Case 2: If first and last members of the group don't wrap this implies
1372  // that all the pointers in the group don't wrap.
1373  // So we check only group member 0 (which is always guaranteed to exist),
1374  // and group member Factor - 1; If the latter doesn't exist we rely on
1375  // peeling (if it is a non-reversed access -- see Case 3).
1376  if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1377  continue;
1378  if (Group->getMember(Group->getFactor() - 1))
1379  InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1,
1380  std::string("last"));
1381  else {
1382  // Case 3: A non-reversed interleaved load group with gaps: We need
1383  // to execute at least one scalar epilogue iteration. This will ensure
1384  // we don't speculatively access memory out-of-bounds. We only need
1385  // to look for a member at index factor - 1, since every group must have
1386  // a member at index zero.
1387  if (Group->isReverse()) {
1388  LLVM_DEBUG(
1389  dbgs() << "LV: Invalidate candidate interleaved group due to "
1390  "a reverse access with gaps.\n");
1391  releaseGroup(Group);
1392  continue;
1393  }
1394  LLVM_DEBUG(
1395  dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
1396  RequiresScalarEpilogue = true;
1397  }
1398  }
1399 
1400  for (auto *Group : StoreGroups) {
1401  // Case 1: A full group. Can Skip the checks; For full groups, if the wide
1402  // store would wrap around the address space we would do a memory access at
1403  // nullptr even without the transformation.
1404  if (Group->getNumMembers() == Group->getFactor())
1405  continue;
1406 
1407  // Interleave-store-group with gaps is implemented using masked wide store.
1408  // Remove interleaved store groups with gaps if
1409  // masked-interleaved-accesses are not enabled by the target.
1410  if (!EnablePredicatedInterleavedMemAccesses) {
1411  LLVM_DEBUG(
1412  dbgs() << "LV: Invalidate candidate interleaved store group due "
1413  "to gaps.\n");
1414  releaseGroup(Group);
1415  continue;
1416  }
1417 
1418  // Case 2: If first and last members of the group don't wrap this implies
1419  // that all the pointers in the group don't wrap.
1420  // So we check only group member 0 (which is always guaranteed to exist),
1421  // and the last group member. Case 3 (scalar epilog) is not relevant for
1422  // stores with gaps, which are implemented with masked-store (rather than
1423  // speculative access, as in loads).
1424  if (InvalidateGroupIfMemberMayWrap(Group, 0, std::string("first")))
1425  continue;
1426  for (int Index = Group->getFactor() - 1; Index > 0; Index--)
1427  if (Group->getMember(Index)) {
1428  InvalidateGroupIfMemberMayWrap(Group, Index, std::string("last"));
1429  break;
1430  }
1431  }
1432 }
1433 
1434 void InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue() {
1435  // If no group had triggered the requirement to create an epilogue loop,
1436  // there is nothing to do.
1437  if (!requiresScalarEpilogue())
1438  return;
1439 
1440  bool ReleasedGroup = false;
1441  // Release groups requiring scalar epilogues. Note that this also removes them
1442  // from InterleaveGroups.
1443  for (auto *Group : make_early_inc_range(InterleaveGroups)) {
1444  if (!Group->requiresScalarEpilogue())
1445  continue;
1446  LLVM_DEBUG(
1447  dbgs()
1448  << "LV: Invalidate candidate interleaved group due to gaps that "
1449  "require a scalar epilogue (not allowed under optsize) and cannot "
1450  "be masked (not enabled). \n");
1451  releaseGroup(Group);
1452  ReleasedGroup = true;
1453  }
1454  assert(ReleasedGroup && "At least one group must be invalidated, as a "
1455  "scalar epilogue was required");
1456  (void)ReleasedGroup;
1457  RequiresScalarEpilogue = false;
1458 }
1459 
1460 template <typename InstT>
1461 void InterleaveGroup<InstT>::addMetadata(InstT *NewInst) const {
1462  llvm_unreachable("addMetadata can only be used for Instruction");
1463 }
1464 
1465 namespace llvm {
1466 template <>
1467 void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
1468  SmallVector<Value *, 4> VL;
1469  std::transform(Members.begin(), Members.end(), std::back_inserter(VL),
1470  [](std::pair<int, Instruction *> p) { return p.second; });
1471  propagateMetadata(NewInst, VL);
1472 }
1473 }
1474 
1475 std::string VFABI::mangleTLIVectorName(StringRef VectorName,
1476  StringRef ScalarName, unsigned numArgs,
1477  ElementCount VF) {
1478  SmallString<256> Buffer;
1479  llvm::raw_svector_ostream Out(Buffer);
1480  Out << "_ZGV" << VFABI::_LLVM_ << "N";
1481  if (VF.isScalable())
1482  Out << 'x';
1483  else
1484  Out << VF.getFixedValue();
1485  for (unsigned I = 0; I < numArgs; ++I)
1486  Out << "v";
1487  Out << "_" << ScalarName << "(" << VectorName << ")";
1488  return std::string(Out.str());
1489 }
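// Worked example:
//   mangleTLIVectorName("vsinf", "sinf", /*numArgs=*/1, ElementCount::getFixed(4))
// returns "_ZGV_LLVM_N4v_sinf(vsinf)"; with a scalable VF the lane count is
// replaced by 'x'.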
1490 
1491 void VFABI::getVectorVariantNames(
1492  const CallInst &CI, SmallVectorImpl<std::string> &VariantMappings) {
1493  const StringRef S = CI.getFnAttr(VFABI::MappingsAttrName).getValueAsString();
1494  if (S.empty())
1495  return;
1496 
1497  SmallVector<StringRef, 8> ListAttr;
1498  S.split(ListAttr, ",");
1499 
1500  for (const auto &S : SetVector<StringRef>(ListAttr.begin(), ListAttr.end())) {
1501 #ifndef NDEBUG
1502  LLVM_DEBUG(dbgs() << "VFABI: adding mapping '" << S << "'\n");
1503  std::optional<VFInfo> Info = VFABI::tryDemangleForVFABI(S, *(CI.getModule()));
1504  assert(Info && "Invalid name for a VFABI variant.");
1505  assert(CI.getModule()->getFunction(Info.value().VectorName) &&
1506  "Vector function is missing.");
1507 #endif
1508  VariantMappings.push_back(std::string(S));
1509  }
1510 }
1511 
1512 bool VFShape::hasValidParameterList() const {
1513  for (unsigned Pos = 0, NumParams = Parameters.size(); Pos < NumParams;
1514  ++Pos) {
1515  assert(Parameters[Pos].ParamPos == Pos && "Broken parameter list.");
1516 
1517  switch (Parameters[Pos].ParamKind) {
1518  default: // Nothing to check.
1519  break;
1520  case VFParamKind::OMP_Linear:
1521  case VFParamKind::OMP_LinearRef:
1522  case VFParamKind::OMP_LinearVal:
1523  case VFParamKind::OMP_LinearUVal:
1524  // Compile time linear steps must be non-zero.
1525  if (Parameters[Pos].LinearStepOrPos == 0)
1526  return false;
1527  break;
1528  case VFParamKind::OMP_LinearPos:
1529  case VFParamKind::OMP_LinearRefPos:
1530  case VFParamKind::OMP_LinearValPos:
1531  case VFParamKind::OMP_LinearUValPos:
1532  // The runtime linear step must be referring to some other
1533  // parameters in the signature.
1534  if (Parameters[Pos].LinearStepOrPos >= int(NumParams))
1535  return false;
1536  // The linear step parameter must be marked as uniform.
1537  if (Parameters[Parameters[Pos].LinearStepOrPos].ParamKind !=
1538  VFParamKind::OMP_Uniform)
1539  return false;
1540  // The linear step parameter can't point at itself.
1541  if (Parameters[Pos].LinearStepOrPos == int(Pos))
1542  return false;
1543  break;
1544  case VFParamKind::GlobalPredicate:
1545  // The global predicate must be unique. It can be placed anywhere in the
1546  // signature.
1547  for (unsigned NextPos = Pos + 1; NextPos < NumParams; ++NextPos)
1548  if (Parameters[NextPos].ParamKind == VFParamKind::GlobalPredicate)
1549  return false;
1550  break;
1551  }
1552  }
1553  return true;
1554 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
i
i
Definition: README.txt:29
llvm::Loop::isLoopInvariant
bool isLoopInvariant(const Value *V) const
Return true if the specified value is loop invariant.
Definition: LoopInfo.cpp:60
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::maskIsAllOneOrUndef
bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:1043
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
llvm::InterleaveGroup::insertMember
bool insertMember(InstTy *Instr, int32_t Index, Align NewAlign)
Try to insert a new member Instr with index Index and alignment NewAlign.
Definition: VectorUtils.h:634
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ElementCount
Definition: TypeSize.h:404
llvm::getVectorIntrinsicIDForCall
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
Definition: VectorUtils.cpp:135
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::getGEPInductionOperand
unsigned getGEPInductionOperand(const GetElementPtrInst *Gep)
Find the operand of the GEP that should be checked for consecutive stores.
Definition: VectorUtils.cpp:152
llvm::possiblyDemandedEltsInMask
APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
Definition: VectorUtils.cpp:1071
llvm::scConstant
@ scConstant
Definition: ScalarEvolutionExpressions.h:41
llvm::Instruction::getAllMetadataOtherThanDebugLoc
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * >> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
Definition: Instruction.h:294
GetElementPtrTypeIterator.h
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:314
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1484
llvm::VFParamKind::GlobalPredicate
@ GlobalPredicate
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1182
llvm::EquivalenceClasses::member_end
member_iterator member_end() const
Definition: EquivalenceClasses.h:178
llvm::PatternMatch::m_Add
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
Definition: PatternMatch.h:979
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1478
llvm::EquivalenceClasses
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
Definition: EquivalenceClasses.h:60
llvm::getSplatValue
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
Definition: VectorUtils.cpp:371
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::VFABI::getVectorVariantNames
void getVectorVariantNames(const CallInst &CI, SmallVectorImpl< std::string > &VariantMappings)
Populates a set of strings representing the Vector Function ABI variants associated to the CallInst C...
Definition: VectorUtils.cpp:1491
concatenateTwoVectors
static Value * concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, Value *V2)
A helper function for concatenating vectors.
Definition: VectorUtils.cpp:967
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::maskIsAllZeroOrUndef
bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
Definition: VectorUtils.cpp:1017
llvm::MaxAnalysisRecursionDepth
constexpr unsigned MaxAnalysisRecursionDepth
Definition: ValueTracking.h:47
ValueTracking.h
llvm::isVectorIntrinsicWithScalarOpAtArg
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
Definition: VectorUtils.cpp:101
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::computeMinimumValueSizes
MapVector< Instruction *, uint64_t > computeMinimumValueSizes(ArrayRef< BasicBlock * > Blocks, DemandedBits &DB, const TargetTransformInfo *TTI=nullptr)
Compute a map of integer instructions to their minimum legal type size.
Definition: VectorUtils.cpp:615
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5406
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
ScalarEvolution.h
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:432
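A minimal sketch of the narrowing behaviour (assumes llvm/Analysis/VectorUtils.h; the wrapper function is hypothetical and the expected lanes follow the documented semantics):

#include "llvm/Analysis/VectorUtils.h"

void narrowMaskExample() {
  llvm::SmallVector<int, 16> Narrowed;
  // With Scale = 2, each mask element m expands to the pair (2*m, 2*m+1),
  // so the mask <1, 2> is expected to become <2, 3, 4, 5>.
  llvm::narrowShuffleMaskElts(/*Scale=*/2, /*Mask=*/{1, 2}, Narrowed);
}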
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
MaxInterleaveGroupFactor
static cl::opt< unsigned > MaxInterleaveGroupFactor("max-interleave-group-factor", cl::Hidden, cl::desc("Maximum factor for an interleaved access group (default = 8)"), cl::init(8))
Maximum factor for an interleaved memory access.
llvm::getPtrStride
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
Definition: LoopAccessAnalysis.cpp:1359
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1411
llvm::getUniqueCastUse
Value * getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty)
If a value has only one user that is a CastInst, return it.
Definition: VectorUtils.cpp:193
llvm::createUnaryMask
llvm::SmallVector< int, 16 > createUnaryMask(ArrayRef< int > Mask, unsigned NumElts)
Given a shuffle mask for a binary shuffle, create the equivalent shuffle mask assuming both operands ...
Definition: VectorUtils.cpp:947
llvm::Optional
Definition: APInt.h:33
llvm::isVectorIntrinsicWithOverloadTypeAtArg
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx)
Identifies if the vector form of the intrinsic has an operand that has an overloaded type.
Definition: VectorUtils.cpp:119
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet< Value *, 4 >
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
llvm::VFParamKind::OMP_Uniform
@ OMP_Uniform
llvm::propagateMetadata
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition: VectorUtils.cpp:840
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:123
llvm::Module::getFunction
Function * getFunction(StringRef Name) const
Look up the specified function in the module symbol table.
Definition: Module.cpp:175
llvm::uniteAccessGroups
MDNode * uniteAccessGroups(MDNode *AccGroups1, MDNode *AccGroups2)
Compute the union of two access-group lists.
Definition: VectorUtils.cpp:772
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1707
llvm::InterleaveGroup
The group of interleaved loads/stores sharing the same stride and close to each other.
Definition: VectorUtils.h:284
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:298
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2137
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:159
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MDNode::get
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1400
llvm::Instruction::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1428
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::MDNode::getNumOperands
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1298
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:1996
llvm::VFShape::hasValidParameterList
bool hasValidParameterList() const
Validation check on the Parameters in the VFShape.
Definition: VectorUtils.cpp:1512
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::remove
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:157
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:486
llvm::isSplatValue
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
Definition: VectorUtils.cpp:386
llvm::VFABI::mangleTLIVectorName
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName, unsigned numArgs, ElementCount VF)
This routine mangles the given VectorName according to the LangRef specification for vector-function-...
Definition: VectorUtils.cpp:1475
llvm::InterleavedAccessInfo::invalidateGroupsRequiringScalarEpilogue
void invalidateGroupsRequiringScalarEpilogue()
Invalidate groups that require a scalar epilogue (due to gaps).
Definition: VectorUtils.cpp:1434
llvm::createInterleaveMask
llvm::SmallVector< int, 16 > createInterleaveMask(unsigned VF, unsigned NumVecs)
Create an interleave shuffle mask.
Definition: VectorUtils.cpp:915
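A minimal sketch of the interleave mask this helper builds (assumes llvm/Analysis/VectorUtils.h; the wrapper function is hypothetical and the expected lanes follow the documented semantics):

#include "llvm/Analysis/VectorUtils.h"

void interleaveMaskExample() {
  // For VF = 4 and NumVecs = 2 the expected mask is <0, 4, 1, 5, 2, 6, 3, 7>,
  // i.e. lane i of vector 0 followed by lane i of vector 1.
  llvm::SmallVector<int, 16> Mask =
      llvm::createInterleaveMask(/*VF=*/4, /*NumVecs=*/2);
  (void)Mask;
}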
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1462
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:28
llvm::ARM_MC::isPredicated
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
Definition: ARMMCTargetDesc.cpp:170
llvm::Instruction::mayReadOrWriteMemory
bool mayReadOrWriteMemory() const
Return true if this instruction may read or write memory.
Definition: Instruction.h:586
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
getScalarSizeInBits
static unsigned getScalarSizeInBits(Type *Ty)
Definition: SystemZTargetTransformInfo.cpp:403
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::isTriviallyVectorizable
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
Definition: VectorUtils.cpp:44
llvm::User
Definition: User.h:44
llvm::getStrideFromPointer
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
Definition: VectorUtils.cpp:209
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:56
llvm::VFParamKind::OMP_LinearVal
@ OMP_LinearVal
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::MDNode::operands
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1290
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:232
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1936
llvm::NextPowerOf2
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition: MathExtras.h:612
llvm::VFParamKind::OMP_LinearUValPos
@ OMP_LinearUValPos
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::SCEVMulExpr
This node represents multiplication of some number of SCEVs.
Definition: ScalarEvolutionExpressions.h:281
llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition: Instructions.h:2063
llvm::EquivalenceClasses::end
iterator end() const
Definition: EquivalenceClasses.h:168
llvm::Instruction
Definition: Instruction.h:42
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1466
llvm::MapVector::rend
reverse_iterator rend()
Definition: MapVector.h:77
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1708
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::EquivalenceClasses::begin
iterator begin() const
Definition: EquivalenceClasses.h:167
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:670
PatternMatch.h
llvm::InterleaveGroup::isReverse
bool isReverse() const
Definition: VectorUtils.h:624
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::InterleaveGroup::getMember
InstTy * getMember(uint32_t Index) const
Get the member with the given index Index.
Definition: VectorUtils.h:679
llvm::Attribute::getValueAsString
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:304
LoopIterator.h
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::VFParamKind::OMP_LinearValPos
@ OMP_LinearValPos
llvm::SCEVUnknown::getValue
Value * getValue() const
Definition: ScalarEvolutionExpressions.h:592
llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2054
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:271
llvm::SmallString< 256 >
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1307
LoopInfo.h
llvm::InterleaveGroup::setInsertPos
void setInsertPos(InstTy *Inst)
Definition: VectorUtils.h:696
llvm::VFABI::_LLVM_
static constexpr const char * _LLVM_
LLVM Internal VFABI ISA token for vector functions.
Definition: VectorUtils.h:132
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4436
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLFunctionalExtras.h:36
llvm::LoopBlocksDFS
Store the result of a depth first search within basic blocks contained by a single loop.
Definition: LoopIterator.h:97
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::InterleaveGroup::getIndex
uint32_t getIndex(const InstTy *Instr) const
Get the index for the given member.
Definition: VectorUtils.h:686
VectorUtils.h
llvm::cl::opt
Definition: CommandLine.h:1399
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:537
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MDNode::intersect
static MDNode * intersect(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1020
llvm::VFParamKind::OMP_LinearRef
@ OMP_LinearRef
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
uint64_t
llvm::InterleaveGroup::getFactor
uint32_t getFactor() const
Definition: VectorUtils.h:625
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:54
llvm::DenseMap
Definition: DenseMap.h:714
llvm::DemandedBits
Definition: DemandedBits.h:40
I
#define I(x, y, z)
Definition: MD5.cpp:58
addToAccessGroupList
static void addToAccessGroupList(ListT &List, MDNode *AccGroups)
Add all access groups in AccGroups to List.
Definition: VectorUtils.cpp:757
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:439
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:596
llvm::concatenateVectors
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
Definition: VectorUtils.cpp:989
llvm::GetElementPtrInst::getResultElementType
Type * getResultElementType() const
Definition: Instructions.h:1009
llvm::SCEVConstant
This class represents a constant integer value.
Definition: ScalarEvolutionExpressions.h:60
llvm::MDNode::getMostGenericTBAA
static MDNode * getMostGenericTBAA(MDNode *A, MDNode *B)
Definition: TypeBasedAliasAnalysis.cpp:478
IRBuilder.h
llvm::InterleavedAccessInfo::analyzeInterleaving
void analyzeInterleaving(bool EnableMaskedInterleavedGroup)
Analyze the interleaved accesses and collect them in interleave groups.
Definition: VectorUtils.cpp:1166
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:70
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1523
llvm::getLoadStoreAddressSpace
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of a load or store instruction.
Definition: Instructions.h:5397
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::createBitMaskForGaps
Constant * createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, const InterleaveGroup< Instruction > &Group)
Create a mask that filters the members of an interleave group where there are gaps.
Definition: VectorUtils.cpp:886
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
getType
static M68kRelType getType(unsigned Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel)
Definition: M68kELFObjectWriter.cpp:48
llvm::EquivalenceClasses::getOrInsertLeaderValue
const ElemTy & getOrInsertLeaderValue(const ElemTy &V)
getOrInsertLeaderValue - Return the leader for the specified value that is in the set.
Definition: EquivalenceClasses.h:200
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2127
llvm::intersectAccessGroups
MDNode * intersectAccessGroups(const Instruction *Inst1, const Instruction *Inst2)
Compute the access-group list of access groups that Inst1 and Inst2 are both in.
Definition: VectorUtils.cpp:793
llvm::CallBase::getFnAttr
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
Definition: InstrTypes.h:1615
llvm::ArrayRef< int >
llvm::replaceSymbolicStrideSCEV
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
Definition: LoopAccessAnalysis.cpp:150
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::EquivalenceClasses::member_begin
member_iterator member_begin(iterator I) const
Definition: EquivalenceClasses.h:174
llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition: raw_ostream.h:683
llvm::LinearPolySize::getFixedValue
ScalarTy getFixedValue() const
Definition: TypeSize.h:312
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:410
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1551
llvm::processShuffleMasks
void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
Definition: VectorUtils.cpp:504
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::MDNode::getMostGenericAliasScope
static MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1033
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:93
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1348
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:429
llvm::createReplicatedMask
llvm::SmallVector< int, 16 > createReplicatedMask(unsigned ReplicationFactor, unsigned VF)
Create a mask with replicated elements.
Definition: VectorUtils.cpp:906
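A minimal sketch of a replicated mask (assumes llvm/Analysis/VectorUtils.h; the wrapper function is hypothetical and the values follow the documented semantics):

#include "llvm/Analysis/VectorUtils.h"

void replicatedMaskExample() {
  // For ReplicationFactor = 3 and VF = 2 the expected mask is <0, 0, 0, 1, 1, 1>.
  llvm::SmallVector<int, 16> Mask =
      llvm::createReplicatedMask(/*ReplicationFactor=*/3, /*VF=*/2);
  (void)Mask;
}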
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:167
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1357
llvm::MapVector::rbegin
reverse_iterator rbegin()
Definition: MapVector.h:75
llvm::ScalarEvolution::isLoopInvariant
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
Definition: ScalarEvolution.cpp:13496
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1617
DemandedBits.h
llvm::MDNode::getContext
LLVMContext & getContext() const
Definition: Metadata.h:1108
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:80
llvm::VFParamKind::OMP_LinearPos
@ OMP_LinearPos
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2142
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition: VectorUtils.cpp:453
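A minimal sketch of the widening transform (assumes llvm/Analysis/VectorUtils.h; the wrapper function is hypothetical and the expected results follow the documented semantics):

#include "llvm/Analysis/VectorUtils.h"

void widenMaskExample() {
  llvm::SmallVector<int, 16> Widened;
  // With Scale = 2, <0, 1, 6, 7> regroups into wide lanes <0, 3> and succeeds;
  // a mask such as <1, 0, 2, 3> cannot be regrouped, so the call returns false.
  bool Ok = llvm::widenShuffleMaskElts(/*Scale=*/2, /*Mask=*/{0, 1, 6, 7}, Widened);
  (void)Ok;
}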
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1296
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:256
llvm::isValidAsAccessGroup
bool isValidAsAccessGroup(MDNode *AccGroup)
Return whether an MDNode might represent an access group.
Definition: LoopInfo.cpp:1118
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:691
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:152
llvm::TypeSize
Definition: TypeSize.h:435
llvm::getIntrinsicForCallSite
Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB, const TargetLibraryInfo *TLI)
Map a call instruction to an intrinsic ID.
Definition: ValueTracking.cpp:3424
llvm::SCEVAddRecExpr
This node represents a polynomial recurrence on the trip count of the specified loop.
Definition: ScalarEvolutionExpressions.h:342
llvm::VFParamKind::OMP_LinearRefPos
@ OMP_LinearRefPos
llvm::SCEVUnknown
This means that we are dealing with an entirely unknown SCEV value, and only represent it as its LLVM...
Definition: ScalarEvolutionExpressions.h:571
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
EquivalenceClasses.h
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:793
llvm::DemandedBits::getDemandedBits
APInt getDemandedBits(Instruction *I)
Return the bits demanded from instruction I.
Definition: DemandedBits.cpp:437
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:597
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:524
llvm::countLeadingZeros
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: MathExtras.h:221
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:439
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition: VectorUtils.cpp:285
llvm::SCEVConstant::getAPInt
const APInt & getAPInt() const
Definition: ScalarEvolutionExpressions.h:70
llvm::VFParamKind::OMP_LinearUVal
@ OMP_LinearUVal
ScalarEvolutionExpressions.h
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2008
llvm::SCEVIntegralCastExpr
This is the base class for unary integral cast operator classes.
Definition: ScalarEvolutionExpressions.h:129
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::getSplatIndex
int getSplatIndex(ArrayRef< int > Mask)
If all non-negative Mask elements are the same value, return that value.
Definition: VectorUtils.cpp:349
llvm::MDNode::getMostGenericFPMath
static MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
Definition: Metadata.cpp:1065
List
const NodeList & List
Definition: RDFGraph.cpp:199
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:660
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::VFABI::MappingsAttrName
static constexpr const char * MappingsAttrName
Definition: VectorUtils.h:193
llvm::stripGetElementPtr
Value * stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
If the argument is a GEP, then returns the operand identified by getGEPInductionOperand.
Definition: VectorUtils.cpp:176
TargetTransformInfo.h
llvm::raw_svector_ostream
A raw_ostream that writes to a SmallVector or SmallString.
Definition: raw_ostream.h:658
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::SmallVectorImpl< int >
llvm::EquivalenceClasses::unionSets
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
Definition: EquivalenceClasses.h:238
llvm::VFABI::tryDemangleForVFABI
Optional< VFInfo > tryDemangleForVFABI(StringRef MangledName, const Module &M)
Function to construct a VFInfo out of a mangled name in the following format:
Definition: VFABIDemangling.cpp:317
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5361
llvm::createStrideMask
llvm::SmallVector< int, 16 > createStrideMask(unsigned Start, unsigned Stride, unsigned VF)
Create a stride shuffle mask.
Definition: VectorUtils.cpp:926
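A minimal sketch of a strided mask (assumes llvm/Analysis/VectorUtils.h; the wrapper function is hypothetical and the values follow the documented semantics):

#include "llvm/Analysis/VectorUtils.h"

void strideMaskExample() {
  // For Start = 0, Stride = 2 and VF = 4 the expected mask is <0, 2, 4, 6>.
  llvm::SmallVector<int, 16> Mask =
      llvm::createStrideMask(/*Start=*/0, /*Stride=*/2, /*VF=*/4);
  (void)Mask;
}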
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:412
llvm::all_equal
bool all_equal(R &&Range)
Returns true if all elements in Range are equal or when the Range is empty.
Definition: STLExtras.h:1782
llvm::createSequentialMask
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
Definition: VectorUtils.cpp:934
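A minimal sketch of a sequential mask (assumes llvm/Analysis/VectorUtils.h; the wrapper function is hypothetical and the values follow the documented semantics):

#include "llvm/Analysis/VectorUtils.h"

void sequentialMaskExample() {
  // For Start = 0, NumInts = 4 and NumUndefs = 2 the expected mask is
  // <0, 1, 2, 3, -1, -1>, where -1 marks an undef lane.
  llvm::SmallVector<int, 16> Mask =
      llvm::createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/2);
  (void)Mask;
}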
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::SmallVectorImpl::reserve
void reserve(size_type N)
Definition: SmallVector.h:650
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:464
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::VFParamKind::OMP_Linear
@ OMP_Linear
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:153
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2132
llvm::getLoadStoreAlignment
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of a load or store instruction.
Definition: Instructions.h:5387
llvm::MDOperand
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:773
llvm::InterleaveGroup::getNumMembers
uint32_t getNumMembers() const
Definition: VectorUtils.h:627
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::InterleaveGroup::addMetadata
void addMetadata(InstTy *NewInst) const
Add metadata (e.g.
Definition: VectorUtils.cpp:1461
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38