1 //===------- VectorCombine.cpp - Optimize partial vector operations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass optimizes scalar/vector interactions using target cost models. The
10 // transforms implemented here may not fit in traditional loop-based or SLP
11 // vectorization passes.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Vectorize/VectorCombine.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/AssumptionCache.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/GlobalsModRef.h"
20 #include "llvm/Analysis/Loads.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/Analysis/VectorUtils.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/PatternMatch.h"
28 #include "llvm/InitializePasses.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Transforms/Utils/Local.h"
32 #include "llvm/Transforms/Vectorize.h"
33 #include <numeric>
34 
35 #define DEBUG_TYPE "vector-combine"
36 #include "llvm/Transforms/Utils/InstructionWorklist.h"
37 
38 using namespace llvm;
39 using namespace llvm::PatternMatch;
40 
41 STATISTIC(NumVecLoad, "Number of vector loads formed");
42 STATISTIC(NumVecCmp, "Number of vector compares formed");
43 STATISTIC(NumVecBO, "Number of vector binops formed");
44 STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
45 STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
46 STATISTIC(NumScalarBO, "Number of scalar binops formed");
47 STATISTIC(NumScalarCmp, "Number of scalar compares formed");
48 
49 static cl::opt<bool> DisableVectorCombine(
50  "disable-vector-combine", cl::init(false), cl::Hidden,
51  cl::desc("Disable all vector combine transforms"));
52 
53 static cl::opt<bool> DisableBinopExtractShuffle(
54  "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
55  cl::desc("Disable binop extract to shuffle transforms"));
56 
57 static cl::opt<unsigned> MaxInstrsToScan(
58  "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
59  cl::desc("Max number of instructions to scan for vector combining."));
60 
61 static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
62 
63 namespace {
64 class VectorCombine {
65 public:
66  VectorCombine(Function &F, const TargetTransformInfo &TTI,
67  const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
68  bool TryEarlyFoldsOnly)
69  : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC),
70  TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
71 
72  bool run();
73 
74 private:
75  Function &F;
76  IRBuilder<> Builder;
77  const TargetTransformInfo &TTI;
78  const DominatorTree &DT;
79  AAResults &AA;
80  AssumptionCache &AC;
81 
82  /// If true, only perform beneficial early IR transforms. Do not introduce new
83  /// vector operations.
84  bool TryEarlyFoldsOnly;
85 
86  InstructionWorklist Worklist;
87 
88  // TODO: Direct calls from the top-level "run" loop use a plain "Instruction"
89  // parameter. That should be updated to specific sub-classes because the
90  // run loop was changed to dispatch on opcode.
91  bool vectorizeLoadInsert(Instruction &I);
92  bool widenSubvectorLoad(Instruction &I);
93  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
94  ExtractElementInst *Ext1,
95  unsigned PreferredExtractIndex) const;
96  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
97  const Instruction &I,
98  ExtractElementInst *&ConvertToShuffle,
99  unsigned PreferredExtractIndex);
100  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
101  Instruction &I);
102  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
103  Instruction &I);
104  bool foldExtractExtract(Instruction &I);
105  bool foldInsExtFNeg(Instruction &I);
106  bool foldBitcastShuf(Instruction &I);
107  bool scalarizeBinopOrCmp(Instruction &I);
108  bool foldExtractedCmps(Instruction &I);
109  bool foldSingleElementStore(Instruction &I);
110  bool scalarizeLoadExtract(Instruction &I);
111  bool foldShuffleOfBinops(Instruction &I);
112  bool foldShuffleFromReductions(Instruction &I);
113  bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
114 
115  void replaceValue(Value &Old, Value &New) {
116  Old.replaceAllUsesWith(&New);
117  if (auto *NewI = dyn_cast<Instruction>(&New)) {
118  New.takeName(&Old);
119  Worklist.pushUsersToWorkList(*NewI);
120  Worklist.pushValue(NewI);
121  }
122  Worklist.pushValue(&Old);
123  }
124 
125  void eraseInstruction(Instruction &I) {
126  for (Value *Op : I.operands())
127  Worklist.pushValue(Op);
128  Worklist.remove(&I);
129  I.eraseFromParent();
130  }
131 };
132 } // namespace
133 
134 static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
135  // Do not widen load if atomic/volatile or under asan/hwasan/memtag/tsan.
136  // The widened load may load data from dirty regions or create data races
137  // that do not exist in the source.
138  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
139  Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
140  mustSuppressSpeculation(*Load))
141  return false;
142 
143  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
144  // sure we have all of our type-based constraints in place for this target.
145  Type *ScalarTy = Load->getType()->getScalarType();
146  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
147  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
148  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
149  ScalarSize % 8 != 0)
150  return false;
151 
152  return true;
153 }
154 
155 bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
156  // Match insert into fixed vector of scalar value.
157  // TODO: Handle non-zero insert index.
158  Value *Scalar;
159  if (!match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
160  !Scalar->hasOneUse())
161  return false;
162 
163  // Optionally match an extract from another vector.
164  Value *X;
165  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
166  if (!HasExtract)
167  X = Scalar;
168 
169  auto *Load = dyn_cast<LoadInst>(X);
170  if (!canWidenLoad(Load, TTI))
171  return false;
172 
173  Type *ScalarTy = Scalar->getType();
174  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
175  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
176 
177  // Check safety of replacing the scalar load with a larger vector load.
178  // We use minimal alignment (maximum flexibility) because we only care about
179  // the dereferenceable region. When calculating cost and creating a new op,
180  // we may use a larger value based on alignment attributes.
181  const DataLayout &DL = I.getModule()->getDataLayout();
182  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
183  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
184 
185  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
186  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
187  unsigned OffsetEltIndex = 0;
188  Align Alignment = Load->getAlign();
189  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &AC,
190  &DT)) {
191  // It is not safe to load directly from the pointer, but we can still peek
192  // through gep offsets and check if it is safe to load from a base address with
193  // updated alignment. If it is, we can shuffle the element(s) into place
194  // after loading.
195  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
196  APInt Offset(OffsetBitWidth, 0);
197  SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
198 
199  // We want to shuffle the result down from a high element of a vector, so
200  // the offset must be positive.
201  if (Offset.isNegative())
202  return false;
203 
204  // The offset must be a multiple of the scalar element to shuffle cleanly
205  // in the element's size.
206  uint64_t ScalarSizeInBytes = ScalarSize / 8;
207  if (Offset.urem(ScalarSizeInBytes) != 0)
208  return false;
209 
210  // If we load MinVecNumElts, will our target element still be loaded?
211  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
212  if (OffsetEltIndex >= MinVecNumElts)
213  return false;
214 
215  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &AC,
216  &DT))
217  return false;
218 
219  // Update alignment with offset value. Note that the offset could be negated
220  // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
221  // negation does not change the result of the alignment calculation.
222  Alignment = commonAlignment(Alignment, Offset.getZExtValue());
223  }
224 
225  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
226  // Use the greater of the alignment on the load or its source pointer.
227  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
228  Type *LoadTy = Load->getType();
229  unsigned AS = Load->getPointerAddressSpace();
230  InstructionCost OldCost =
231  TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
232  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
233  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
234  /* Insert */ true, HasExtract);
235 
236  // New pattern: load VecPtr
237  InstructionCost NewCost =
238  TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
239  // Optionally, we are shuffling the loaded vector element(s) into place.
240  // For the mask set everything but element 0 to undef to prevent poison from
241  // propagating from the extra loaded memory. This will also optionally
242  // shrink/grow the vector from the loaded size to the output size.
243  // We assume this operation has no cost in codegen if there was no offset.
244  // Note that we could use freeze to avoid poison problems, but then we might
245  // still need a shuffle to change the vector size.
246  auto *Ty = cast<FixedVectorType>(I.getType());
247  unsigned OutputNumElts = Ty->getNumElements();
248  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
249  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
250  Mask[0] = OffsetEltIndex;
251  if (OffsetEltIndex)
252  NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy, Mask);
253 
254  // We can aggressively convert to the vector form because the backend can
255  // invert this transform if it does not result in a performance win.
256  if (OldCost < NewCost || !NewCost.isValid())
257  return false;
258 
259  // It is safe and potentially profitable to load a vector directly:
260  // inselt undef, load Scalar, 0 --> load VecPtr
261  IRBuilder<> Builder(Load);
262  Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
263  SrcPtr, MinVecTy->getPointerTo(AS));
264  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
265  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
266 
267  replaceValue(I, *VecLd);
268  ++NumVecLoad;
269  return true;
270 }
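// Illustrative sketch of the fold above (hypothetical IR, assuming a 128-bit
// minimum vector register and a dereferenceable source pointer):
//   %s = load float, ptr %p, align 4
//   %r = insertelement <4 x float> undef, float %s, i64 0
// could become roughly:
//   %wide = load <4 x float>, ptr %p, align 4
//   %r = shufflevector <4 x float> %wide, <4 x float> poison,
//                      <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>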
271 
272 /// If we are loading a vector and then inserting it into a larger vector with
273 /// undefined elements, try to load the larger vector and eliminate the insert.
274 /// This removes a shuffle in IR and may allow combining of other loaded values.
275 bool VectorCombine::widenSubvectorLoad(Instruction &I) {
276  // Match subvector insert of fixed vector.
277  auto *Shuf = cast<ShuffleVectorInst>(&I);
278  if (!Shuf->isIdentityWithPadding())
279  return false;
280 
281  // Allow a non-canonical shuffle mask that is choosing elements from op1.
282  unsigned NumOpElts =
283  cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
284  unsigned OpIndex = any_of(Shuf->getShuffleMask(), [&NumOpElts](int M) {
285  return M >= (int)(NumOpElts);
286  });
287 
288  auto *Load = dyn_cast<LoadInst>(Shuf->getOperand(OpIndex));
289  if (!canWidenLoad(Load, TTI))
290  return false;
291 
292  // We use minimal alignment (maximum flexibility) because we only care about
293  // the dereferenceable region. When calculating cost and creating a new op,
294  // we may use a larger value based on alignment attributes.
295  auto *Ty = cast<FixedVectorType>(I.getType());
296  const DataLayout &DL = I.getModule()->getDataLayout();
297  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
298  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
299  Align Alignment = Load->getAlign();
300  if (!isSafeToLoadUnconditionally(SrcPtr, Ty, Align(1), DL, Load, &AC, &DT))
301  return false;
302 
303  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
304  Type *LoadTy = Load->getType();
305  unsigned AS = Load->getPointerAddressSpace();
306 
307  // Original pattern: insert_subvector (load PtrOp)
308  // This conservatively assumes that the cost of a subvector insert into an
309  // undef value is 0. We could add that cost if the cost model accurately
310  // reflects the real cost of that operation.
311  InstructionCost OldCost =
312  TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
313 
314  // New pattern: load PtrOp
315  InstructionCost NewCost =
316  TTI.getMemoryOpCost(Instruction::Load, Ty, Alignment, AS);
317 
318  // We can aggressively convert to the vector form because the backend can
319  // invert this transform if it does not result in a performance win.
320  if (OldCost < NewCost || !NewCost.isValid())
321  return false;
322 
323  IRBuilder<> Builder(Load);
324  Value *CastedPtr =
325  Builder.CreatePointerBitCastOrAddrSpaceCast(SrcPtr, Ty->getPointerTo(AS));
326  Value *VecLd = Builder.CreateAlignedLoad(Ty, CastedPtr, Alignment);
327  replaceValue(I, *VecLd);
328  ++NumVecLoad;
329  return true;
330 }
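// Illustrative sketch of the fold above (hypothetical IR, assuming the wider
// region is known dereferenceable and the cost model agrees):
//   %v = load <2 x float>, ptr %p, align 8
//   %w = shufflevector <2 x float> %v, <2 x float> poison,
//                      <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
// could be replaced by a single wider load:
//   %w = load <4 x float>, ptr %p, align 8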
331 
332 /// Determine which, if any, of the inputs should be replaced by a shuffle
333 /// followed by extract from a different index.
334 ExtractElementInst *VectorCombine::getShuffleExtract(
335  ExtractElementInst *Ext0, ExtractElementInst *Ext1,
336  unsigned PreferredExtractIndex = InvalidIndex) const {
337  auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
338  auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
339  assert(Index0C && Index1C && "Expected constant extract indexes");
340 
341  unsigned Index0 = Index0C->getZExtValue();
342  unsigned Index1 = Index1C->getZExtValue();
343 
344  // If the extract indexes are identical, no shuffle is needed.
345  if (Index0 == Index1)
346  return nullptr;
347 
348  Type *VecTy = Ext0->getVectorOperand()->getType();
349  assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
350  InstructionCost Cost0 = TTI.getVectorInstrCost(*Ext0, VecTy, Index0);
351  InstructionCost Cost1 = TTI.getVectorInstrCost(*Ext1, VecTy, Index1);
352 
353  // If both costs are invalid no shuffle is needed
354  if (!Cost0.isValid() && !Cost1.isValid())
355  return nullptr;
356 
357  // We are extracting from 2 different indexes, so one operand must be shuffled
358  // before performing a vector operation and/or extract. The more expensive
359  // extract will be replaced by a shuffle.
360  if (Cost0 > Cost1)
361  return Ext0;
362  if (Cost1 > Cost0)
363  return Ext1;
364 
365  // If the costs are equal and there is a preferred extract index, shuffle the
366  // opposite operand.
367  if (PreferredExtractIndex == Index0)
368  return Ext1;
369  if (PreferredExtractIndex == Index1)
370  return Ext0;
371 
372  // Otherwise, replace the extract with the higher index.
373  return Index0 > Index1 ? Ext0 : Ext1;
374 }
375 
376 /// Compare the relative costs of 2 extracts followed by scalar operation vs.
377 /// vector operation(s) followed by extract. Return true if the existing
378 /// instructions are cheaper than a vector alternative. Otherwise, return false
379 /// and if one of the extracts should be transformed to a shufflevector, set
380 /// \p ConvertToShuffle to that extract instruction.
381 bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
382  ExtractElementInst *Ext1,
383  const Instruction &I,
384  ExtractElementInst *&ConvertToShuffle,
385  unsigned PreferredExtractIndex) {
386  auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getOperand(1));
387  auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getOperand(1));
388  assert(Ext0IndexC && Ext1IndexC && "Expected constant extract indexes");
389 
390  unsigned Opcode = I.getOpcode();
391  Type *ScalarTy = Ext0->getType();
392  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
393  InstructionCost ScalarOpCost, VectorOpCost;
394 
395  // Get cost estimates for scalar and vector versions of the operation.
396  bool IsBinOp = Instruction::isBinaryOp(Opcode);
397  if (IsBinOp) {
398  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
399  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
400  } else {
401  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
402  "Expected a compare");
403  CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
404  ScalarOpCost = TTI.getCmpSelInstrCost(
405  Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
406  VectorOpCost = TTI.getCmpSelInstrCost(
407  Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
408  }
409 
410  // Get cost estimates for the extract elements. These costs will factor into
411  // both sequences.
412  unsigned Ext0Index = Ext0IndexC->getZExtValue();
413  unsigned Ext1Index = Ext1IndexC->getZExtValue();
414 
415  InstructionCost Extract0Cost =
416  TTI.getVectorInstrCost(*Ext0, VecTy, Ext0Index);
417  InstructionCost Extract1Cost =
418  TTI.getVectorInstrCost(*Ext1, VecTy, Ext1Index);
419 
420  // A more expensive extract will always be replaced by a splat shuffle.
421  // For example, if Ext0 is more expensive:
422  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
423  // extelt (opcode (splat V0, Ext0), V1), Ext1
424  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
425  // check the cost of creating a broadcast shuffle and shuffling both
426  // operands to element 0.
427  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
428 
429  // Extra uses of the extracts mean that we include those costs in the
430  // vector total because those instructions will not be eliminated.
431  InstructionCost OldCost, NewCost;
432  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
433  // Handle a special case. If the 2 extracts are identical, adjust the
434  // formulas to account for that. The extra use charge allows for either the
435  // CSE'd pattern or an unoptimized form with identical values:
436  // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
437  bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
438  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
439  OldCost = CheapExtractCost + ScalarOpCost;
440  NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
441  } else {
442  // Handle the general case. Each extract is actually a different value:
443  // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
444  OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
445  NewCost = VectorOpCost + CheapExtractCost +
446  !Ext0->hasOneUse() * Extract0Cost +
447  !Ext1->hasOneUse() * Extract1Cost;
448  }
449 
450  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
451  if (ConvertToShuffle) {
452  if (IsBinOp && DisableBinopExtractShuffle)
453  return true;
454 
455  // If we are extracting from 2 different indexes, then one operand must be
456  // shuffled before performing the vector operation. The shuffle mask is
457  // undefined except for 1 lane that is being translated to the remaining
458  // extraction lane. Therefore, it is a splat shuffle. Ex:
459  // ShufMask = { undef, undef, 0, undef }
460  // TODO: The cost model has an option for a "broadcast" shuffle
461  // (splat-from-element-0), but no option for a more general splat.
462  NewCost +=
463  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
464  }
465 
466  // Aggressively form a vector op if the cost is equal because the transform
467  // may enable further optimization.
468  // Codegen can reverse this transform (scalarize) if it was not profitable.
469  return OldCost < NewCost;
470 }
471 
472 /// Create a shuffle that translates (shifts) 1 element from the input vector
473 /// to a new element location.
474 static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
475  unsigned NewIndex, IRBuilder<> &Builder) {
476  // The shuffle mask is undefined except for 1 lane that is being translated
477  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
478  // ShufMask = { 2, undef, undef, undef }
479  auto *VecTy = cast<FixedVectorType>(Vec->getType());
480  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
481  ShufMask[NewIndex] = OldIndex;
482  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
483 }
484 
485 /// Given an extract element instruction with constant index operand, shuffle
486 /// the source vector (shift the scalar element) to a NewIndex for extraction.
487 /// Return null if the input can be constant folded, so that we are not creating
488 /// unnecessary instructions.
489 static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
490  unsigned NewIndex,
491  IRBuilder<> &Builder) {
492  // Shufflevectors can only be created for fixed-width vectors.
493  if (!isa<FixedVectorType>(ExtElt->getOperand(0)->getType()))
494  return nullptr;
495 
496  // If the extract can be constant-folded, this code is unsimplified. Defer
497  // to other passes to handle that.
498  Value *X = ExtElt->getVectorOperand();
499  Value *C = ExtElt->getIndexOperand();
500  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
501  if (isa<Constant>(X))
502  return nullptr;
503 
504  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
505  NewIndex, Builder);
506  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
507 }
508 
509 /// Try to reduce extract element costs by converting scalar compares to vector
510 /// compares followed by extract.
511 /// cmp (ext0 V0, C), (ext1 V1, C)
512 void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
513  ExtractElementInst *Ext1, Instruction &I) {
514  assert(isa<CmpInst>(&I) && "Expected a compare");
515  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
516  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
517  "Expected matching constant extract indexes");
518 
519  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
520  ++NumVecCmp;
521  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
522  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
523  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
524  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
525  replaceValue(I, *NewExt);
526 }
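// Illustrative sketch of the fold above (hypothetical IR, matching indexes):
//   %a = extractelement <4 x i32> %x, i32 2
//   %b = extractelement <4 x i32> %y, i32 2
//   %c = icmp sgt i32 %a, %b
// becomes, roughly:
//   %vc = icmp sgt <4 x i32> %x, %y
//   %c = extractelement <4 x i1> %vc, i32 2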
527 
528 /// Try to reduce extract element costs by converting scalar binops to vector
529 /// binops followed by extract.
530 /// bo (ext0 V0, C), (ext1 V1, C)
531 void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
532  ExtractElementInst *Ext1, Instruction &I) {
533  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
534  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
535  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
536  "Expected matching constant extract indexes");
537 
538  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
539  ++NumVecBO;
540  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
541  Value *VecBO =
542  Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
543 
544  // All IR flags are safe to back-propagate because any potential poison
545  // created in unused vector elements is discarded by the extract.
546  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
547  VecBOInst->copyIRFlags(&I);
548 
549  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
550  replaceValue(I, *NewExt);
551 }
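// Illustrative sketch of the fold above (hypothetical IR, matching indexes):
//   %a = extractelement <4 x float> %x, i32 1
//   %b = extractelement <4 x float> %y, i32 1
//   %r = fadd fast float %a, %b
// becomes, roughly:
//   %v = fadd fast <4 x float> %x, %y
//   %r = extractelement <4 x float> %v, i32 1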
552 
553 /// Match an instruction with extracted vector operands.
554 bool VectorCombine::foldExtractExtract(Instruction &I) {
555  // It is not safe to transform things like div, urem, etc. because we may
556  // create undefined behavior when executing those on unknown vector elements.
557  if (!isSafeToSpeculativelyExecute(&I))
558  return false;
559 
560  Instruction *I0, *I1;
561  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
562  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
563  !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
564  return false;
565 
566  Value *V0, *V1;
567  uint64_t C0, C1;
568  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
569  !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
570  V0->getType() != V1->getType())
571  return false;
572 
573  // If the scalar value 'I' is going to be re-inserted into a vector, then try
574  // to create an extract to that same element. The extract/insert can be
575  // reduced to a "select shuffle".
576  // TODO: If we add a larger pattern match that starts from an insert, this
577  // probably becomes unnecessary.
578  auto *Ext0 = cast<ExtractElementInst>(I0);
579  auto *Ext1 = cast<ExtractElementInst>(I1);
580  uint64_t InsertIndex = InvalidIndex;
581  if (I.hasOneUse())
582  match(I.user_back(),
583  m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
584 
585  ExtractElementInst *ExtractToChange;
586  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
587  return false;
588 
589  if (ExtractToChange) {
590  unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
591  ExtractElementInst *NewExtract =
592  translateExtract(ExtractToChange, CheapExtractIdx, Builder);
593  if (!NewExtract)
594  return false;
595  if (ExtractToChange == Ext0)
596  Ext0 = NewExtract;
597  else
598  Ext1 = NewExtract;
599  }
600 
601  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
602  foldExtExtCmp(Ext0, Ext1, I);
603  else
604  foldExtExtBinop(Ext0, Ext1, I);
605 
606  Worklist.push(Ext0);
607  Worklist.push(Ext1);
608  return true;
609 }
610 
611 /// Try to replace an extract + scalar fneg + insert with a vector fneg +
612 /// shuffle.
613 bool VectorCombine::foldInsExtFNeg(Instruction &I) {
614  // Match an insert (op (extract)) pattern.
615  Value *DestVec;
616  uint64_t Index;
617  Instruction *FNeg;
618  if (!match(&I, m_InsertElt(m_Value(DestVec), m_OneUse(m_Instruction(FNeg)),
619  m_ConstantInt(Index))))
620  return false;
621 
622  // Note: This handles the canonical fneg instruction and "fsub -0.0, X".
623  Value *SrcVec;
624  Instruction *Extract;
625  if (!match(FNeg, m_FNeg(m_CombineAnd(
626  m_Instruction(Extract),
627  m_ExtractElt(m_Value(SrcVec), m_SpecificInt(Index))))))
628  return false;
629 
630  // TODO: We could handle this with a length-changing shuffle.
631  auto *VecTy = cast<FixedVectorType>(I.getType());
632  if (SrcVec->getType() != VecTy)
633  return false;
634 
635  // Ignore bogus insert/extract index.
636  unsigned NumElts = VecTy->getNumElements();
637  if (Index >= NumElts)
638  return false;
639 
640  // We are inserting the negated element into the same lane that we extracted
641  // from. This is equivalent to a select-shuffle that chooses all but the
642  // negated element from the destination vector.
643  SmallVector<int> Mask(NumElts);
644  std::iota(Mask.begin(), Mask.end(), 0);
645  Mask[Index] = Index + NumElts;
646 
647  Type *ScalarTy = VecTy->getScalarType();
648  InstructionCost OldCost =
649  TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy) +
650  TTI.getVectorInstrCost(I, VecTy, Index);
651 
652  // If the extract has one use, it will be eliminated, so count it in the
653  // original cost. If it has more than one use, ignore the cost because it will
654  // be the same before/after.
655  if (Extract->hasOneUse())
656  OldCost += TTI.getVectorInstrCost(*Extract, VecTy, Index);
657 
658  InstructionCost NewCost =
659  TTI.getArithmeticInstrCost(Instruction::FNeg, VecTy) +
660  TTI.getShuffleCost(TargetTransformInfo::SK_Select, VecTy, Mask);
661 
662  if (NewCost > OldCost)
663  return false;
664 
665  // insertelt DestVec, (fneg (extractelt SrcVec, Index)), Index -->
666  // shuffle DestVec, (fneg SrcVec), Mask
667  Value *VecFNeg = Builder.CreateFNegFMF(SrcVec, FNeg);
668  Value *Shuf = Builder.CreateShuffleVector(DestVec, VecFNeg, Mask);
669  replaceValue(I, *Shuf);
670  return true;
671 }
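// Illustrative sketch of the fold above (hypothetical IR, Index == 2):
//   %e = extractelement <4 x float> %src, i64 2
//   %n = fneg float %e
//   %r = insertelement <4 x float> %dst, float %n, i64 2
// becomes a vector fneg plus a select-style shuffle, roughly:
//   %vn = fneg <4 x float> %src
//   %r = shufflevector <4 x float> %dst, <4 x float> %vn,
//                      <4 x i32> <i32 0, i32 1, i32 6, i32 3>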
672 
673 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
674 /// destination type followed by shuffle. This can enable further transforms by
675 /// moving bitcasts or shuffles together.
676 bool VectorCombine::foldBitcastShuf(Instruction &I) {
677  Value *V;
678  ArrayRef<int> Mask;
679  if (!match(&I, m_BitCast(
680  m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
681  return false;
682 
683  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
684  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
685  // mask for scalable type is a splat or not.
686  // 2) Disallow non-vector casts and length-changing shuffles.
687  // TODO: We could allow any shuffle.
688  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
689  if (!SrcTy || I.getOperand(0)->getType() != SrcTy)
690  return false;
691 
692  auto *DestTy = cast<FixedVectorType>(I.getType());
693  unsigned DestNumElts = DestTy->getNumElements();
694  unsigned SrcNumElts = SrcTy->getNumElements();
695  SmallVector<int, 16> NewMask;
696  if (SrcNumElts <= DestNumElts) {
697  // The bitcast is from wide to narrow/equal elements. The shuffle mask can
698  // always be expanded to the equivalent form choosing narrower elements.
699  assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
700  unsigned ScaleFactor = DestNumElts / SrcNumElts;
701  narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
702  } else {
703  // The bitcast is from narrow elements to wide elements. The shuffle mask
704  // must choose consecutive elements to allow casting first.
705  assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
706  unsigned ScaleFactor = SrcNumElts / DestNumElts;
707  if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
708  return false;
709  }
710 
711  // The new shuffle must not cost more than the old shuffle. The bitcast is
712  // moved ahead of the shuffle, so assume that it has the same cost as before.
713  InstructionCost DestCost = TTI.getShuffleCost(
714  TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
715  InstructionCost SrcCost =
716  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
717  if (DestCost > SrcCost || !DestCost.isValid())
718  return false;
719 
720  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
721  ++NumShufOfBitcast;
722  Value *CastV = Builder.CreateBitCast(V, DestTy);
723  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
724  replaceValue(I, *Shuf);
725  return true;
726 }
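// Illustrative sketch of the fold above (hypothetical IR, wide-to-narrow
// elements, so the mask is expanded by a factor of 2):
//   %s = shufflevector <2 x i64> %v, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
//   %b = bitcast <2 x i64> %s to <4 x i32>
// becomes, roughly:
//   %c = bitcast <2 x i64> %v to <4 x i32>
//   %b = shufflevector <4 x i32> %c, <4 x i32> poison,
//                      <4 x i32> <i32 2, i32 3, i32 0, i32 1>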
727 
728 /// Match a vector binop or compare instruction with at least one inserted
729 /// scalar operand and convert to scalar binop/cmp followed by insertelement.
730 bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
731  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
732  Value *Ins0, *Ins1;
733  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
734  !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
735  return false;
736 
737  // Do not convert the vector condition of a vector select into a scalar
738  // condition. That may cause problems for codegen because of differences in
739  // boolean formats and register-file transfers.
740  // TODO: Can we account for that in the cost model?
741  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
742  if (IsCmp)
743  for (User *U : I.users())
744  if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
745  return false;
746 
747  // Match against one or both scalar values being inserted into constant
748  // vectors:
749  // vec_op VecC0, (inselt VecC1, V1, Index)
750  // vec_op (inselt VecC0, V0, Index), VecC1
751  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
752  // TODO: Deal with mismatched index constants and variable indexes?
753  Constant *VecC0 = nullptr, *VecC1 = nullptr;
754  Value *V0 = nullptr, *V1 = nullptr;
755  uint64_t Index0 = 0, Index1 = 0;
756  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
757  m_ConstantInt(Index0))) &&
758  !match(Ins0, m_Constant(VecC0)))
759  return false;
760  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
761  m_ConstantInt(Index1))) &&
762  !match(Ins1, m_Constant(VecC1)))
763  return false;
764 
765  bool IsConst0 = !V0;
766  bool IsConst1 = !V1;
767  if (IsConst0 && IsConst1)
768  return false;
769  if (!IsConst0 && !IsConst1 && Index0 != Index1)
770  return false;
771 
772  // Bail for single insertion if it is a load.
773  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
774  auto *I0 = dyn_cast_or_null<Instruction>(V0);
775  auto *I1 = dyn_cast_or_null<Instruction>(V1);
776  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
777  (IsConst1 && I0 && I0->mayReadFromMemory()))
778  return false;
779 
780  uint64_t Index = IsConst0 ? Index1 : Index0;
781  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
782  Type *VecTy = I.getType();
783  assert(VecTy->isVectorTy() &&
784  (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
785  (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
786  ScalarTy->isPointerTy()) &&
787  "Unexpected types for insert element into binop or cmp");
788 
789  unsigned Opcode = I.getOpcode();
790  InstructionCost ScalarOpCost, VectorOpCost;
791  if (IsCmp) {
792  CmpInst::Predicate Pred = cast<CmpInst>(I).getPredicate();
793  ScalarOpCost = TTI.getCmpSelInstrCost(
794  Opcode, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
795  VectorOpCost = TTI.getCmpSelInstrCost(
796  Opcode, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
797  } else {
798  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
799  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
800  }
801 
802  // Get cost estimate for the insert element. This cost will factor into
803  // both sequences.
804  InstructionCost InsertCost =
805  TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
806  InstructionCost OldCost =
807  (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
808  InstructionCost NewCost = ScalarOpCost + InsertCost +
809  (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
810  (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
811 
812  // We want to scalarize unless the vector variant actually has lower cost.
813  if (OldCost < NewCost || !NewCost.isValid())
814  return false;
815 
816  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
817  // inselt NewVecC, (scalar_op V0, V1), Index
818  if (IsCmp)
819  ++NumScalarCmp;
820  else
821  ++NumScalarBO;
822 
823  // For constant cases, extract the scalar element, this should constant fold.
824  if (IsConst0)
825  V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
826  if (IsConst1)
827  V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
828 
829  Value *Scalar =
830  IsCmp ? Builder.CreateCmp(Pred, V0, V1)
831  : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
832 
833  Scalar->setName(I.getName() + ".scalar");
834 
835  // All IR flags are safe to back-propagate. There is no potential for extra
836  // poison to be created by the scalar instruction.
837  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
838  ScalarInst->copyIRFlags(&I);
839 
840  // Fold the vector constants in the original vectors into a new base vector.
841  Value *NewVecC =
842  IsCmp ? Builder.CreateCmp(Pred, VecC0, VecC1)
843  : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, VecC0, VecC1);
844  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
845  replaceValue(I, *Insert);
846  return true;
847 }
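// Illustrative sketch of the fold above (hypothetical IR, one variable operand
// inserted at index 0 and one fully constant operand):
//   %i = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i64 0
//   %r = add <4 x i32> %i, <i32 10, i32 20, i32 30, i32 40>
// becomes a scalar add inserted into the constant-folded base vector, roughly:
//   %r.scalar = add i32 %x, 10
//   %r = insertelement <4 x i32> <i32 11, i32 22, i32 33, i32 44>, i32 %r.scalar, i64 0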
848 
849 /// Try to combine a scalar binop + 2 scalar compares of extracted elements of
850 /// a vector into vector operations followed by extract. Note: The SLP pass
851 /// may miss this pattern because of implementation problems.
852 bool VectorCombine::foldExtractedCmps(Instruction &I) {
853  // We are looking for a scalar binop of booleans.
854  // binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
855  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
856  return false;
857 
858  // The compare predicates should match, and each compare should have a
859  // constant operand.
860  // TODO: Relax the one-use constraints.
861  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
862  Instruction *I0, *I1;
863  Constant *C0, *C1;
864  CmpInst::Predicate P0, P1;
865  if (!match(B0, m_OneUse(m_Cmp(P0, m_Instruction(I0), m_Constant(C0)))) ||
866  !match(B1, m_OneUse(m_Cmp(P1, m_Instruction(I1), m_Constant(C1)))) ||
867  P0 != P1)
868  return false;
869 
870  // The compare operands must be extracts of the same vector with constant
871  // extract indexes.
872  // TODO: Relax the one-use constraints.
873  Value *X;
874  uint64_t Index0, Index1;
875  if (!match(I0, m_OneUse(m_ExtractElt(m_Value(X), m_ConstantInt(Index0)))) ||
876  !match(I1, m_OneUse(m_ExtractElt(m_Specific(X), m_ConstantInt(Index1)))))
877  return false;
878 
879  auto *Ext0 = cast<ExtractElementInst>(I0);
880  auto *Ext1 = cast<ExtractElementInst>(I1);
881  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
882  if (!ConvertToShuf)
883  return false;
884 
885  // The original scalar pattern is:
886  // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
887  CmpInst::Predicate Pred = P0;
888  unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
889  : Instruction::ICmp;
890  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
891  if (!VecTy)
892  return false;
893 
894  InstructionCost OldCost = TTI.getVectorInstrCost(*Ext0, VecTy, Index0);
895  OldCost += TTI.getVectorInstrCost(*Ext1, VecTy, Index1);
896  OldCost +=
897  TTI.getCmpSelInstrCost(CmpOpcode, I0->getType(),
898  CmpInst::makeCmpResultType(I0->getType()), Pred) *
899  2;
900  OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
901 
902  // The proposed vector pattern is:
903  // vcmp = cmp Pred X, VecC
904  // ext (binop vNi1 vcmp, (shuffle vcmp, Index1)), Index0
905  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
906  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
907  auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
908  InstructionCost NewCost = TTI.getCmpSelInstrCost(
909  CmpOpcode, X->getType(), CmpInst::makeCmpResultType(X->getType()), Pred);
910  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
911  ShufMask[CheapIndex] = ExpensiveIndex;
912  NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
913  ShufMask);
914  NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
915  NewCost += TTI.getVectorInstrCost(*Ext0, CmpTy, CheapIndex);
916 
917  // Aggressively form vector ops if the cost is equal because the transform
918  // may enable further optimization.
919  // Codegen can reverse this transform (scalarize) if it was not profitable.
920  if (OldCost < NewCost || !NewCost.isValid())
921  return false;
922 
923  // Create a vector constant from the 2 scalar constants.
924  SmallVector<Constant *, 32> CmpC(VecTy->getNumElements(),
925  UndefValue::get(VecTy->getElementType()));
926  CmpC[Index0] = C0;
927  CmpC[Index1] = C1;
928  Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
929 
930  Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
931  Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
932  VCmp, Shuf);
933  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
934  replaceValue(I, *NewExt);
935  ++NumVecCmpBO;
936  return true;
937 }
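// Illustrative sketch of the fold above (hypothetical IR, extracts from lanes
// 0 and 1 of the same vector):
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %e1 = extractelement <4 x i32> %x, i32 1
//   %c0 = icmp sgt i32 %e0, 42
//   %c1 = icmp sgt i32 %e1, 99
//   %r = and i1 %c0, %c1
// becomes one vector compare, a shuffle, a vector binop, and one extract:
//   %vc = icmp sgt <4 x i32> %x, <i32 42, i32 99, i32 undef, i32 undef>
//   %sh = shufflevector <4 x i1> %vc, <4 x i1> poison,
//                       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
//   %vr = and <4 x i1> %vc, %sh
//   %r = extractelement <4 x i1> %vr, i32 0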
938 
939 // Check if memory loc modified between two instrs in the same BB
940 static bool isMemModifiedBetween(BasicBlock::iterator Begin,
941  BasicBlock::iterator End,
942  const MemoryLocation &Loc, AAResults &AA) {
943  unsigned NumScanned = 0;
944  return std::any_of(Begin, End, [&](const Instruction &Instr) {
945  return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
946  ++NumScanned > MaxInstrsToScan;
947  });
948 }
949 
950 /// Helper class to indicate whether a vector index can be safely scalarized and
951 /// if a freeze needs to be inserted.
952 class ScalarizationResult {
953  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
954 
955  StatusTy Status;
956  Value *ToFreeze;
957 
958  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
959  : Status(Status), ToFreeze(ToFreeze) {}
960 
961 public:
962  ScalarizationResult(const ScalarizationResult &Other) = default;
963  ~ScalarizationResult() {
964  assert(!ToFreeze && "freeze() not called with ToFreeze being set");
965  }
966 
967  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
968  static ScalarizationResult safe() { return {StatusTy::Safe}; }
969  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
970  return {StatusTy::SafeWithFreeze, ToFreeze};
971  }
972 
973  /// Returns true if the index can be scalarized without requiring a freeze.
974  bool isSafe() const { return Status == StatusTy::Safe; }
975  /// Returns true if the index cannot be scalarized.
976  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
977  /// Returns true if the index can be scalarized, but requires inserting a
978  /// freeze.
979  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
980 
981  /// Reset the state to Unsafe and clear ToFreeze if set.
982  void discard() {
983  ToFreeze = nullptr;
984  Status = StatusTy::Unsafe;
985  }
986 
987  /// Freeze the ToFreeze and update the use in \p User to use it.
988  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
989  assert(isSafeWithFreeze() &&
990  "should only be used when freezing is required");
991  assert(is_contained(ToFreeze->users(), &UserI) &&
992  "UserI must be a user of ToFreeze");
993  IRBuilder<>::InsertPointGuard Guard(Builder);
994  Builder.SetInsertPoint(cast<Instruction>(&UserI));
995  Value *Frozen =
996  Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
997  for (Use &U : make_early_inc_range((UserI.operands())))
998  if (U.get() == ToFreeze)
999  U.set(Frozen);
1000 
1001  ToFreeze = nullptr;
1002  }
1003 };
1004 
1005 /// Check if it is legal to scalarize a memory access to \p VecTy at index \p
1006 /// Idx. \p Idx must access a valid vector element.
1007 static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
1008  Value *Idx, Instruction *CtxI,
1009  AssumptionCache &AC,
1010  const DominatorTree &DT) {
1011  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
1012  if (C->getValue().ult(VecTy->getNumElements()))
1013  return ScalarizationResult::safe();
1014  return ScalarizationResult::unsafe();
1015  }
1016 
1017  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
1018  APInt Zero(IntWidth, 0);
1019  APInt MaxElts(IntWidth, VecTy->getNumElements());
1020  ConstantRange ValidIndices(Zero, MaxElts);
1021  ConstantRange IdxRange(IntWidth, true);
1022 
1023  if (isGuaranteedNotToBePoison(Idx, &AC)) {
1024  if (ValidIndices.contains(computeConstantRange(Idx, /* ForSigned */ false,
1025  true, &AC, CtxI, &DT)))
1026  return ScalarizationResult::safe();
1027  return ScalarizationResult::unsafe();
1028  }
1029 
1030  // If the index may be poison, check if we can insert a freeze before the
1031  // range of the index is restricted.
1032  Value *IdxBase;
1033  ConstantInt *CI;
1034  if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI)))) {
1035  IdxRange = IdxRange.binaryAnd(CI->getValue());
1036  } else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI)))) {
1037  IdxRange = IdxRange.urem(CI->getValue());
1038  }
1039 
1040  if (ValidIndices.contains(IdxRange))
1041  return ScalarizationResult::safeWithFreeze(IdxBase);
1042  return ScalarizationResult::unsafe();
1043 }
1044 
1045 /// The memory operation on a vector of \p ScalarType had alignment of
1046 /// \p VectorAlignment. Compute the maximal, but conservatively correct,
1047 /// alignment that will be valid for the memory operation on a single scalar
1048 /// element of the same type with index \p Idx.
1049 static Align computeAlignmentAfterScalarization(Align VectorAlignment,
1050  Type *ScalarType, Value *Idx,
1051  const DataLayout &DL) {
1052  if (auto *C = dyn_cast<ConstantInt>(Idx))
1053  return commonAlignment(VectorAlignment,
1054  C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
1055  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
1056 }
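// Worked example (hypothetical values): for a <4 x i32> access with
// VectorAlignment = 16, a constant index of 1 yields commonAlignment(16, 4) = 4,
// index 2 yields commonAlignment(16, 8) = 8, and index 0 keeps 16. For a
// variable index, only the element size can be assumed, so the result is
// commonAlignment(16, 4) = 4.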
1057 
1058 // Combine patterns like:
1059 // %0 = load <4 x i32>, <4 x i32>* %a
1060 // %1 = insertelement <4 x i32> %0, i32 %b, i32 1
1061 // store <4 x i32> %1, <4 x i32>* %a
1062 // to:
1063 // %0 = bitcast <4 x i32>* %a to i32*
1064 // %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
1065 // store i32 %b, i32* %1
1066 bool VectorCombine::foldSingleElementStore(Instruction &I) {
1067  auto *SI = cast<StoreInst>(&I);
1068  if (!SI->isSimple() ||
1069  !isa<FixedVectorType>(SI->getValueOperand()->getType()))
1070  return false;
1071 
1072  // TODO: Combine more complicated patterns (multiple insert) by referencing
1073  // TargetTransformInfo.
1074  Instruction *Source;
1075  Value *NewElement;
1076  Value *Idx;
1077  if (!match(SI->getValueOperand(),
1078  m_InsertElt(m_Instruction(Source), m_Value(NewElement),
1079  m_Value(Idx))))
1080  return false;
1081 
1082  if (auto *Load = dyn_cast<LoadInst>(Source)) {
1083  auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
1084  const DataLayout &DL = I.getModule()->getDataLayout();
1085  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
1086  // Don't optimize for atomic/volatile load or store. Ensure memory is not
1087  // modified between the load and the store, the vector type matches the store size, and the index is inbounds.
1088  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
1089  !DL.typeSizeEqualsStoreSize(Load->getType()) ||
1090  SrcAddr != SI->getPointerOperand()->stripPointerCasts())
1091  return false;
1092 
1093  auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
1094  if (ScalarizableIdx.isUnsafe() ||
1095  isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
1096  MemoryLocation::get(SI), AA))
1097  return false;
1098 
1099  if (ScalarizableIdx.isSafeWithFreeze())
1100  ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
1101  Value *GEP = Builder.CreateInBoundsGEP(
1102  SI->getValueOperand()->getType(), SI->getPointerOperand(),
1103  {ConstantInt::get(Idx->getType(), 0), Idx});
1104  StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
1105  NSI->copyMetadata(*SI);
1106  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
1107  std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
1108  DL);
1109  NSI->setAlignment(ScalarOpAlignment);
1110  replaceValue(I, *NSI);
1111  eraseInstruction(I);
1112  return true;
1113  }
1114 
1115  return false;
1116 }
1117 
1118 /// Try to scalarize vector loads feeding extractelement instructions.
1119 bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
1120  Value *Ptr;
1121  if (!match(&I, m_Load(m_Value(Ptr))))
1122  return false;
1123 
1124  auto *FixedVT = cast<FixedVectorType>(I.getType());
1125  auto *LI = cast<LoadInst>(&I);
1126  const DataLayout &DL = I.getModule()->getDataLayout();
1127  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(FixedVT))
1128  return false;
1129 
1130  InstructionCost OriginalCost =
1131  TTI.getMemoryOpCost(Instruction::Load, FixedVT, LI->getAlign(),
1132  LI->getPointerAddressSpace());
1133  InstructionCost ScalarizedCost = 0;
1134 
1135  Instruction *LastCheckedInst = LI;
1136  unsigned NumInstChecked = 0;
1137  // Check if all users of the load are extracts with no memory modifications
1138  // between the load and the extract. Compute the cost of both the original
1139  // code and the scalarized version.
1140  for (User *U : LI->users()) {
1141  auto *UI = dyn_cast<ExtractElementInst>(U);
1142  if (!UI || UI->getParent() != LI->getParent())
1143  return false;
1144 
1145  if (!isGuaranteedNotToBePoison(UI->getOperand(1), &AC, LI, &DT))
1146  return false;
1147 
1148  // Check if any instruction between the load and the extract may modify
1149  // memory.
1150  if (LastCheckedInst->comesBefore(UI)) {
1151  for (Instruction &I :
1152  make_range(std::next(LI->getIterator()), UI->getIterator())) {
1153  // Bail out if we reached the check limit or the instruction may write
1154  // to memory.
1155  if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
1156  return false;
1157  NumInstChecked++;
1158  }
1159  LastCheckedInst = UI;
1160  }
1161 
1162  auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
1163  if (!ScalarIdx.isSafe()) {
1164  // TODO: Freeze index if it is safe to do so.
1165  ScalarIdx.discard();
1166  return false;
1167  }
1168 
1169  auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1170  OriginalCost +=
1171  TTI.getVectorInstrCost(Instruction::ExtractElement, FixedVT,
1172  Index ? Index->getZExtValue() : -1);
1173  ScalarizedCost +=
1174  TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
1175  Align(1), LI->getPointerAddressSpace());
1176  ScalarizedCost += TTI.getAddressComputationCost(FixedVT->getElementType());
1177  }
1178 
1179  if (ScalarizedCost >= OriginalCost)
1180  return false;
1181 
1182  // Replace extracts with narrow scalar loads.
1183  for (User *U : LI->users()) {
1184  auto *EI = cast<ExtractElementInst>(U);
1185  Builder.SetInsertPoint(EI);
1186 
1187  Value *Idx = EI->getOperand(1);
1188  Value *GEP =
1189  Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
1190  auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
1191  FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));
1192 
1193  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
1194  LI->getAlign(), FixedVT->getElementType(), Idx, DL);
1195  NewLoad->setAlignment(ScalarOpAlignment);
1196 
1197  replaceValue(*EI, *NewLoad);
1198  }
1199 
1200  return true;
1201 }
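// Illustrative sketch of the fold above (hypothetical IR, assuming the cost
// model favors scalarization and the indexes are known to be in range):
//   %v = load <4 x i32>, ptr %p, align 16
//   %e0 = extractelement <4 x i32> %v, i64 0
//   %e2 = extractelement <4 x i32> %v, i64 2
// becomes narrow scalar loads of just the used lanes, roughly:
//   %g0 = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i64 0
//   %e0.scalar = load i32, ptr %g0, align 16
//   %g2 = getelementptr inbounds <4 x i32>, ptr %p, i32 0, i64 2
//   %e2.scalar = load i32, ptr %g2, align 8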
1202 
1203 /// Try to convert "shuffle (binop), (binop)" with a shared binop operand into
1204 /// "binop (shuffle), (shuffle)".
1205 bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
1206  auto *VecTy = cast<FixedVectorType>(I.getType());
1207  BinaryOperator *B0, *B1;
1208  ArrayRef<int> Mask;
1209  if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
1210  m_Mask(Mask))) ||
1211  B0->getOpcode() != B1->getOpcode() || B0->getType() != VecTy)
1212  return false;
1213 
1214  // Try to replace a binop with a shuffle if the shuffle is not costly.
1215  // The new shuffle will choose from a single, common operand, so it may be
1216  // cheaper than the existing two-operand shuffle.
1217  SmallVector<int> UnaryMask = createUnaryMask(Mask, Mask.size());
1218  Instruction::BinaryOps Opcode = B0->getOpcode();
1219  InstructionCost BinopCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
1220  InstructionCost ShufCost = TTI.getShuffleCost(
1221  TargetTransformInfo::SK_PermuteSingleSrc, VecTy, UnaryMask);
1222  if (ShufCost > BinopCost)
1223  return false;
1224 
1225  // If we have something like "add X, Y" and "add Z, X", swap ops to match.
1226  Value *X = B0->getOperand(0), *Y = B0->getOperand(1);
1227  Value *Z = B1->getOperand(0), *W = B1->getOperand(1);
1228  if (BinaryOperator::isCommutative(Opcode) && X != Z && Y != W)
1229  std::swap(X, Y);
1230 
1231  Value *Shuf0, *Shuf1;
1232  if (X == Z) {
1233  // shuf (bo X, Y), (bo X, W) --> bo (shuf X), (shuf Y, W)
1234  Shuf0 = Builder.CreateShuffleVector(X, UnaryMask);
1235  Shuf1 = Builder.CreateShuffleVector(Y, W, Mask);
1236  } else if (Y == W) {
1237  // shuf (bo X, Y), (bo Z, Y) --> bo (shuf X, Z), (shuf Y)
1238  Shuf0 = Builder.CreateShuffleVector(X, Z, Mask);
1239  Shuf1 = Builder.CreateShuffleVector(Y, UnaryMask);
1240  } else {
1241  return false;
1242  }
1243 
1244  Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
1245  // Intersect flags from the old binops.
1246  if (auto *NewInst = dyn_cast<Instruction>(NewBO)) {
1247  NewInst->copyIRFlags(B0);
1248  NewInst->andIRFlags(B1);
1249  }
1250  replaceValue(I, *NewBO);
1251  return true;
1252 }
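// Illustrative sketch of the fold above (hypothetical IR, shared operand %x):
//   %a = add <4 x i32> %x, %y
//   %b = add <4 x i32> %x, %w
//   %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
// becomes, roughly:
//   %s0 = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
//   %s1 = shufflevector <4 x i32> %y, <4 x i32> %w, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
//   %r = add <4 x i32> %s0, %s1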
1253 
1254 /// Given a commutative reduction, the order of the input lanes does not alter
1255 /// the results. We can use this to remove certain shuffles feeding the
1256 /// reduction, removing the need to shuffle at all.
1257 bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
1258  auto *II = dyn_cast<IntrinsicInst>(&I);
1259  if (!II)
1260  return false;
1261  switch (II->getIntrinsicID()) {
1262  case Intrinsic::vector_reduce_add:
1263  case Intrinsic::vector_reduce_mul:
1264  case Intrinsic::vector_reduce_and:
1265  case Intrinsic::vector_reduce_or:
1266  case Intrinsic::vector_reduce_xor:
1267  case Intrinsic::vector_reduce_smin:
1268  case Intrinsic::vector_reduce_smax:
1269  case Intrinsic::vector_reduce_umin:
1270  case Intrinsic::vector_reduce_umax:
1271  break;
1272  default:
1273  return false;
1274  }
1275 
1276  // Find all the inputs when looking through operations that do not alter the
1277  // lane order (binops, for example). Currently we look for a single shuffle,
1278  // and can ignore splat values.
1279  std::queue<Value *> Worklist;
1280  SmallPtrSet<Value *, 4> Visited;
1281  ShuffleVectorInst *Shuffle = nullptr;
1282  if (auto *Op = dyn_cast<Instruction>(I.getOperand(0)))
1283  Worklist.push(Op);
1284 
1285  while (!Worklist.empty()) {
1286  Value *CV = Worklist.front();
1287  Worklist.pop();
1288  if (Visited.contains(CV))
1289  continue;
1290 
1291  // Splats don't change the order, so can be safely ignored.
1292  if (isSplatValue(CV))
1293  continue;
1294 
1295  Visited.insert(CV);
1296 
1297  if (auto *CI = dyn_cast<Instruction>(CV)) {
1298  if (CI->isBinaryOp()) {
1299  for (auto *Op : CI->operand_values())
1300  Worklist.push(Op);
1301  continue;
1302  } else if (auto *SV = dyn_cast<ShuffleVectorInst>(CI)) {
1303  if (Shuffle && Shuffle != SV)
1304  return false;
1305  Shuffle = SV;
1306  continue;
1307  }
1308  }
1309 
1310  // Anything else is currently an unknown node.
1311  return false;
1312  }
1313 
1314  if (!Shuffle)
1315  return false;
1316 
1317  // Check all uses of the binary ops and shuffles are also included in the
1318  // lane-invariant operations (Visited should be the list of lanewise
1319  // instructions, including the shuffle that we found).
1320  for (auto *V : Visited)
1321  for (auto *U : V->users())
1322  if (!Visited.contains(U) && U != &I)
1323  return false;
1324 
1325  FixedVectorType *VecType =
1326  dyn_cast<FixedVectorType>(II->getOperand(0)->getType());
1327  if (!VecType)
1328  return false;
1329  FixedVectorType *ShuffleInputType =
1330  dyn_cast<FixedVectorType>(Shuffle->getOperand(0)->getType());
1331  if (!ShuffleInputType)
1332  return false;
1333  int NumInputElts = ShuffleInputType->getNumElements();
1334 
1335  // Find the mask from sorting the lanes into order. This is most likely to
1336  // become an identity or concat mask. Undef elements are pushed to the end.
1337  SmallVector<int> ConcatMask;
1338  Shuffle->getShuffleMask(ConcatMask);
1339  sort(ConcatMask, [](int X, int Y) { return (unsigned)X < (unsigned)Y; });
1340  bool UsesSecondVec =
1341  any_of(ConcatMask, [&](int M) { return M >= NumInputElts; });
1342  InstructionCost OldCost = TTI.getShuffleCost(
1343  UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
1344  Shuffle->getShuffleMask());
1345  InstructionCost NewCost = TTI.getShuffleCost(
1346  UsesSecondVec ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc, VecType,
1347  ConcatMask);
1348 
1349  LLVM_DEBUG(dbgs() << "Found a reduction feeding from a shuffle: " << *Shuffle
1350  << "\n");
1351  LLVM_DEBUG(dbgs() << " OldCost: " << OldCost << " vs NewCost: " << NewCost
1352  << "\n");
1353  if (NewCost < OldCost) {
1354  Builder.SetInsertPoint(Shuffle);
1355  Value *NewShuffle = Builder.CreateShuffleVector(
1356  Shuffle->getOperand(0), Shuffle->getOperand(1), ConcatMask);
1357  LLVM_DEBUG(dbgs() << "Created new shuffle: " << *NewShuffle << "\n");
1358  replaceValue(*Shuffle, *NewShuffle);
1359  }
1360 
1361  // See if we can re-use foldSelectShuffle, getting it to reduce the size of
1362  // the shuffle into a nicer order, as it can ignore the order of the shuffles.
1363  return foldSelectShuffle(*Shuffle, true);
1364 }
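// Illustrative sketch (hypothetical IR): because the reduction is insensitive
// to lane order, a reversing shuffle feeding it, such as:
//   %s = shufflevector <4 x i32> %x, <4 x i32> poison,
//                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %s)
// can have its mask sorted into an identity, effectively removing the shuffle:
//   %r = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)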
1365 
1366 /// This method looks for groups of shuffles acting on binops, of the form:
1367 /// %x = shuffle ...
1368 /// %y = shuffle ...
1369 /// %a = binop %x, %y
1370 /// %b = binop %x, %y
1371 /// shuffle %a, %b, selectmask
1372 /// We may, especially if the shuffle is wider than legal, be able to convert
1373 /// the shuffle to a form where only parts of a and b need to be computed. On
1374 /// architectures with no obvious "select" shuffle, this can reduce the total
1375 /// number of operations if the target reports them as cheaper.
1376 bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
1377  auto *SVI = cast<ShuffleVectorInst>(&I);
1378  auto *VT = cast<FixedVectorType>(I.getType());
1379  auto *Op0 = dyn_cast<Instruction>(SVI->getOperand(0));
1380  auto *Op1 = dyn_cast<Instruction>(SVI->getOperand(1));
1381  if (!Op0 || !Op1 || Op0 == Op1 || !Op0->isBinaryOp() || !Op1->isBinaryOp() ||
1382  VT != Op0->getType())
1383  return false;
1384 
1385  auto *SVI0A = dyn_cast<Instruction>(Op0->getOperand(0));
1386  auto *SVI0B = dyn_cast<Instruction>(Op0->getOperand(1));
1387  auto *SVI1A = dyn_cast<Instruction>(Op1->getOperand(0));
1388  auto *SVI1B = dyn_cast<Instruction>(Op1->getOperand(1));
1389  SmallPtrSet<Instruction *, 4> InputShuffles({SVI0A, SVI0B, SVI1A, SVI1B});
1390  auto checkSVNonOpUses = [&](Instruction *I) {
1391  if (!I || I->getOperand(0)->getType() != VT)
1392  return true;
1393  return any_of(I->users(), [&](User *U) {
1394  return U != Op0 && U != Op1 &&
1395  !(isa<ShuffleVectorInst>(U) &&
1396  (InputShuffles.contains(cast<Instruction>(U)) ||
1397  isInstructionTriviallyDead(cast<Instruction>(U))));
1398  });
1399  };
1400  if (checkSVNonOpUses(SVI0A) || checkSVNonOpUses(SVI0B) ||
1401  checkSVNonOpUses(SVI1A) || checkSVNonOpUses(SVI1B))
1402  return false;
1403 
1404  // Collect all the uses that are shuffles we can transform together. We may
1405  // not have a single shuffle; instead there may be a group that can all be
1406  // transformed together profitably.
1407  SmallVector<ShuffleVectorInst *> Shuffles;
1408  auto collectShuffles = [&](Instruction *I) {
1409  for (auto *U : I->users()) {
1410  auto *SV = dyn_cast<ShuffleVectorInst>(U);
1411  if (!SV || SV->getType() != VT)
1412  return false;
1413  if ((SV->getOperand(0) != Op0 && SV->getOperand(0) != Op1) ||
1414  (SV->getOperand(1) != Op0 && SV->getOperand(1) != Op1))
1415  return false;
1416  if (!llvm::is_contained(Shuffles, SV))
1417  Shuffles.push_back(SV);
1418  }
1419  return true;
1420  };
1421  if (!collectShuffles(Op0) || !collectShuffles(Op1))
1422  return false;
1423  // When coming from a reduction, we need to be processing a single shuffle;
1424  // otherwise the other uses will not be lane-invariant.
1425  if (FromReduction && Shuffles.size() > 1)
1426  return false;
1427 
1428  // Add any shuffle uses of the shuffles we have found, so they are included
1429  // in our cost calculations.
1430  if (!FromReduction) {
1431  for (ShuffleVectorInst *SV : Shuffles) {
1432  for (auto *U : SV->users()) {
1433  ShuffleVectorInst *SSV = dyn_cast<ShuffleVectorInst>(U);
1434  if (SSV && isa<UndefValue>(SSV->getOperand(1)) && SSV->getType() == VT)
1435  Shuffles.push_back(SSV);
1436  }
1437  }
1438  }
1439 
1440  // For each of the output shuffles, we try to sort all the first vector
1441  // elements to the beginning, followed by the second vector's elements at the
1442  // end. If the binops are legalized to smaller vectors, this may reduce the
1443  // total number of binops. We compute the ReconstructMask needed to convert
1444  // back to the original lane order.
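  // For example (illustration only): with NumElts = 4 and an output shuffle
  // mask of <2, 5, 0, -1>, lanes 2 and 0 of Op0 get packed as V1 = {2, 0} and
  // lane 1 of Op1 as V2 = {1}, giving an original reconstruction mask of
  // <0, 4, 1, -1>, where indices >= NumElts select from the packed Op1.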
1445  SmallVector<std::pair<int, int>> V1, V2;
1446  SmallVector<SmallVector<int>> OrigReconstructMasks;
1447  int MaxV1Elt = 0, MaxV2Elt = 0;
1448  unsigned NumElts = VT->getNumElements();
1449  for (ShuffleVectorInst *SVN : Shuffles) {
1450  SmallVector<int> Mask;
1451  SVN->getShuffleMask(Mask);
1452 
1453  // Check the operands are the same as the original, or reversed (in which
1454  // case we need to commute the mask).
1455  Value *SVOp0 = SVN->getOperand(0);
1456  Value *SVOp1 = SVN->getOperand(1);
1457  if (isa<UndefValue>(SVOp1)) {
1458  auto *SSV = cast<ShuffleVectorInst>(SVOp0);
1459  SVOp0 = SSV->getOperand(0);
1460  SVOp1 = SSV->getOperand(1);
1461  for (unsigned I = 0, E = Mask.size(); I != E; I++) {
1462  if (Mask[I] >= static_cast<int>(SSV->getShuffleMask().size()))
1463  return false;
1464  Mask[I] = Mask[I] < 0 ? Mask[I] : SSV->getMaskValue(Mask[I]);
1465  }
1466  }
1467  if (SVOp0 == Op1 && SVOp1 == Op0) {
1468  std::swap(SVOp0, SVOp1);
1469  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1470  }
1471  if (SVOp0 != Op0 || SVOp1 != Op1)
1472  return false;
1473 
1474  // Calculate the reconstruction mask for this shuffle: the mask needed to
1475  // take the packed values from Op0/Op1 and reconstruct the original lane
1476  // order.
1477  SmallVector<int> ReconstructMask;
1478  for (unsigned I = 0; I < Mask.size(); I++) {
1479  if (Mask[I] < 0) {
1480  ReconstructMask.push_back(-1);
1481  } else if (Mask[I] < static_cast<int>(NumElts)) {
1482  MaxV1Elt = std::max(MaxV1Elt, Mask[I]);
1483  auto It = find_if(V1, [&](const std::pair<int, int> &A) {
1484  return Mask[I] == A.first;
1485  });
1486  if (It != V1.end())
1487  ReconstructMask.push_back(It - V1.begin());
1488  else {
1489  ReconstructMask.push_back(V1.size());
1490  V1.emplace_back(Mask[I], V1.size());
1491  }
1492  } else {
1493  MaxV2Elt = std::max<int>(MaxV2Elt, Mask[I] - NumElts);
1494  auto It = find_if(V2, [&](const std::pair<int, int> &A) {
1495  return Mask[I] - static_cast<int>(NumElts) == A.first;
1496  });
1497  if (It != V2.end())
1498  ReconstructMask.push_back(NumElts + It - V2.begin());
1499  else {
1500  ReconstructMask.push_back(NumElts + V2.size());
1501  V2.emplace_back(Mask[I] - NumElts, NumElts + V2.size());
1502  }
1503  }
1504  }
1505 
1506  // For reductions, we know that the output lane ordering doesn't alter the
1507  // result. An in-order mask can help simplify the shuffle away.
1508  if (FromReduction)
1509  sort(ReconstructMask);
1510  OrigReconstructMasks.push_back(std::move(ReconstructMask));
1511  }
1512 
1513  // If the maximum elements used from V1 and V2 are not larger than the new
1514  // vectors, the inputs are already packed and performing the optimization
1515  // again will likely not help any further. This also prevents us from getting
1516  // stuck in a cycle in case the costs do not also rule it out.
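  // For example, if V1 ended up with 4 entries and the highest Op0 lane
  // referenced was 3 (and likewise for V2), the inputs are already packed and
  // redoing the transform would not help.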
1517  if (V1.empty() || V2.empty() ||
1518  (MaxV1Elt == static_cast<int>(V1.size()) - 1 &&
1519  MaxV2Elt == static_cast<int>(V2.size()) - 1))
1520  return false;
1521 
1522  // GetBaseMaskValue takes one of the inputs, which may either be a shuffle, a
1523  // shuffle of another shuffle, or not a shuffle at all (which is treated like
1524  // an identity shuffle).
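  // For example, if I is a shuffle with mask <4, 5, 0, 1>, GetBaseMaskValue(I, 2)
  // returns 0: output lane 2 reads element 0 of the first source. For a shuffle
  // of a single-source input shuffle, the two masks are composed instead.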
1525  auto GetBaseMaskValue = [&](Instruction *I, int M) {
1526  auto *SV = dyn_cast<ShuffleVectorInst>(I);
1527  if (!SV)
1528  return M;
1529  if (isa<UndefValue>(SV->getOperand(1)))
1530  if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
1531  if (InputShuffles.contains(SSV))
1532  return SSV->getMaskValue(SV->getMaskValue(M));
1533  return SV->getMaskValue(M);
1534  };
1535 
1536  // Attempt to sort the inputs by ascending mask values to make simpler input
1537  // shuffles and push complex shuffles down to the uses. We sort on the first
1538  // of the two input shuffle orders, to try and get at least one input into a
1539  // nice order.
1540  auto SortBase = [&](Instruction *A, std::pair<int, int> X,
1541  std::pair<int, int> Y) {
1542  int MXA = GetBaseMaskValue(A, X.first);
1543  int MYA = GetBaseMaskValue(A, Y.first);
1544  return MXA < MYA;
1545  };
1546  stable_sort(V1, [&](std::pair<int, int> A, std::pair<int, int> B) {
1547  return SortBase(SVI0A, A, B);
1548  });
1549  stable_sort(V2, [&](std::pair<int, int> A, std::pair<int, int> B) {
1550  return SortBase(SVI1A, A, B);
1551  });
1552  // Calculate our ReconstructMasks from the OrigReconstructMasks and the
1553  // modified order of the input shuffles.
1554  SmallVector<SmallVector<int>> ReconstructMasks;
1555  for (auto Mask : OrigReconstructMasks) {
1556  SmallVector<int> ReconstructMask;
1557  for (int M : Mask) {
1558  auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
1559  auto It = find_if(V, [M](auto A) { return A.second == M; });
1560  assert(It != V.end() && "Expected all entries in Mask");
1561  return std::distance(V.begin(), It);
1562  };
1563  if (M < 0)
1564  ReconstructMask.push_back(-1);
1565  else if (M < static_cast<int>(NumElts)) {
1566  ReconstructMask.push_back(FindIndex(V1, M));
1567  } else {
1568  ReconstructMask.push_back(NumElts + FindIndex(V2, M));
1569  }
1570  }
1571  ReconstructMasks.push_back(std::move(ReconstructMask));
1572  }
1573 
1574  // Calculate the masks needed for the new input shuffles, which get padded
1575  // with undef elements.
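  // For example, if V1 holds 3 packed lanes and NumElts is 8, V1A and V1B each
  // get 3 real mask values followed by 5 UndefMaskElem (-1) entries.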
1576  SmallVector<int> V1A, V1B, V2A, V2B;
1577  for (unsigned I = 0; I < V1.size(); I++) {
1578  V1A.push_back(GetBaseMaskValue(SVI0A, V1[I].first));
1579  V1B.push_back(GetBaseMaskValue(SVI0B, V1[I].first));
1580  }
1581  for (unsigned I = 0; I < V2.size(); I++) {
1582  V2A.push_back(GetBaseMaskValue(SVI1A, V2[I].first));
1583  V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
1584  }
1585  while (V1A.size() < NumElts) {
1586  V1A.push_back(UndefMaskElem);
1587  V1B.push_back(UndefMaskElem);
1588  }
1589  while (V2A.size() < NumElts) {
1590  V2A.push_back(UndefMaskElem);
1591  V2B.push_back(UndefMaskElem);
1592  }
1593 
1594  auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
1595  auto *SV = dyn_cast<ShuffleVectorInst>(I);
1596  if (!SV)
1597  return C;
1598  return C + TTI.getShuffleCost(isa<UndefValue>(SV->getOperand(1))
1599  ? TTI::SK_PermuteSingleSrc
1600  : TTI::SK_PermuteTwoSrc,
1601  VT, SV->getShuffleMask());
1602  };
1603  auto AddShuffleMaskCost = [&](InstructionCost C, ArrayRef<int> Mask) {
1604  return C + TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, VT, Mask);
1605  };
1606 
1607  // Get the costs of the shuffles + binops before and after with the new
1608  // shuffle masks.
1609  InstructionCost CostBefore =
1610  TTI.getArithmeticInstrCost(Op0->getOpcode(), VT) +
1611  TTI.getArithmeticInstrCost(Op1->getOpcode(), VT);
1612  CostBefore += std::accumulate(Shuffles.begin(), Shuffles.end(),
1613  InstructionCost(0), AddShuffleCost);
1614  CostBefore += std::accumulate(InputShuffles.begin(), InputShuffles.end(),
1615  InstructionCost(0), AddShuffleCost);
1616 
1617  // The new binops will be unused for lanes past the used shuffle lengths.
1618  // These narrower types let the target report a more accurate cost for that.
1619  FixedVectorType *Op0SmallVT =
1620  FixedVectorType::get(VT->getScalarType(), V1.size());
1621  FixedVectorType *Op1SmallVT =
1622  FixedVectorType::get(VT->getScalarType(), V2.size());
1623  InstructionCost CostAfter =
1624  TTI.getArithmeticInstrCost(Op0->getOpcode(), Op0SmallVT) +
1625  TTI.getArithmeticInstrCost(Op1->getOpcode(), Op1SmallVT);
1626  CostAfter += std::accumulate(ReconstructMasks.begin(), ReconstructMasks.end(),
1627  InstructionCost(0), AddShuffleMaskCost);
1628  std::set<SmallVector<int>> OutputShuffleMasks({V1A, V1B, V2A, V2B});
1629  CostAfter +=
1630  std::accumulate(OutputShuffleMasks.begin(), OutputShuffleMasks.end(),
1631  InstructionCost(0), AddShuffleMaskCost);
1632 
1633  LLVM_DEBUG(dbgs() << "Found a binop select shuffle pattern: " << I << "\n");
1634  LLVM_DEBUG(dbgs() << " CostBefore: " << CostBefore
1635  << " vs CostAfter: " << CostAfter << "\n");
1636  if (CostBefore <= CostAfter)
1637  return false;
1638 
1639  // The cost model check has passed, so create the new instructions.
1640  auto GetShuffleOperand = [&](Instruction *I, unsigned Op) -> Value * {
1641  auto *SV = dyn_cast<ShuffleVectorInst>(I);
1642  if (!SV)
1643  return I;
1644  if (isa<UndefValue>(SV->getOperand(1)))
1645  if (auto *SSV = dyn_cast<ShuffleVectorInst>(SV->getOperand(0)))
1646  if (InputShuffles.contains(SSV))
1647  return SSV->getOperand(Op);
1648  return SV->getOperand(Op);
1649  };
1650  Builder.SetInsertPoint(SVI0A->getNextNode());
1651  Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
1652  GetShuffleOperand(SVI0A, 1), V1A);
1653  Builder.SetInsertPoint(SVI0B->getNextNode());
1654  Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
1655  GetShuffleOperand(SVI0B, 1), V1B);
1656  Builder.SetInsertPoint(SVI1A->getNextNode());
1657  Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
1658  GetShuffleOperand(SVI1A, 1), V2A);
1659  Builder.SetInsertPoint(SVI1B->getNextNode());
1660  Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
1661  GetShuffleOperand(SVI1B, 1), V2B);
1662  Builder.SetInsertPoint(Op0);
1663  Value *NOp0 = Builder.CreateBinOp((Instruction::BinaryOps)Op0->getOpcode(),
1664  NSV0A, NSV0B);
1665  if (auto *I = dyn_cast<Instruction>(NOp0))
1666  I->copyIRFlags(Op0, true);
1667  Builder.SetInsertPoint(Op1);
1668  Value *NOp1 = Builder.CreateBinOp((Instruction::BinaryOps)Op1->getOpcode(),
1669  NSV1A, NSV1B);
1670  if (auto *I = dyn_cast<Instruction>(NOp1))
1671  I->copyIRFlags(Op1, true);
1672 
1673  for (int S = 0, E = ReconstructMasks.size(); S != E; S++) {
1674  Builder.SetInsertPoint(Shuffles[S]);
1675  Value *NSV = Builder.CreateShuffleVector(NOp0, NOp1, ReconstructMasks[S]);
1676  replaceValue(*Shuffles[S], *NSV);
1677  }
1678 
1679  Worklist.pushValue(NSV0A);
1680  Worklist.pushValue(NSV0B);
1681  Worklist.pushValue(NSV1A);
1682  Worklist.pushValue(NSV1B);
1683  for (auto *S : Shuffles)
1684  Worklist.add(S);
1685  return true;
1686 }
1687 
1688 /// This is the entry point for all transforms. Pass manager differences are
1689 /// handled in the callers of this function.
1690 bool VectorCombine::run() {
1691  if (DisableVectorCombine)
1692  return false;
1693 
1694  // Don't attempt vectorization if the target does not support vectors.
1695  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
1696  return false;
1697 
1698  bool MadeChange = false;
1699  auto FoldInst = [this, &MadeChange](Instruction &I) {
1700  Builder.SetInsertPoint(&I);
1701  bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
1702  auto Opcode = I.getOpcode();
1703 
1704  // These folds should be beneficial regardless of when this pass is run
1705  // in the optimization pipeline.
1706  // The type checking is for run-time efficiency. We can avoid wasting time
1707  // dispatching to folding functions if there's no chance of matching.
1708  if (IsFixedVectorType) {
1709  switch (Opcode) {
1710  case Instruction::InsertElement:
1711  MadeChange |= vectorizeLoadInsert(I);
1712  break;
1713  case Instruction::ShuffleVector:
1714  MadeChange |= widenSubvectorLoad(I);
1715  break;
1716  case Instruction::Load:
1717  MadeChange |= scalarizeLoadExtract(I);
1718  break;
1719  default:
1720  break;
1721  }
1722  }
1723 
1724  // This transform works with scalable and fixed vectors
1725  // TODO: Identify and allow other scalable transforms
1726  if (isa<VectorType>(I.getType()))
1727  MadeChange |= scalarizeBinopOrCmp(I);
1728 
1729  if (Opcode == Instruction::Store)
1730  MadeChange |= foldSingleElementStore(I);
1731 
1732 
1733  // If this is an early pipeline invocation of this pass, we are done.
1734  if (TryEarlyFoldsOnly)
1735  return;
1736 
1737  // Otherwise, try folds that improve codegen but may interfere with
1738  // early IR canonicalizations.
1739  // The type checking is for run-time efficiency. We can avoid wasting time
1740  // dispatching to folding functions if there's no chance of matching.
1741  if (IsFixedVectorType) {
1742  switch (Opcode) {
1743  case Instruction::InsertElement:
1744  MadeChange |= foldInsExtFNeg(I);
1745  break;
1746  case Instruction::ShuffleVector:
1747  MadeChange |= foldShuffleOfBinops(I);
1748  MadeChange |= foldSelectShuffle(I);
1749  break;
1750  case Instruction::BitCast:
1751  MadeChange |= foldBitcastShuf(I);
1752  break;
1753  }
1754  } else {
1755  switch (Opcode) {
1756  case Instruction::Call:
1757  MadeChange |= foldShuffleFromReductions(I);
1758  break;
1759  case Instruction::ICmp:
1760  case Instruction::FCmp:
1761  MadeChange |= foldExtractExtract(I);
1762  break;
1763  default:
1764  if (Instruction::isBinaryOp(Opcode)) {
1765  MadeChange |= foldExtractExtract(I);
1766  MadeChange |= foldExtractedCmps(I);
1767  }
1768  break;
1769  }
1770  }
1771  };
1772 
1773  for (BasicBlock &BB : F) {
1774  // Ignore unreachable basic blocks.
1775  if (!DT.isReachableFromEntry(&BB))
1776  continue;
1777  // Use an early-increment range so that we can erase instructions in the loop.
1778  for (Instruction &I : make_early_inc_range(BB)) {
1779  if (I.isDebugOrPseudoInst())
1780  continue;
1781  FoldInst(I);
1782  }
1783  }
1784 
1785  while (!Worklist.isEmpty()) {
1786  Instruction *I = Worklist.removeOne();
1787  if (!I)
1788  continue;
1789 
1790  if (isInstructionTriviallyDead(I)) {
1791  eraseInstruction(*I);
1792  continue;
1793  }
1794 
1795  FoldInst(*I);
1796  }
1797 
1798  return MadeChange;
1799 }
1800 
1801 // Pass manager boilerplate below here.
1802 
1803 namespace {
1804 class VectorCombineLegacyPass : public FunctionPass {
1805 public:
1806  static char ID;
1807  VectorCombineLegacyPass() : FunctionPass(ID) {
1808  initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
1809  }
1810 
1811  void getAnalysisUsage(AnalysisUsage &AU) const override {
1812  AU.addRequired<AssumptionCacheTracker>();
1813  AU.addRequired<DominatorTreeWrapperPass>();
1814  AU.addRequired<TargetTransformInfoWrapperPass>();
1815  AU.addRequired<AAResultsWrapperPass>();
1816  AU.setPreservesCFG();
1817  AU.addPreserved<DominatorTreeWrapperPass>();
1818  AU.addPreserved<GlobalsAAWrapperPass>();
1819  AU.addPreserved<AAResultsWrapperPass>();
1820  AU.addPreserved<BasicAAWrapperPass>();
1821  FunctionPass::getAnalysisUsage(AU);
1822  }
1823 
1824  bool runOnFunction(Function &F) override {
1825  if (skipFunction(F))
1826  return false;
1827  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1828  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1829  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1830  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1831  VectorCombine Combiner(F, TTI, DT, AA, AC, false);
1832  return Combiner.run();
1833  }
1834 };
1835 } // namespace
1836 
1837 char VectorCombineLegacyPass::ID = 0;
1838 INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
1839  "Optimize scalar/vector ops", false,
1840  false)
1841 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1842 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1843 INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
1844  "Optimize scalar/vector ops", false, false)
1845 Pass *llvm::createVectorCombinePass() {
1846  return new VectorCombineLegacyPass();
1847 }
1848 
1849 PreservedAnalyses VectorCombinePass::run(Function &F,
1850  FunctionAnalysisManager &FAM) {
1851  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
1852  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
1853  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1854  AAResults &AA = FAM.getResult<AAManager>(F);
1855  VectorCombine Combiner(F, TTI, DT, AA, AC, TryEarlyFoldsOnly);
1856  if (!Combiner.run())
1857  return PreservedAnalyses::all();
1858  PreservedAnalyses PA;
1859  PA.preserveSet<CFGAnalyses>();
1860  return PA;
1861 }