LLVM 14.0.0git
VectorCombine.cpp
1 //===------- VectorCombine.cpp - Optimize partial vector operations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass optimizes scalar/vector interactions using target cost models. The
10 // transforms implemented here may not fit in traditional loop-based or SLP
11 // vectorization passes.
12 //
13 //===----------------------------------------------------------------------===//
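//
// A usage sketch (assuming a standard opt setup): the transforms below are
// registered as the "vector-combine" pass, so they can typically be exercised
// in isolation with, for example:
//   opt -passes=vector-combine -S input.ll
//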
14 
15 #include "llvm/Transforms/Vectorize/VectorCombine.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/AssumptionCache.h"
18 #include "llvm/Analysis/BasicAliasAnalysis.h"
19 #include "llvm/Analysis/GlobalsModRef.h"
20 #include "llvm/Analysis/Loads.h"
21 #include "llvm/Analysis/TargetTransformInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/Analysis/VectorUtils.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/Function.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/PatternMatch.h"
28 #include "llvm/InitializePasses.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Transforms/Utils/Local.h"
32 #include "llvm/Transforms/Vectorize.h"
33 
34 #define DEBUG_TYPE "vector-combine"
35 #include "llvm/Transforms/Utils/InstructionWorklist.h"
36 
37 using namespace llvm;
38 using namespace llvm::PatternMatch;
39 
40 STATISTIC(NumVecLoad, "Number of vector loads formed");
41 STATISTIC(NumVecCmp, "Number of vector compares formed");
42 STATISTIC(NumVecBO, "Number of vector binops formed");
43 STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
44 STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
45 STATISTIC(NumScalarBO, "Number of scalar binops formed");
46 STATISTIC(NumScalarCmp, "Number of scalar compares formed");
47 
48 static cl::opt<bool> DisableVectorCombine(
49  "disable-vector-combine", cl::init(false), cl::Hidden,
50  cl::desc("Disable all vector combine transforms"));
51 
52 static cl::opt<bool> DisableBinopExtractShuffle(
53  "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
54  cl::desc("Disable binop extract to shuffle transforms"));
55 
56 static cl::opt<unsigned> MaxInstrsToScan(
57  "vector-combine-max-scan-instrs", cl::init(30), cl::Hidden,
58  cl::desc("Max number of instructions to scan for vector combining."));
59 
60 static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
61 
62 namespace {
63 class VectorCombine {
64 public:
65  VectorCombine(Function &F, const TargetTransformInfo &TTI,
66  const DominatorTree &DT, AAResults &AA, AssumptionCache &AC)
67  : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC) {}
68 
69  bool run();
70 
71 private:
72  Function &F;
73  IRBuilder<> Builder;
74  const TargetTransformInfo &TTI;
75  const DominatorTree &DT;
76  AAResults &AA;
77  AssumptionCache &AC;
78  InstructionWorklist Worklist;
79 
80  bool vectorizeLoadInsert(Instruction &I);
81  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
82  ExtractElementInst *Ext1,
83  unsigned PreferredExtractIndex) const;
84  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
85  unsigned Opcode,
86  ExtractElementInst *&ConvertToShuffle,
87  unsigned PreferredExtractIndex);
88  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
89  Instruction &I);
90  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
91  Instruction &I);
92  bool foldExtractExtract(Instruction &I);
93  bool foldBitcastShuf(Instruction &I);
94  bool scalarizeBinopOrCmp(Instruction &I);
95  bool foldExtractedCmps(Instruction &I);
96  bool foldSingleElementStore(Instruction &I);
97  bool scalarizeLoadExtract(Instruction &I);
98 
99  void replaceValue(Value &Old, Value &New) {
100  Old.replaceAllUsesWith(&New);
101  New.takeName(&Old);
102  if (auto *NewI = dyn_cast<Instruction>(&New)) {
103  Worklist.pushUsersToWorkList(*NewI);
104  Worklist.pushValue(NewI);
105  }
106  Worklist.pushValue(&Old);
107  }
108 
109  void eraseInstruction(Instruction &I) {
110  for (Value *Op : I.operands())
111  Worklist.pushValue(Op);
112  Worklist.remove(&I);
113  I.eraseFromParent();
114  }
115 };
116 } // namespace
117 
118 bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
119  // Match insert into fixed vector of scalar value.
120  // TODO: Handle non-zero insert index.
121  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
122  Value *Scalar;
123  if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
124  !Scalar->hasOneUse())
125  return false;
126 
127  // Optionally match an extract from another vector.
128  Value *X;
129  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
130  if (!HasExtract)
131  X = Scalar;
132 
133  // Match source value as load of scalar or vector.
134  // Do not vectorize scalar load (widening) if atomic/volatile or under
135  // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
136  // or create data races non-existent in the source.
137  auto *Load = dyn_cast<LoadInst>(X);
138  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
139  Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
140  mustSuppressSpeculation(*Load))
141  return false;
142 
143  const DataLayout &DL = I.getModule()->getDataLayout();
144  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
145  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
146 
147  // If original AS != Load's AS, we can't bitcast the original pointer and have
148  // to use Load's operand instead. Ideally we would want to strip pointer casts
149  // without changing AS, but there's no API to do that ATM.
150  unsigned AS = Load->getPointerAddressSpace();
151  if (AS != SrcPtr->getType()->getPointerAddressSpace())
152  SrcPtr = Load->getPointerOperand();
153 
154  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
155  // sure we have all of our type-based constraints in place for this target.
156  Type *ScalarTy = Scalar->getType();
157  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
158  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
159  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
160  ScalarSize % 8 != 0)
161  return false;
162 
163  // Check safety of replacing the scalar load with a larger vector load.
164  // We use minimal alignment (maximum flexibility) because we only care about
165  // the dereferenceable region. When calculating cost and creating a new op,
166  // we may use a larger value based on alignment attributes.
167  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
168  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
169  unsigned OffsetEltIndex = 0;
170  Align Alignment = Load->getAlign();
171  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
172  // It is not safe to load directly from the pointer, but we can still peek
173  // through gep offsets and check if it is safe to load from a base address with
174  // updated alignment. If it is, we can shuffle the element(s) into place
175  // after loading.
176  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
177  APInt Offset(OffsetBitWidth, 0);
178  SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
179 
180  // We want to shuffle the result down from a high element of a vector, so
181  // the offset must be positive.
182  if (Offset.isNegative())
183  return false;
184 
185  // The offset must be a multiple of the scalar element to shuffle cleanly
186  // in the element's size.
187  uint64_t ScalarSizeInBytes = ScalarSize / 8;
188  if (Offset.urem(ScalarSizeInBytes) != 0)
189  return false;
190 
191  // If we load MinVecNumElts, will our target element still be loaded?
192  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
193  if (OffsetEltIndex >= MinVecNumElts)
194  return false;
195 
196  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
197  return false;
198 
199  // Update alignment with offset value. Note that the offset could be negated
200  // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
201  // negation does not change the result of the alignment calculation.
202  Alignment = commonAlignment(Alignment, Offset.getZExtValue());
203  }
204 
205  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
206  // Use the greater of the alignment on the load or its source pointer.
207  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
208  Type *LoadTy = Load->getType();
209  InstructionCost OldCost =
210  TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
211  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
212  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
213  /* Insert */ true, HasExtract);
214 
215  // New pattern: load VecPtr
216  InstructionCost NewCost =
217  TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
218  // Optionally, we are shuffling the loaded vector element(s) into place.
219  // For the mask set everything but element 0 to undef to prevent poison from
220  // propagating from the extra loaded memory. This will also optionally
221  // shrink/grow the vector from the loaded size to the output size.
222  // We assume this operation has no cost in codegen if there was no offset.
223  // Note that we could use freeze to avoid poison problems, but then we might
224  // still need a shuffle to change the vector size.
225  unsigned OutputNumElts = Ty->getNumElements();
226  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
227  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
228  Mask[0] = OffsetEltIndex;
229  if (OffsetEltIndex)
230  NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy, Mask);
231 
232  // We can aggressively convert to the vector form because the backend can
233  // invert this transform if it does not result in a performance win.
234  if (OldCost < NewCost || !NewCost.isValid())
235  return false;
236 
237  // It is safe and potentially profitable to load a vector directly:
238  // inselt undef, load Scalar, 0 --> load VecPtr
239  IRBuilder<> Builder(Load);
240  Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
241  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
242  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
243 
244  replaceValue(I, *VecLd);
245  ++NumVecLoad;
246  return true;
247 }
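// An illustrative before/after IR sketch for the load+insert transform above
// (assumed example; the actual result depends on the target's cost model and
// minimum vector register width):
//   %s = load float, float* %p, align 4
//   %i = insertelement <4 x float> undef, float %s, i32 0
// -->
//   %vp = bitcast float* %p to <4 x float>*
//   %vl = load <4 x float>, <4 x float>* %vp, align 4
//   %i  = shufflevector <4 x float> %vl, <4 x float> undef,
//                       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>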
248 
249 /// Determine which, if any, of the inputs should be replaced by a shuffle
250 /// followed by extract from a different index.
251 ExtractElementInst *VectorCombine::getShuffleExtract(
252  ExtractElementInst *Ext0, ExtractElementInst *Ext1,
253  unsigned PreferredExtractIndex = InvalidIndex) const {
254  assert(isa<ConstantInt>(Ext0->getIndexOperand()) &&
255  isa<ConstantInt>(Ext1->getIndexOperand()) &&
256  "Expected constant extract indexes");
257 
258  unsigned Index0 = cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue();
259  unsigned Index1 = cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue();
260 
261  // If the extract indexes are identical, no shuffle is needed.
262  if (Index0 == Index1)
263  return nullptr;
264 
265  Type *VecTy = Ext0->getVectorOperand()->getType();
266  assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
267  InstructionCost Cost0 =
268  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
269  InstructionCost Cost1 =
270  TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
271 
272  // If both costs are invalid, no shuffle is needed.
273  if (!Cost0.isValid() && !Cost1.isValid())
274  return nullptr;
275 
276  // We are extracting from 2 different indexes, so one operand must be shuffled
277  // before performing a vector operation and/or extract. The more expensive
278  // extract will be replaced by a shuffle.
279  if (Cost0 > Cost1)
280  return Ext0;
281  if (Cost1 > Cost0)
282  return Ext1;
283 
284  // If the costs are equal and there is a preferred extract index, shuffle the
285  // opposite operand.
286  if (PreferredExtractIndex == Index0)
287  return Ext1;
288  if (PreferredExtractIndex == Index1)
289  return Ext0;
290 
291  // Otherwise, replace the extract with the higher index.
292  return Index0 > Index1 ? Ext0 : Ext1;
293 }
294 
295 /// Compare the relative costs of 2 extracts followed by scalar operation vs.
296 /// vector operation(s) followed by extract. Return true if the existing
297 /// instructions are cheaper than a vector alternative. Otherwise, return false
298 /// and if one of the extracts should be transformed to a shufflevector, set
299 /// \p ConvertToShuffle to that extract instruction.
300 bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
301  ExtractElementInst *Ext1,
302  unsigned Opcode,
303  ExtractElementInst *&ConvertToShuffle,
304  unsigned PreferredExtractIndex) {
305  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
306  isa<ConstantInt>(Ext1->getOperand(1)) &&
307  "Expected constant extract indexes");
308  Type *ScalarTy = Ext0->getType();
309  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
310  InstructionCost ScalarOpCost, VectorOpCost;
311 
312  // Get cost estimates for scalar and vector versions of the operation.
313  bool IsBinOp = Instruction::isBinaryOp(Opcode);
314  if (IsBinOp) {
315  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
316  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
317  } else {
318  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
319  "Expected a compare");
320  ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
321  CmpInst::makeCmpResultType(ScalarTy));
322  VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
323  CmpInst::makeCmpResultType(VecTy));
324  }
325 
326  // Get cost estimates for the extract elements. These costs will factor into
327  // both sequences.
328  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
329  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
330 
331  InstructionCost Extract0Cost =
332  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
333  InstructionCost Extract1Cost =
334  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
335 
336  // A more expensive extract will always be replaced by a splat shuffle.
337  // For example, if Ext0 is more expensive:
338  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
339  // extelt (opcode (splat V0, Ext0), V1), Ext1
340  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
341  // check the cost of creating a broadcast shuffle and shuffling both
342  // operands to element 0.
343  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
344 
345  // Extra uses of the extracts mean that we include those costs in the
346  // vector total because those instructions will not be eliminated.
347  InstructionCost OldCost, NewCost;
348  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
349  // Handle a special case. If the 2 extracts are identical, adjust the
350  // formulas to account for that. The extra use charge allows for either the
351  // CSE'd pattern or an unoptimized form with identical values:
352  // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
353  bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
354  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
355  OldCost = CheapExtractCost + ScalarOpCost;
356  NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
357  } else {
358  // Handle the general case. Each extract is actually a different value:
359  // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
360  OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
361  NewCost = VectorOpCost + CheapExtractCost +
362  !Ext0->hasOneUse() * Extract0Cost +
363  !Ext1->hasOneUse() * Extract1Cost;
364  }
365 
366  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
367  if (ConvertToShuffle) {
368  if (IsBinOp && DisableBinopExtractShuffle)
369  return true;
370 
371  // If we are extracting from 2 different indexes, then one operand must be
372  // shuffled before performing the vector operation. The shuffle mask is
373  // undefined except for 1 lane that is being translated to the remaining
374  // extraction lane. Therefore, it is a splat shuffle. Ex:
375  // ShufMask = { undef, undef, 0, undef }
376  // TODO: The cost model has an option for a "broadcast" shuffle
377  // (splat-from-element-0), but no option for a more general splat.
378  NewCost +=
379  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
380  }
381 
382  // Aggressively form a vector op if the cost is equal because the transform
383  // may enable further optimization.
384  // Codegen can reverse this transform (scalarize) if it was not profitable.
385  return OldCost < NewCost;
386 }
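// A worked example of the cost comparison above, with assumed (illustrative)
// costs: if Extract0Cost = Extract1Cost = ScalarOpCost = VectorOpCost = 1 and
// both extracts have a single use, then OldCost = 1 + 1 + 1 = 3 and
// NewCost = 1 + 1 = 2, so this returns false and the caller forms the vector op.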
387 
388 /// Create a shuffle that translates (shifts) 1 element from the input vector
389 /// to a new element location.
390 static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
391  unsigned NewIndex, IRBuilder<> &Builder) {
392  // The shuffle mask is undefined except for 1 lane that is being translated
393  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
394  // ShufMask = { 2, undef, undef, undef }
395  auto *VecTy = cast<FixedVectorType>(Vec->getType());
396  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
397  ShufMask[NewIndex] = OldIndex;
398  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
399 }
400 
401 /// Given an extract element instruction with constant index operand, shuffle
402 /// the source vector (shift the scalar element) to a NewIndex for extraction.
403 /// Return null if the input can be constant folded, so that we are not creating
404 /// unnecessary instructions.
405 static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
406  unsigned NewIndex,
407  IRBuilder<> &Builder) {
408  // If the extract can be constant-folded, this code is unsimplified. Defer
409  // to other passes to handle that.
410  Value *X = ExtElt->getVectorOperand();
411  Value *C = ExtElt->getIndexOperand();
412  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
413  if (isa<Constant>(X))
414  return nullptr;
415 
416  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
417  NewIndex, Builder);
418  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
419 }
420 
421 /// Try to reduce extract element costs by converting scalar compares to vector
422 /// compares followed by extract.
423 /// cmp (ext0 V0, C), (ext1 V1, C)
424 void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
425  ExtractElementInst *Ext1, Instruction &I) {
426  assert(isa<CmpInst>(&I) && "Expected a compare");
427  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
428  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
429  "Expected matching constant extract indexes");
430 
431  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
432  ++NumVecCmp;
433  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
434  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
435  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
436  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
437  replaceValue(I, *NewExt);
438 }
439 
440 /// Try to reduce extract element costs by converting scalar binops to vector
441 /// binops followed by extract.
442 /// bo (ext0 V0, C), (ext1 V1, C)
443 void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
444  ExtractElementInst *Ext1, Instruction &I) {
445  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
446  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
447  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
448  "Expected matching constant extract indexes");
449 
450  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
451  ++NumVecBO;
452  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
453  Value *VecBO =
454  Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
455 
456  // All IR flags are safe to back-propagate because any potential poison
457  // created in unused vector elements is discarded by the extract.
458  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
459  VecBOInst->copyIRFlags(&I);
460 
461  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
462  replaceValue(I, *NewExt);
463 }
464 
465 /// Match an instruction with extracted vector operands.
466 bool VectorCombine::foldExtractExtract(Instruction &I) {
467  // It is not safe to transform things like div, urem, etc. because we may
468  // create undefined behavior when executing those on unknown vector elements.
469  if (!isSafeToSpeculativelyExecute(&I))
470  return false;
471 
472  Instruction *I0, *I1;
473  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
474  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
475  !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
476  return false;
477 
478  Value *V0, *V1;
479  uint64_t C0, C1;
480  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
481  !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
482  V0->getType() != V1->getType())
483  return false;
484 
485  // If the scalar value 'I' is going to be re-inserted into a vector, then try
486  // to create an extract to that same element. The extract/insert can be
487  // reduced to a "select shuffle".
488  // TODO: If we add a larger pattern match that starts from an insert, this
489  // probably becomes unnecessary.
490  auto *Ext0 = cast<ExtractElementInst>(I0);
491  auto *Ext1 = cast<ExtractElementInst>(I1);
492  uint64_t InsertIndex = InvalidIndex;
493  if (I.hasOneUse())
494  match(I.user_back(),
495  m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
496 
497  ExtractElementInst *ExtractToChange;
498  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
499  InsertIndex))
500  return false;
501 
502  if (ExtractToChange) {
503  unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
504  ExtractElementInst *NewExtract =
505  translateExtract(ExtractToChange, CheapExtractIdx, Builder);
506  if (!NewExtract)
507  return false;
508  if (ExtractToChange == Ext0)
509  Ext0 = NewExtract;
510  else
511  Ext1 = NewExtract;
512  }
513 
514  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
515  foldExtExtCmp(Ext0, Ext1, I);
516  else
517  foldExtExtBinop(Ext0, Ext1, I);
518 
519  Worklist.push(Ext0);
520  Worklist.push(Ext1);
521  return true;
522 }
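// An illustrative IR sketch (assumed example) for the fold above when both
// extracts already use the same index:
//   %e0 = extractelement <4 x i32> %x, i32 1
//   %e1 = extractelement <4 x i32> %y, i32 1
//   %r  = add i32 %e0, %e1
// -->
//   %v = add <4 x i32> %x, %y
//   %r = extractelement <4 x i32> %v, i32 1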
523 
524 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
525 /// destination type followed by shuffle. This can enable further transforms by
526 /// moving bitcasts or shuffles together.
527 bool VectorCombine::foldBitcastShuf(Instruction &I) {
528  Value *V;
529  ArrayRef<int> Mask;
530  if (!match(&I, m_BitCast(
531  m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
532  return false;
533 
534  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
535  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
536  // mask for scalable type is a splat or not.
537  // 2) Disallow non-vector casts and length-changing shuffles.
538  // TODO: We could allow any shuffle.
539  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
540  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
541  if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
542  return false;
543 
544  unsigned DestNumElts = DestTy->getNumElements();
545  unsigned SrcNumElts = SrcTy->getNumElements();
546  SmallVector<int, 16> NewMask;
547  if (SrcNumElts <= DestNumElts) {
548  // The bitcast is from wide to narrow/equal elements. The shuffle mask can
549  // always be expanded to the equivalent form choosing narrower elements.
550  assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
551  unsigned ScaleFactor = DestNumElts / SrcNumElts;
552  narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
553  } else {
554  // The bitcast is from narrow elements to wide elements. The shuffle mask
555  // must choose consecutive elements to allow casting first.
556  assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
557  unsigned ScaleFactor = SrcNumElts / DestNumElts;
558  if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
559  return false;
560  }
561 
562  // The new shuffle must not cost more than the old shuffle. The bitcast is
563  // moved ahead of the shuffle, so assume that it has the same cost as before.
564  InstructionCost DestCost = TTI.getShuffleCost(
565  TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
566  InstructionCost SrcCost =
567  TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
568  if (DestCost > SrcCost || !DestCost.isValid())
569  return false;
570 
571  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
572  ++NumShufOfBitcast;
573  Value *CastV = Builder.CreateBitCast(V, DestTy);
574  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
575  replaceValue(I, *Shuf);
576  return true;
577 }
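// An illustrative IR sketch (assumed example) for the fold above, where the
// bitcast goes from wide to narrow elements (scale factor 2), so the shuffle
// mask is narrowed accordingly:
//   %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %b = bitcast <4 x i32> %s to <8 x i16>
// -->
//   %b = bitcast <4 x i32> %v to <8 x i16>
//   %s = shufflevector <8 x i16> %b, <8 x i16> undef,
//                      <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>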
578 
579 /// Match a vector binop or compare instruction with at least one inserted
580 /// scalar operand and convert to scalar binop/cmp followed by insertelement.
581 bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
582  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
583  Value *Ins0, *Ins1;
584  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
585  !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
586  return false;
587 
588  // Do not convert the vector condition of a vector select into a scalar
589  // condition. That may cause problems for codegen because of differences in
590  // boolean formats and register-file transfers.
591  // TODO: Can we account for that in the cost model?
592  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
593  if (IsCmp)
594  for (User *U : I.users())
595  if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
596  return false;
597 
598  // Match against one or both scalar values being inserted into constant
599  // vectors:
600  // vec_op VecC0, (inselt VecC1, V1, Index)
601  // vec_op (inselt VecC0, V0, Index), VecC1
602  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
603  // TODO: Deal with mismatched index constants and variable indexes?
604  Constant *VecC0 = nullptr, *VecC1 = nullptr;
605  Value *V0 = nullptr, *V1 = nullptr;
606  uint64_t Index0 = 0, Index1 = 0;
607  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
608  m_ConstantInt(Index0))) &&
609  !match(Ins0, m_Constant(VecC0)))
610  return false;
611  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
612  m_ConstantInt(Index1))) &&
613  !match(Ins1, m_Constant(VecC1)))
614  return false;
615 
616  bool IsConst0 = !V0;
617  bool IsConst1 = !V1;
618  if (IsConst0 && IsConst1)
619  return false;
620  if (!IsConst0 && !IsConst1 && Index0 != Index1)
621  return false;
622 
623  // Bail for single insertion if it is a load.
624  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
625  auto *I0 = dyn_cast_or_null<Instruction>(V0);
626  auto *I1 = dyn_cast_or_null<Instruction>(V1);
627  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
628  (IsConst1 && I0 && I0->mayReadFromMemory()))
629  return false;
630 
631  uint64_t Index = IsConst0 ? Index1 : Index0;
632  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
633  Type *VecTy = I.getType();
634  assert(VecTy->isVectorTy() &&
635  (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
636  (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
637  ScalarTy->isPointerTy()) &&
638  "Unexpected types for insert element into binop or cmp");
639 
640  unsigned Opcode = I.getOpcode();
641  InstructionCost ScalarOpCost, VectorOpCost;
642  if (IsCmp) {
643  ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
644  VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
645  } else {
646  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
647  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
648  }
649 
650  // Get cost estimate for the insert element. This cost will factor into
651  // both sequences.
652  InstructionCost InsertCost =
653  TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
654  InstructionCost OldCost =
655  (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
656  InstructionCost NewCost = ScalarOpCost + InsertCost +
657  (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
658  (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
659 
660  // We want to scalarize unless the vector variant actually has lower cost.
661  if (OldCost < NewCost || !NewCost.isValid())
662  return false;
663 
664  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
665  // inselt NewVecC, (scalar_op V0, V1), Index
666  if (IsCmp)
667  ++NumScalarCmp;
668  else
669  ++NumScalarBO;
670 
671  // For constant cases, extract the scalar element, this should constant fold.
672  if (IsConst0)
673  V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
674  if (IsConst1)
675  V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
676 
677  Value *Scalar =
678  IsCmp ? Builder.CreateCmp(Pred, V0, V1)
679  : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
680 
681  Scalar->setName(I.getName() + ".scalar");
682 
683  // All IR flags are safe to back-propagate. There is no potential for extra
684  // poison to be created by the scalar instruction.
685  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
686  ScalarInst->copyIRFlags(&I);
687 
688  // Fold the vector constants in the original vectors into a new base vector.
689  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
690  : ConstantExpr::get(Opcode, VecC0, VecC1);
691  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
692  replaceValue(I, *Insert);
693  return true;
694 }
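// An illustrative IR sketch (assumed example) for the scalarization above,
// with one inserted scalar and one constant vector operand:
//   %i = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %x, i32 2
//   %r = add <4 x i32> %i, <i32 10, i32 20, i32 30, i32 40>
// -->
//   %s = add i32 %x, 30
//   %r = insertelement <4 x i32> <i32 11, i32 22, i32 33, i32 44>, i32 %s, i32 2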
695 
696 /// Try to combine a scalar binop + 2 scalar compares of extracted elements of
697 /// a vector into vector operations followed by extract. Note: The SLP pass
698 /// may miss this pattern because of implementation problems.
699 bool VectorCombine::foldExtractedCmps(Instruction &I) {
700  // We are looking for a scalar binop of booleans.
701  // binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
702  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
703  return false;
704 
705  // The compare predicates should match, and each compare should have a
706  // constant operand.
707  // TODO: Relax the one-use constraints.
708  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
709  Instruction *I0, *I1;
710  Constant *C0, *C1;
711  CmpInst::Predicate P0, P1;
712  if (!match(B0, m_OneUse(m_Cmp(P0, m_Instruction(I0), m_Constant(C0)))) ||
713  !match(B1, m_OneUse(m_Cmp(P1, m_Instruction(I1), m_Constant(C1)))) ||
714  P0 != P1)
715  return false;
716 
717  // The compare operands must be extracts of the same vector with constant
718  // extract indexes.
719  // TODO: Relax the one-use constraints.
720  Value *X;
721  uint64_t Index0, Index1;
722  if (!match(I0, m_OneUse(m_ExtractElt(m_Value(X), m_ConstantInt(Index0)))) ||
723  !match(I1, m_OneUse(m_ExtractElt(m_Specific(X), m_ConstantInt(Index1)))))
724  return false;
725 
726  auto *Ext0 = cast<ExtractElementInst>(I0);
727  auto *Ext1 = cast<ExtractElementInst>(I1);
728  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
729  if (!ConvertToShuf)
730  return false;
731 
732  // The original scalar pattern is:
733  // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
734  CmpInst::Predicate Pred = P0;
735  unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
736  : Instruction::ICmp;
737  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
738  if (!VecTy)
739  return false;
740 
741  InstructionCost OldCost =
742  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
743  OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
744  OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
745  OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
746 
747  // The proposed vector pattern is:
748  // vcmp = cmp Pred X, VecC
749  // ext (binop vNi1 vcmp, (shuffle vcmp, Index1)), Index0
750  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
751  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
752  auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
753  InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
754  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
755  ShufMask[CheapIndex] = ExpensiveIndex;
756  NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
757  ShufMask);
758  NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
759  NewCost += TTI.getVectorInstrCost(Ext0->getOpcode(), CmpTy, CheapIndex);
760 
761  // Aggressively form vector ops if the cost is equal because the transform
762  // may enable further optimization.
763  // Codegen can reverse this transform (scalarize) if it was not profitable.
764  if (OldCost < NewCost || !NewCost.isValid())
765  return false;
766 
767  // Create a vector constant from the 2 scalar constants.
768  SmallVector<Constant *, 32> CmpC(VecTy->getNumElements(),
769  UndefValue::get(VecTy->getElementType()));
770  CmpC[Index0] = C0;
771  CmpC[Index1] = C1;
772  Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
773 
774  Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
775  Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
776  VCmp, Shuf);
777  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
778  replaceValue(I, *NewExt);
779  ++NumVecCmpBO;
780  return true;
781 }
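// An illustrative IR sketch (assumed example; which extract gets shuffled is
// decided by the cost model) for the compare + binop fold above:
//   %e0 = extractelement <4 x i32> %x, i32 0
//   %c0 = icmp sgt i32 %e0, 42
//   %e1 = extractelement <4 x i32> %x, i32 1
//   %c1 = icmp sgt i32 %e1, 7
//   %r  = and i1 %c0, %c1
// -->
//   %vc = icmp sgt <4 x i32> %x, <i32 42, i32 7, i32 undef, i32 undef>
//   %sh = shufflevector <4 x i1> %vc, <4 x i1> undef,
//                       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
//   %vb = and <4 x i1> %vc, %sh
//   %r  = extractelement <4 x i1> %vb, i32 0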
782 
783 // Check if memory loc modified between two instrs in the same BB
784 static bool isMemModifiedBetween(BasicBlock::iterator Begin,
785  BasicBlock::iterator End,
786  const MemoryLocation &Loc, AAResults &AA) {
787  unsigned NumScanned = 0;
788  return std::any_of(Begin, End, [&](const Instruction &Instr) {
789  return isModSet(AA.getModRefInfo(&Instr, Loc)) ||
790  ++NumScanned > MaxInstrsToScan;
791  });
792 }
793 
794 /// Helper class to indicate whether a vector index can be safely scalarized and
795 /// if a freeze needs to be inserted.
796 class ScalarizationResult {
797  enum class StatusTy { Unsafe, Safe, SafeWithFreeze };
798 
799  StatusTy Status;
800  Value *ToFreeze;
801 
802  ScalarizationResult(StatusTy Status, Value *ToFreeze = nullptr)
803  : Status(Status), ToFreeze(ToFreeze) {}
804 
805 public:
806  ScalarizationResult(const ScalarizationResult &Other) = default;
807  ~ScalarizationResult() {
808  assert(!ToFreeze && "freeze() not called with ToFreeze being set");
809  }
810 
811  static ScalarizationResult unsafe() { return {StatusTy::Unsafe}; }
812  static ScalarizationResult safe() { return {StatusTy::Safe}; }
813  static ScalarizationResult safeWithFreeze(Value *ToFreeze) {
814  return {StatusTy::SafeWithFreeze, ToFreeze};
815  }
816 
817  /// Returns true if the index can be scalarize without requiring a freeze.
818  bool isSafe() const { return Status == StatusTy::Safe; }
819  /// Returns true if the index cannot be scalarized.
820  bool isUnsafe() const { return Status == StatusTy::Unsafe; }
821  /// Returns true if the index can be scalarized, but requires inserting a
822  /// freeze.
823  bool isSafeWithFreeze() const { return Status == StatusTy::SafeWithFreeze; }
824 
825  /// Freeze the ToFreeze and update the use in \p User to use it.
826  void freeze(IRBuilder<> &Builder, Instruction &UserI) {
827  assert(isSafeWithFreeze() &&
828  "should only be used when freezing is required");
829  assert(is_contained(ToFreeze->users(), &UserI) &&
830  "UserI must be a user of ToFreeze");
831  IRBuilder<>::InsertPointGuard Guard(Builder);
832  Builder.SetInsertPoint(cast<Instruction>(&UserI));
833  Value *Frozen =
834  Builder.CreateFreeze(ToFreeze, ToFreeze->getName() + ".frozen");
835  for (Use &U : make_early_inc_range((UserI.operands())))
836  if (U.get() == ToFreeze)
837  U.set(Frozen);
838 
839  ToFreeze = nullptr;
840  }
841 };
842 
843 /// Check if it is legal to scalarize a memory access to \p VecTy at index \p
844 /// Idx. \p Idx must access a valid vector element.
845 static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy,
846  Value *Idx, Instruction *CtxI,
847  AssumptionCache &AC,
848  const DominatorTree &DT) {
849  if (auto *C = dyn_cast<ConstantInt>(Idx)) {
850  if (C->getValue().ult(VecTy->getNumElements()))
851  return ScalarizationResult::safe();
852  return ScalarizationResult::unsafe();
853  }
854 
855  unsigned IntWidth = Idx->getType()->getScalarSizeInBits();
856  APInt Zero(IntWidth, 0);
857  APInt MaxElts(IntWidth, VecTy->getNumElements());
858  ConstantRange ValidIndices(Zero, MaxElts);
859  ConstantRange IdxRange(IntWidth, true);
860 
861  if (isGuaranteedNotToBePoison(Idx, &AC)) {
862  if (ValidIndices.contains(computeConstantRange(Idx, true, &AC, CtxI, &DT)))
863  return ScalarizationResult::safe();
864  return ScalarizationResult::unsafe();
865  }
866 
867  // If the index may be poison, check if we can insert a freeze before the
868  // range of the index is restricted.
869  Value *IdxBase;
870  ConstantInt *CI;
871  if (match(Idx, m_And(m_Value(IdxBase), m_ConstantInt(CI)))) {
872  IdxRange = IdxRange.binaryAnd(CI->getValue());
873  } else if (match(Idx, m_URem(m_Value(IdxBase), m_ConstantInt(CI)))) {
874  IdxRange = IdxRange.urem(CI->getValue());
875  }
876 
877  if (ValidIndices.contains(IdxRange))
878  return ScalarizationResult::safeWithFreeze(IdxBase);
879  return ScalarizationResult::unsafe();
880 }
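// An example of the freeze case above (assumed): for a <4 x i32> vector, an
// index computed as "%idx = and i64 %i, 3" always lies in [0, 4), so the
// access can be scalarized once %i is frozen; the 'and' clamps the range, but
// %i itself may still be poison.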
881 
882 /// The memory operation on a vector of \p ScalarType had alignment of
883 /// \p VectorAlignment. Compute the maximal, but conservatively correct,
884 /// alignment that will be valid for the memory operation on a single scalar
885 /// element of the same type with index \p Idx.
886 static Align computeAlignmentAfterScalarization(Align VectorAlignment,
887  Type *ScalarType, Value *Idx,
888  const DataLayout &DL) {
889  if (auto *C = dyn_cast<ConstantInt>(Idx))
890  return commonAlignment(VectorAlignment,
891  C->getZExtValue() * DL.getTypeStoreSize(ScalarType));
892  return commonAlignment(VectorAlignment, DL.getTypeStoreSize(ScalarType));
893 }
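// A worked example (illustrative): for a vector access with alignment 16 and
// an i32 element (store size 4 bytes), a constant index of 1 gives
// commonAlignment(16, 1 * 4) = 4, while an unknown index conservatively gives
// commonAlignment(16, 4) = 4 as well.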
894 
895 // Combine patterns like:
896 // %0 = load <4 x i32>, <4 x i32>* %a
897 // %1 = insertelement <4 x i32> %0, i32 %b, i32 1
898 // store <4 x i32> %1, <4 x i32>* %a
899 // to:
900 // %0 = bitcast <4 x i32>* %a to i32*
901 // %1 = getelementptr inbounds i32, i32* %0, i64 0, i64 1
902 // store i32 %b, i32* %1
903 bool VectorCombine::foldSingleElementStore(Instruction &I) {
904  StoreInst *SI = dyn_cast<StoreInst>(&I);
905  if (!SI || !SI->isSimple() ||
906  !isa<FixedVectorType>(SI->getValueOperand()->getType()))
907  return false;
908 
909  // TODO: Combine more complicated patterns (multiple insert) by referencing
910  // TargetTransformInfo.
911  Instruction *Source;
912  Value *NewElement;
913  Value *Idx;
914  if (!match(SI->getValueOperand(),
915  m_InsertElt(m_Instruction(Source), m_Value(NewElement),
916  m_Value(Idx))))
917  return false;
918 
919  if (auto *Load = dyn_cast<LoadInst>(Source)) {
920  auto VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
921  const DataLayout &DL = I.getModule()->getDataLayout();
922  Value *SrcAddr = Load->getPointerOperand()->stripPointerCasts();
923  // Don't optimize for atomic/volatile load or store. Ensure memory is not
924  // modified between, vector type matches store size, and index is inbounds.
925  if (!Load->isSimple() || Load->getParent() != SI->getParent() ||
926  !DL.typeSizeEqualsStoreSize(Load->getType()) ||
927  SrcAddr != SI->getPointerOperand()->stripPointerCasts())
928  return false;
929 
930  auto ScalarizableIdx = canScalarizeAccess(VecTy, Idx, Load, AC, DT);
931  if (ScalarizableIdx.isUnsafe() ||
932  isMemModifiedBetween(Load->getIterator(), SI->getIterator(),
933  MemoryLocation::get(SI), AA))
934  return false;
935 
936  if (ScalarizableIdx.isSafeWithFreeze())
937  ScalarizableIdx.freeze(Builder, *cast<Instruction>(Idx));
938  Value *GEP = Builder.CreateInBoundsGEP(
939  SI->getValueOperand()->getType(), SI->getPointerOperand(),
940  {ConstantInt::get(Idx->getType(), 0), Idx});
941  StoreInst *NSI = Builder.CreateStore(NewElement, GEP);
942  NSI->copyMetadata(*SI);
943  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
944  std::max(SI->getAlign(), Load->getAlign()), NewElement->getType(), Idx,
945  DL);
946  NSI->setAlignment(ScalarOpAlignment);
947  replaceValue(I, *NSI);
948  eraseInstruction(I);
949  return true;
950  }
951 
952  return false;
953 }
954 
955 /// Try to scalarize vector loads feeding extractelement instructions.
956 bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
957  Value *Ptr;
958  if (!match(&I, m_Load(m_Value(Ptr))))
959  return false;
960 
961  auto *LI = cast<LoadInst>(&I);
962  const DataLayout &DL = I.getModule()->getDataLayout();
963  if (LI->isVolatile() || !DL.typeSizeEqualsStoreSize(LI->getType()))
964  return false;
965 
966  auto *FixedVT = dyn_cast<FixedVectorType>(LI->getType());
967  if (!FixedVT)
968  return false;
969 
970  InstructionCost OriginalCost = TTI.getMemoryOpCost(
971  Instruction::Load, LI->getType(), Align(LI->getAlignment()),
972  LI->getPointerAddressSpace());
973  InstructionCost ScalarizedCost = 0;
974 
975  Instruction *LastCheckedInst = LI;
976  unsigned NumInstChecked = 0;
977  // Check if all users of the load are extracts with no memory modifications
978  // between the load and the extract. Compute the cost of both the original
979  // code and the scalarized version.
980  for (User *U : LI->users()) {
981  auto *UI = dyn_cast<ExtractElementInst>(U);
982  if (!UI || UI->getParent() != LI->getParent())
983  return false;
984 
985  if (!isGuaranteedNotToBePoison(UI->getOperand(1), &AC, LI, &DT))
986  return false;
987 
988  // Check if any instruction between the load and the extract may modify
989  // memory.
990  if (LastCheckedInst->comesBefore(UI)) {
991  for (Instruction &I :
992  make_range(std::next(LI->getIterator()), UI->getIterator())) {
993  // Bail out if we reached the check limit or the instruction may write
994  // to memory.
995  if (NumInstChecked == MaxInstrsToScan || I.mayWriteToMemory())
996  return false;
997  NumInstChecked++;
998  }
999  }
1000 
1001  if (!LastCheckedInst)
1002  LastCheckedInst = UI;
1003  else if (LastCheckedInst->comesBefore(UI))
1004  LastCheckedInst = UI;
1005 
1006  auto ScalarIdx = canScalarizeAccess(FixedVT, UI->getOperand(1), &I, AC, DT);
1007  if (!ScalarIdx.isSafe()) {
1008  // TODO: Freeze index if it is safe to do so.
1009  return false;
1010  }
1011 
1012  auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
1013  OriginalCost +=
1014  TTI.getVectorInstrCost(Instruction::ExtractElement, LI->getType(),
1015  Index ? Index->getZExtValue() : -1);
1016  ScalarizedCost +=
1017  TTI.getMemoryOpCost(Instruction::Load, FixedVT->getElementType(),
1018  Align(1), LI->getPointerAddressSpace());
1019  ScalarizedCost += TTI.getAddressComputationCost(FixedVT->getElementType());
1020  }
1021 
1022  if (ScalarizedCost >= OriginalCost)
1023  return false;
1024 
1025  // Replace extracts with narrow scalar loads.
1026  for (User *U : LI->users()) {
1027  auto *EI = cast<ExtractElementInst>(U);
1028  Builder.SetInsertPoint(EI);
1029 
1030  Value *Idx = EI->getOperand(1);
1031  Value *GEP =
1032  Builder.CreateInBoundsGEP(FixedVT, Ptr, {Builder.getInt32(0), Idx});
1033  auto *NewLoad = cast<LoadInst>(Builder.CreateLoad(
1034  FixedVT->getElementType(), GEP, EI->getName() + ".scalar"));
1035 
1036  Align ScalarOpAlignment = computeAlignmentAfterScalarization(
1037  LI->getAlign(), FixedVT->getElementType(), Idx, DL);
1038  NewLoad->setAlignment(ScalarOpAlignment);
1039 
1040  replaceValue(*EI, *NewLoad);
1041  }
1042 
1043  return true;
1044 }
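// An illustrative IR sketch (assumed example) for the scalarization above,
// where a vector load is only used by extracts:
//   %v  = load <4 x float>, <4 x float>* %p, align 16
//   %e0 = extractelement <4 x float> %v, i32 0
//   %e2 = extractelement <4 x float> %v, i32 2
// -->
//   %g0 = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 0, i32 0
//   %e0 = load float, float* %g0, align 16
//   %g2 = getelementptr inbounds <4 x float>, <4 x float>* %p, i32 0, i32 2
//   %e2 = load float, float* %g2, align 8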
1045 
1046 /// This is the entry point for all transforms. Pass manager differences are
1047 /// handled in the callers of this function.
1048 bool VectorCombine::run() {
1049  if (DisableVectorCombine)
1050  return false;
1051 
1052  // Don't attempt vectorization if the target does not support vectors.
1053  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
1054  return false;
1055 
1056  bool MadeChange = false;
1057  auto FoldInst = [this, &MadeChange](Instruction &I) {
1058  Builder.SetInsertPoint(&I);
1059  MadeChange |= vectorizeLoadInsert(I);
1060  MadeChange |= foldExtractExtract(I);
1061  MadeChange |= foldBitcastShuf(I);
1062  MadeChange |= scalarizeBinopOrCmp(I);
1063  MadeChange |= foldExtractedCmps(I);
1064  MadeChange |= scalarizeLoadExtract(I);
1065  MadeChange |= foldSingleElementStore(I);
1066  };
1067  for (BasicBlock &BB : F) {
1068  // Ignore unreachable basic blocks.
1069  if (!DT.isReachableFromEntry(&BB))
1070  continue;
1071  // Use early increment range so that we can erase instructions in loop.
1072  for (Instruction &I : make_early_inc_range(BB)) {
1073  if (isa<DbgInfoIntrinsic>(I))
1074  continue;
1075  FoldInst(I);
1076  }
1077  }
1078 
1079  while (!Worklist.isEmpty()) {
1080  Instruction *I = Worklist.removeOne();
1081  if (!I)
1082  continue;
1083 
1084  if (isInstructionTriviallyDead(I)) {
1085  eraseInstruction(*I);
1086  continue;
1087  }
1088 
1089  FoldInst(*I);
1090  }
1091 
1092  return MadeChange;
1093 }
1094 
1095 // Pass manager boilerplate below here.
1096 
1097 namespace {
1098 class VectorCombineLegacyPass : public FunctionPass {
1099 public:
1100  static char ID;
1101  VectorCombineLegacyPass() : FunctionPass(ID) {
1102  initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
1103  }
1104 
1105  void getAnalysisUsage(AnalysisUsage &AU) const override {
1106  AU.addRequired<AssumptionCacheTracker>();
1107  AU.addRequired<DominatorTreeWrapperPass>();
1108  AU.addRequired<TargetTransformInfoWrapperPass>();
1109  AU.addRequired<AAResultsWrapperPass>();
1110  AU.setPreservesCFG();
1111  AU.addPreserved<DominatorTreeWrapperPass>();
1112  AU.addPreserved<GlobalsAAWrapperPass>();
1113  AU.addPreserved<AAResultsWrapperPass>();
1114  AU.addPreserved<BasicAAWrapperPass>();
1115  FunctionPass::getAnalysisUsage(AU);
1116  }
1117 
1118  bool runOnFunction(Function &F) override {
1119  if (skipFunction(F))
1120  return false;
1121  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1122  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1123  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1124  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
1125  VectorCombine Combiner(F, TTI, DT, AA, AC);
1126  return Combiner.run();
1127  }
1128 };
1129 } // namespace
1130 
1131 char VectorCombineLegacyPass::ID = 0;
1132 INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
1133  "Optimize scalar/vector ops", false,
1134  false)
1135 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1136 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1137 INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
1138  "Optimize scalar/vector ops", false, false)
1139 Pass *llvm::createVectorCombinePass() {
1140  return new VectorCombineLegacyPass();
1141 }
1142 
1143 PreservedAnalyses VectorCombinePass::run(Function &F,
1144  FunctionAnalysisManager &FAM) {
1145  auto &AC = FAM.getResult<AssumptionAnalysis>(F);
1146  TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
1147  DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
1148  AAResults &AA = FAM.getResult<AAManager>(F);
1149  VectorCombine Combiner(F, TTI, DT, AA, AC);
1150  if (!Combiner.run())
1151  return PreservedAnalyses::all();
1152  PreservedAnalyses PA;
1153  PA.preserveSet<CFGAnalyses>();
1154  return PA;
1155 }
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1288
llvm::mustSuppressSpeculation
bool mustSuppressSpeculation(const LoadInst &LI)
Return true if speculation of the given load must be suppressed to avoid ordering or interfering with...
Definition: ValueTracking.cpp:4599
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
B1
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::ConstantExpr::getExtractElement
static Constant * getExtractElement(Constant *Vec, Constant *Idx, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2527
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:917
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::TargetTransformInfo::getMinVectorRegisterBitWidth
unsigned getMinVectorRegisterBitWidth() const
Definition: TargetTransformInfo.cpp:599
ScalarizationResult::~ScalarizationResult
~ScalarizationResult()
Definition: VectorCombine.cpp:807
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1524
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::TargetTransformInfo::getShuffleCost
InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask=None, int Index=0, VectorType *SubTp=nullptr) const
Definition: TargetTransformInfo.cpp:726
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:435
llvm::ConstantRange::binaryAnd
ConstantRange binaryAnd(const ConstantRange &Other) const
Return a new range representing the possible values resulting from a binary-and of a value in this ra...
Definition: ConstantRange.cpp:1297
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::User::operands
op_range operands()
Definition: User.h:242
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::isSafeToSpeculativelyExecute
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Definition: ValueTracking.cpp:4611
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:228
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:779
Loads.h
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1873
llvm::Function
Definition: Function.h:61
Pass.h
llvm::TargetTransformInfo::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:827
ScalarizationResult::isUnsafe
bool isUnsafe() const
Returns true if the index cannot be scalarized.
Definition: VectorCombine.cpp:820
canScalarizeAccess
static ScalarizationResult canScalarizeAccess(FixedVectorType *VecTy, Value *Idx, Instruction *CtxI, AssumptionCache &AC, const DominatorTree &DT)
Check if it is legal to scalarize a memory access to VecTy at index Idx.
Definition: VectorCombine.cpp:845
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1494
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::TargetTransformInfo::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Definition: TargetTransformInfo.cpp:889
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition: PatternMatch.h:1573
llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1031
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::IRBuilder<>
llvm::InstructionWorklist::isEmpty
bool isEmpty() const
Definition: InstructionWorklist.h:39
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
GlobalsModRef.h
VectorCombine.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:427
llvm::createVectorCombinePass
Pass * createVectorCombinePass()
Definition: VectorCombine.cpp:1139
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::StoreInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:357
llvm::Instruction::comesBefore
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
Definition: Instruction.cpp:111
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::Instruction::copyMetadata
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Definition: Instruction.cpp:829
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine", "Optimize scalar/vector ops", false, false) INITIALIZE_PASS_END(VectorCombineLegacyPass
InvalidIndex
static const unsigned InvalidIndex
Definition: VectorCombine.cpp:60
llvm::PatternMatch::m_BitCast
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition: PatternMatch.h:1603
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
BasicAliasAnalysis.h
MaxInstrsToScan
static cl::opt< unsigned > MaxInstrsToScan("vector-combine-max-scan-instrs", cl::init(30), cl::Hidden, cl::desc("Max number of instructions to scan for vector combining."))
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:870
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:1997
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
CommandLine.h
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:568
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
llvm::CmpInst::isFPPredicate
bool isFPPredicate() const
Definition: InstrTypes.h:813
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:508
llvm::User
Definition: User.h:44
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
createShiftShuffle
static Value * createShiftShuffle(Value *Vec, unsigned OldIndex, unsigned NewIndex, IRBuilder<> &Builder)
Create a shuffle that translates (shifts) 1 element from the input vector to a new element location.
Definition: VectorCombine.cpp:390
InstructionWorklist.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
llvm::InstructionWorklist::remove
void remove(Instruction *I)
Remove I from the worklist if it exists.
Definition: InstructionWorklist.h:85
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:748
false
Definition: StackSlotColoring.cpp:142
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
DisableBinopExtractShuffle
static cl::opt< bool > DisableBinopExtractShuffle("disable-binop-extract-shuffle", cl::init(false), cl::Hidden, cl::desc("Disable binop extract to shuffle transforms"))
llvm::PatternMatch::m_URem
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1105
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
PatternMatch.h
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::NVPTX::PTXLdStInstCode::Scalar
@ Scalar
Definition: NVPTX.h:122
llvm::isModSet
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:197
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
ScalarizationResult::isSafeWithFreeze
bool isSafeWithFreeze() const
Returns true if the index can be scalarized, but requires inserting a freeze.
Definition: VectorCombine.cpp:823
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1502
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:224
ScalarizationResult::freeze
void freeze(IRBuilder<> &Builder, Instruction &UserI)
Freeze the ToFreeze value and update the use in UserI to use it.
Definition: VectorCombine.cpp:826
VectorUtils.h
llvm::cl::opt< bool >
llvm::ConstantExpr::getCompare
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:2373
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::InstructionWorklist::removeOne
Instruction * removeOne()
Definition: InstructionWorklist.h:96
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::VectorCombinePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &)
Definition: VectorCombine.cpp:1143
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::Combiner
Definition: Combiner.h:27
llvm::InstructionWorklist::push
void push(Instruction *I)
Push the instruction onto the worklist stack.
Definition: InstructionWorklist.h:58
eraseInstruction
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater *MSSAU)
Definition: LICM.cpp:1525
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::BasicAAWrapperPass
Legacy wrapper pass to provide the BasicAAResult object.
Definition: BasicAliasAnalysis.h:244
llvm::TargetTransformInfo::getRegisterClassForType
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
Definition: TargetTransformInfo.cpp:585
DisableVectorCombine
static cl::opt< bool > DisableVectorCombine("disable-vector-combine", cl::init(false), cl::Hidden, cl::desc("Disable all vector combine transforms"))
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:576
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1123
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1616
llvm::TargetTransformInfo::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract) const
Estimate the overhead of scalarizing an instruction.
Definition: TargetTransformInfo.cpp:480
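One common use of this hook, sketched under the assumption that only a single lane is demanded and only the insertion cost matters:

  #include "llvm/ADT/APInt.h"
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;

  // Approximate cost of materializing one lane of VecTy via insertelement.
  static InstructionCost singleLaneInsertCost(const TargetTransformInfo &TTI,
                                              FixedVectorType *VecTy,
                                              unsigned Lane) {
    APInt DemandedElts = APInt::getOneBitSet(VecTy->getNumElements(), Lane);
    return TTI.getScalarizationOverhead(VecTy, DemandedElts,
                                        /*Insert=*/true, /*Extract=*/false);
  }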
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
translateExtract
static ExtractElementInst * translateExtract(ExtractElementInst *ExtElt, unsigned NewIndex, IRBuilder<> &Builder)
Given an extract element instruction with constant index operand, shuffle the source vector (shift th...
Definition: VectorCombine.cpp:405
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:328
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:753
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::Instruction::isBinaryOp
bool isBinaryOp() const
Definition: Instruction.h:165
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:142
Status
Definition: SIModeRegister.cpp:28
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef< int >
llvm::isInstructionTriviallyDead
bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction has no side ef...
Definition: Local.cpp:398
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
llvm::InstructionWorklist::pushValue
void pushValue(Value *V)
Definition: InstructionWorklist.h:68
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass iff it does not add or remove basic blocks or modify terminator instructions.
Definition: Pass.cpp:253
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::ExtractElementInst::getVectorOperand
Value * getVectorOperand()
Definition: Instructions.h:1902
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1561
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::ms_demangle::IntrinsicFunctionKind::New
@ New
ScalarizationResult::isSafe
bool isSafe() const
Returns true if the index can be scalarized without requiring a freeze.
Definition: VectorCombine.cpp:818
llvm::initializeVectorCombineLegacyPassPass
void initializeVectorCombineLegacyPassPass(PassRegistry &)
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::TargetTransformInfo::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy=nullptr, CmpInst::Predicate VecPred=CmpInst::BAD_ICMP_PREDICATE, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:808
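A hedged sketch of the scalar-versus-vector comparison this cost query enables; the opcode, predicate handling, and helper name are placeholders rather than this pass's exact logic:

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  // True if an icmp over VecTy is estimated to cost no more than the scalar icmp.
  static bool vectorCmpIsCheap(const TargetTransformInfo &TTI,
                               FixedVectorType *VecTy, CmpInst::Predicate Pred) {
    Type *ScalarTy = VecTy->getElementType();
    InstructionCost ScalarCost = TTI.getCmpSelInstrCost(
        Instruction::ICmp, ScalarTy, CmpInst::makeCmpResultType(ScalarTy), Pred);
    InstructionCost VectorCost = TTI.getCmpSelInstrCost(
        Instruction::ICmp, VecTy, CmpInst::makeCmpResultType(VecTy), Pred);
    return VectorCost.isValid() && VectorCost <= ScalarCost;
  }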
llvm::TargetTransformInfo::getNumberOfRegisters
unsigned getNumberOfRegisters(unsigned ClassID) const
Definition: TargetTransformInfo.cpp:581
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1369
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::Instruction::mayReadFromMemory
bool mayReadFromMemory() const
Return true if this instruction may read memory.
Definition: Instruction.cpp:564
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::computeConstantRange
ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo=true, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
Definition: ValueTracking.cpp:7027
llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:723
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition: VectorUtils.cpp:448
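A small usage sketch, assuming a mask written for narrow (e.g. i8) lanes is being rescaled to wider (e.g. i16) lanes after a bitcast; the wrapper is illustrative:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/VectorUtils.h"
  using namespace llvm;

  // Try to express NarrowMask on lanes that are twice as wide; returns false if
  // the mask cannot be represented at the wider granularity.
  static bool widenMaskByTwo(ArrayRef<int> NarrowMask,
                             SmallVectorImpl<int> &WideMask) {
    return widenShuffleMaskElts(/*Scale=*/2, NarrowMask, WideMask);
  }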
llvm::commonAlignment
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:211
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:936
ScalarizationResult::unsafe
static ScalarizationResult unsafe()
Definition: VectorCombine.cpp:811
ScalarizationResult
Helper class to indicate whether a vector index can be safely scalarized and if a freeze needs to be ...
Definition: VectorCombine.cpp:796
ops
Optimize scalar/vector ops
Definition: VectorCombine.cpp:1138
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
Function.h
llvm::Value::hasNUses
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:149
ScalarizationResult::safeWithFreeze
static ScalarizationResult safeWithFreeze(Value *ToFreeze)
Definition: VectorCombine.cpp:813
ScalarizationResult::safe
static ScalarizationResult safe()
Definition: VectorCombine.cpp:812
llvm::InstructionWorklist
InstructionWorklist - This is the worklist management logic for InstCombine and other simplification ...
Definition: InstructionWorklist.h:25
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::ConstantRange::contains
bool contains(const APInt &Val) const
Return true if the specified value is in the set.
Definition: ConstantRange.cpp:393
llvm::TargetTransformInfo::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
Definition: TargetTransformInfo.cpp:819
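A sketch of pricing a single extractelement with this hook (lane and type are caller-supplied placeholders):

  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // Target-model cost of extracting lane Index from a vector of type VecTy.
  static InstructionCost extractLaneCost(const TargetTransformInfo &TTI,
                                         Type *VecTy, unsigned Index) {
    return TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
  }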
llvm::ConstantRange
This class represents a range of values.
Definition: ConstantRange.h:47
llvm::ConstantRange::urem
ConstantRange urem(const ConstantRange &Other) const
Return a new range representing the possible values resulting from an unsigned remainder operation of...
Definition: ConstantRange.cpp:1219
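These ConstantRange utilities combine into the kind of bounds check that decides whether a variable lane index is provably in range; a hedged sketch under the assumption of a scalar integer index (not the pass's exact helper):

  #include "llvm/Analysis/AssumptionCache.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/Dominators.h"
  using namespace llvm;

  // True if Idx is known to be smaller than NumElts at CtxI.
  static bool indexIsInBounds(Value *Idx, uint64_t NumElts, AssumptionCache &AC,
                              const Instruction *CtxI, const DominatorTree &DT) {
    unsigned BitWidth = Idx->getType()->getScalarSizeInBits();
    ConstantRange Valid(APInt::getZero(BitWidth), APInt(BitWidth, NumElts));
    ConstantRange IdxRange =
        computeConstantRange(Idx, /*UseInstrInfo=*/true, &AC, CtxI, &DT);
    return Valid.contains(IdxRange);
  }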
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:785
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:802
Dominators.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1336
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
TargetTransformInfo.h
llvm::PatternMatch
Definition: PatternMatch.h:47
llvm::ExtractElementInst::getIndexOperand
Value * getIndexOperand()
Definition: Instructions.h:1903
llvm::InstructionWorklist::pushUsersToWorkList
void pushUsersToWorkList(Instruction &I)
When an instruction is simplified, add all users of the instruction to the work lists because they mi...
Definition: InstructionWorklist.h:106
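The worklist entries above are typically drained by a driver loop of roughly this shape (a generic sketch, not this pass's run() method):

  #include "llvm/IR/Instruction.h"
  #include "llvm/Transforms/Utils/InstructionWorklist.h"
  using namespace llvm;

  static void drainWorklist(InstructionWorklist &Worklist) {
    while (!Worklist.isEmpty()) {
      Instruction *I = Worklist.removeOne();
      if (!I) // entries removed via remove() are left as null slots
        continue;
      // ...attempt a simplification of I; on success, revisit its users:
      Worklist.pushUsersToWorkList(*I);
    }
  }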
Vectorize.h
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:93
llvm::isSafeToLoadUnconditionally
bool isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, const DataLayout &DL, Instruction *ScanFrom=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
Definition: Loads.cpp:335
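A hedged sketch of the kind of safety query a load-widening transform performs before reading extra bytes; the helper name and byte-count parameter are illustrative:

  #include "llvm/Analysis/Loads.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Dominators.h"
  using namespace llvm;

  // True if loading SizeInBytes bytes from Ptr at Alignment cannot trap,
  // scanning from the original load for context.
  static bool canWidenLoad(Value *Ptr, Align Alignment, uint64_t SizeInBytes,
                           const DataLayout &DL, Instruction *OrigLoad,
                           const DominatorTree &DT) {
    APInt Size(DL.getIndexTypeSizeInBits(Ptr->getType()), SizeInBytes);
    return isSafeToLoadUnconditionally(Ptr, Alignment, Size, DL, OrigLoad, &DT);
  }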
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::isGuaranteedNotToBePoison
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Definition: ValueTracking.cpp:5270
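This query pairs naturally with a freeze: if the index might be poison, a frozen copy is used instead; a minimal sketch under that assumption (the helper name is made up):

  #include "llvm/Analysis/AssumptionCache.h"
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/IRBuilder.h"
  using namespace llvm;

  // Return Idx unchanged if it cannot be poison; otherwise return a frozen copy.
  static Value *getNonPoisonIndex(Value *Idx, AssumptionCache &AC,
                                  IRBuilder<> &Builder) {
    if (isGuaranteedNotToBePoison(Idx, &AC))
      return Idx;
    return Builder.CreateFreeze(Idx, Idx->getName() + ".frozen");
  }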
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::TargetTransformInfo::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
Definition: TargetTransformInfo.cpp:714
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:414
combine
vector combine
Definition: VectorCombine.cpp:1137
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
InitializePasses.h
computeAlignmentAfterScalarization
static Align computeAlignmentAfterScalarization(Align VectorAlignment, Type *ScalarType, Value *Idx, const DataLayout &DL)
The memory operation on a vector of ScalarType had alignment of VectorAlignment.
Definition: VectorCombine.cpp:886
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:632
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::AAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
Definition: AliasAnalysis.cpp:218
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:209
Other
Optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1172
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:128
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
isMemModifiedBetween
static bool isMemModifiedBetween(BasicBlock::iterator Begin, BasicBlock::iterator End, const MemoryLocation &Loc, AAResults &AA)
Definition: VectorCombine.cpp:784
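The static helper above boils down to an alias-analysis scan of the instructions between two iterators; a hedged sketch of that shape (the wrapper name differs from the file's own function):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/IR/BasicBlock.h"
  using namespace llvm;

  // True if any instruction in [Begin, End) may write to the location Loc.
  static bool mayWriteBetween(BasicBlock::iterator Begin,
                              BasicBlock::iterator End,
                              const MemoryLocation &Loc, AAResults &AA) {
    return any_of(make_range(Begin, End), [&](Instruction &I) {
      return isModSet(AA.getModRefInfo(&I, Loc));
    });
  }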