VectorCombine.cpp (LLVM 13.0.0git)
1 //===------- VectorCombine.cpp - Optimize partial vector operations -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass optimizes scalar/vector interactions using target cost models. The
10 // transforms implemented here may not fit in traditional loop-based or SLP
11 // vectorization passes.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Vectorize/VectorCombine.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/Analysis/BasicAliasAnalysis.h"
18 #include "llvm/Analysis/GlobalsModRef.h"
19 #include "llvm/Analysis/Loads.h"
20 #include "llvm/Analysis/TargetTransformInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/Analysis/VectorUtils.h"
23 #include "llvm/IR/Dominators.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/PatternMatch.h"
27 #include "llvm/InitializePasses.h"
28 #include "llvm/Pass.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Transforms/Utils/Local.h"
31 #include "llvm/Transforms/Vectorize.h"
32 
33 using namespace llvm;
34 using namespace llvm::PatternMatch;
35 
36 #define DEBUG_TYPE "vector-combine"
37 STATISTIC(NumVecLoad, "Number of vector loads formed");
38 STATISTIC(NumVecCmp, "Number of vector compares formed");
39 STATISTIC(NumVecBO, "Number of vector binops formed");
40 STATISTIC(NumVecCmpBO, "Number of vector compare + binop formed");
41 STATISTIC(NumShufOfBitcast, "Number of shuffles moved after bitcast");
42 STATISTIC(NumScalarBO, "Number of scalar binops formed");
43 STATISTIC(NumScalarCmp, "Number of scalar compares formed");
44 
45 static cl::opt<bool> DisableVectorCombine(
46     "disable-vector-combine", cl::init(false), cl::Hidden,
47     cl::desc("Disable all vector combine transforms"));
48 
49 static cl::opt<bool> DisableBinopExtractShuffle(
50     "disable-binop-extract-shuffle", cl::init(false), cl::Hidden,
51     cl::desc("Disable binop extract to shuffle transforms"));
52 
53 static const unsigned InvalidIndex = std::numeric_limits<unsigned>::max();
54 
55 namespace {
56 class VectorCombine {
57 public:
58  VectorCombine(Function &F, const TargetTransformInfo &TTI,
59  const DominatorTree &DT)
60  : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {}
61 
62  bool run();
63 
64 private:
65   Function &F;
66   IRBuilder<> Builder;
67   const TargetTransformInfo &TTI;
68  const DominatorTree &DT;
69 
70  bool vectorizeLoadInsert(Instruction &I);
71  ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
72  ExtractElementInst *Ext1,
73  unsigned PreferredExtractIndex) const;
74  bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
75  unsigned Opcode,
76  ExtractElementInst *&ConvertToShuffle,
77  unsigned PreferredExtractIndex);
78  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
79  Instruction &I);
80  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
81  Instruction &I);
82  bool foldExtractExtract(Instruction &I);
83  bool foldBitcastShuf(Instruction &I);
84  bool scalarizeBinopOrCmp(Instruction &I);
85  bool foldExtractedCmps(Instruction &I);
86 };
87 } // namespace
88 
89 static void replaceValue(Value &Old, Value &New) {
90  Old.replaceAllUsesWith(&New);
91  New.takeName(&Old);
92 }
93 
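// Illustrative IR example of the load-widening rewrite performed below (value
// names and the assumed 128-bit minimum vector register width are for
// exposition only, not from the source):
//   %s = load float, float* %p, align 4
//   %r = insertelement <4 x float> undef, float %s, i32 0
// -->
//   %vp = bitcast float* %p to <4 x float>*
//   %vl = load <4 x float>, <4 x float>* %vp, align 4
//   %r  = shufflevector <4 x float> %vl, <4 x float> undef,
//         <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
// The shuffle keeps only the demanded element, so poison from the extra
// loaded lanes does not propagate, and it resizes to the destination type.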
94 bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
95  // Match insert into fixed vector of scalar value.
96  // TODO: Handle non-zero insert index.
97  auto *Ty = dyn_cast<FixedVectorType>(I.getType());
98  Value *Scalar;
99  if (!Ty || !match(&I, m_InsertElt(m_Undef(), m_Value(Scalar), m_ZeroInt())) ||
100  !Scalar->hasOneUse())
101  return false;
102 
103  // Optionally match an extract from another vector.
104  Value *X;
105  bool HasExtract = match(Scalar, m_ExtractElt(m_Value(X), m_ZeroInt()));
106  if (!HasExtract)
107  X = Scalar;
108 
109  // Match source value as load of scalar or vector.
110  // Do not vectorize scalar load (widening) if atomic/volatile or under
111  // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
112  // or create data races non-existent in the source.
113  auto *Load = dyn_cast<LoadInst>(X);
114  if (!Load || !Load->isSimple() || !Load->hasOneUse() ||
115  Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
116       mustSuppressSpeculation(*Load))
117     return false;
118 
119  const DataLayout &DL = I.getModule()->getDataLayout();
120  Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts();
121  assert(isa<PointerType>(SrcPtr->getType()) && "Expected a pointer type");
122 
123  // If original AS != Load's AS, we can't bitcast the original pointer and have
124  // to use Load's operand instead. Ideally we would want to strip pointer casts
125  // without changing AS, but there's no API to do that ATM.
126  unsigned AS = Load->getPointerAddressSpace();
127  if (AS != SrcPtr->getType()->getPointerAddressSpace())
128  SrcPtr = Load->getPointerOperand();
129 
130  // We are potentially transforming byte-sized (8-bit) memory accesses, so make
131  // sure we have all of our type-based constraints in place for this target.
132  Type *ScalarTy = Scalar->getType();
133  uint64_t ScalarSize = ScalarTy->getPrimitiveSizeInBits();
134  unsigned MinVectorSize = TTI.getMinVectorRegisterBitWidth();
135  if (!ScalarSize || !MinVectorSize || MinVectorSize % ScalarSize != 0 ||
136  ScalarSize % 8 != 0)
137  return false;
138 
139  // Check safety of replacing the scalar load with a larger vector load.
140  // We use minimal alignment (maximum flexibility) because we only care about
141  // the dereferenceable region. When calculating cost and creating a new op,
142  // we may use a larger value based on alignment attributes.
143  unsigned MinVecNumElts = MinVectorSize / ScalarSize;
144  auto *MinVecTy = VectorType::get(ScalarTy, MinVecNumElts, false);
145  unsigned OffsetEltIndex = 0;
146  Align Alignment = Load->getAlign();
147  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT)) {
148  // It is not safe to load directly from the pointer, but we can still peek
149   // through gep offsets and check if it is safe to load from a base address with
150  // updated alignment. If it is, we can shuffle the element(s) into place
151  // after loading.
152  unsigned OffsetBitWidth = DL.getIndexTypeSizeInBits(SrcPtr->getType());
153  APInt Offset(OffsetBitWidth, 0);
154   SrcPtr = SrcPtr->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
155 
156  // We want to shuffle the result down from a high element of a vector, so
157  // the offset must be positive.
158  if (Offset.isNegative())
159  return false;
160 
161  // The offset must be a multiple of the scalar element to shuffle cleanly
162  // in the element's size.
163  uint64_t ScalarSizeInBytes = ScalarSize / 8;
164  if (Offset.urem(ScalarSizeInBytes) != 0)
165  return false;
166 
167  // If we load MinVecNumElts, will our target element still be loaded?
168  OffsetEltIndex = Offset.udiv(ScalarSizeInBytes).getZExtValue();
169  if (OffsetEltIndex >= MinVecNumElts)
170  return false;
171 
172  if (!isSafeToLoadUnconditionally(SrcPtr, MinVecTy, Align(1), DL, Load, &DT))
173  return false;
174 
175  // Update alignment with offset value. Note that the offset could be negated
176  // to more accurately represent "(new) SrcPtr - Offset = (old) SrcPtr", but
177  // negation does not change the result of the alignment calculation.
178  Alignment = commonAlignment(Alignment, Offset.getZExtValue());
179  }
180 
181  // Original pattern: insertelt undef, load [free casts of] PtrOp, 0
182  // Use the greater of the alignment on the load or its source pointer.
183  Alignment = std::max(SrcPtr->getPointerAlignment(DL), Alignment);
184  Type *LoadTy = Load->getType();
185  InstructionCost OldCost =
186  TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
187  APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
188  OldCost += TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
189  /* Insert */ true, HasExtract);
190 
191  // New pattern: load VecPtr
192  InstructionCost NewCost =
193  TTI.getMemoryOpCost(Instruction::Load, MinVecTy, Alignment, AS);
194  // Optionally, we are shuffling the loaded vector element(s) into place.
195  // For the mask set everything but element 0 to undef to prevent poison from
196  // propagating from the extra loaded memory. This will also optionally
197  // shrink/grow the vector from the loaded size to the output size.
198  // We assume this operation has no cost in codegen if there was no offset.
199  // Note that we could use freeze to avoid poison problems, but then we might
200  // still need a shuffle to change the vector size.
201  unsigned OutputNumElts = Ty->getNumElements();
202  SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
203  assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
204  Mask[0] = OffsetEltIndex;
205  if (OffsetEltIndex)
206  NewCost += TTI.getShuffleCost(TTI::SK_PermuteSingleSrc, MinVecTy, Mask);
207 
208  // We can aggressively convert to the vector form because the backend can
209  // invert this transform if it does not result in a performance win.
210  if (OldCost < NewCost || !NewCost.isValid())
211  return false;
212 
213  // It is safe and potentially profitable to load a vector directly:
214  // inselt undef, load Scalar, 0 --> load VecPtr
215   IRBuilder<> Builder(Load);
216   Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS));
217  Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment);
218  VecLd = Builder.CreateShuffleVector(VecLd, Mask);
219 
220  replaceValue(I, *VecLd);
221  ++NumVecLoad;
222  return true;
223 }
224 
225 /// Determine which, if any, of the inputs should be replaced by a shuffle
226 /// followed by extract from a different index.
227 ExtractElementInst *VectorCombine::getShuffleExtract(
228     ExtractElementInst *Ext0, ExtractElementInst *Ext1,
229     unsigned PreferredExtractIndex = InvalidIndex) const {
230  assert(isa<ConstantInt>(Ext0->getIndexOperand()) &&
231  isa<ConstantInt>(Ext1->getIndexOperand()) &&
232  "Expected constant extract indexes");
233 
234  unsigned Index0 = cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue();
235  unsigned Index1 = cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue();
236 
237  // If the extract indexes are identical, no shuffle is needed.
238  if (Index0 == Index1)
239  return nullptr;
240 
241  Type *VecTy = Ext0->getVectorOperand()->getType();
242  assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
243  InstructionCost Cost0 =
244  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
245  InstructionCost Cost1 =
246  TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
247 
248   // If both costs are invalid, no shuffle is needed.
249  if (!Cost0.isValid() && !Cost1.isValid())
250  return nullptr;
251 
252  // We are extracting from 2 different indexes, so one operand must be shuffled
253  // before performing a vector operation and/or extract. The more expensive
254  // extract will be replaced by a shuffle.
255  if (Cost0 > Cost1)
256  return Ext0;
257  if (Cost1 > Cost0)
258  return Ext1;
259 
260  // If the costs are equal and there is a preferred extract index, shuffle the
261  // opposite operand.
262  if (PreferredExtractIndex == Index0)
263  return Ext1;
264  if (PreferredExtractIndex == Index1)
265  return Ext0;
266 
267  // Otherwise, replace the extract with the higher index.
268  return Index0 > Index1 ? Ext0 : Ext1;
269 }
270 
271 /// Compare the relative costs of 2 extracts followed by scalar operation vs.
272 /// vector operation(s) followed by extract. Return true if the existing
273 /// instructions are cheaper than a vector alternative. Otherwise, return false
274 /// and if one of the extracts should be transformed to a shufflevector, set
275 /// \p ConvertToShuffle to that extract instruction.
276 bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
277  ExtractElementInst *Ext1,
278  unsigned Opcode,
279  ExtractElementInst *&ConvertToShuffle,
280  unsigned PreferredExtractIndex) {
281  assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
282  isa<ConstantInt>(Ext1->getOperand(1)) &&
283  "Expected constant extract indexes");
284  Type *ScalarTy = Ext0->getType();
285  auto *VecTy = cast<VectorType>(Ext0->getOperand(0)->getType());
286  InstructionCost ScalarOpCost, VectorOpCost;
287 
288  // Get cost estimates for scalar and vector versions of the operation.
289  bool IsBinOp = Instruction::isBinaryOp(Opcode);
290  if (IsBinOp) {
291  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
292  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
293  } else {
294  assert((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) &&
295  "Expected a compare");
296  ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy,
297  CmpInst::makeCmpResultType(ScalarTy));
298  VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy,
299                                         CmpInst::makeCmpResultType(VecTy));
300   }
301 
302  // Get cost estimates for the extract elements. These costs will factor into
303  // both sequences.
304  unsigned Ext0Index = cast<ConstantInt>(Ext0->getOperand(1))->getZExtValue();
305  unsigned Ext1Index = cast<ConstantInt>(Ext1->getOperand(1))->getZExtValue();
306 
307  InstructionCost Extract0Cost =
308  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext0Index);
309  InstructionCost Extract1Cost =
310  TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, Ext1Index);
311 
312  // A more expensive extract will always be replaced by a splat shuffle.
313  // For example, if Ext0 is more expensive:
314  // opcode (extelt V0, Ext0), (ext V1, Ext1) -->
315  // extelt (opcode (splat V0, Ext0), V1), Ext1
316  // TODO: Evaluate whether that always results in lowest cost. Alternatively,
317  // check the cost of creating a broadcast shuffle and shuffling both
318  // operands to element 0.
319  InstructionCost CheapExtractCost = std::min(Extract0Cost, Extract1Cost);
320 
321  // Extra uses of the extracts mean that we include those costs in the
322  // vector total because those instructions will not be eliminated.
323  InstructionCost OldCost, NewCost;
324  if (Ext0->getOperand(0) == Ext1->getOperand(0) && Ext0Index == Ext1Index) {
325  // Handle a special case. If the 2 extracts are identical, adjust the
326  // formulas to account for that. The extra use charge allows for either the
327  // CSE'd pattern or an unoptimized form with identical values:
328  // opcode (extelt V, C), (extelt V, C) --> extelt (opcode V, V), C
329  bool HasUseTax = Ext0 == Ext1 ? !Ext0->hasNUses(2)
330  : !Ext0->hasOneUse() || !Ext1->hasOneUse();
331  OldCost = CheapExtractCost + ScalarOpCost;
332  NewCost = VectorOpCost + CheapExtractCost + HasUseTax * CheapExtractCost;
333  } else {
334  // Handle the general case. Each extract is actually a different value:
335  // opcode (extelt V0, C0), (extelt V1, C1) --> extelt (opcode V0, V1), C
336  OldCost = Extract0Cost + Extract1Cost + ScalarOpCost;
337  NewCost = VectorOpCost + CheapExtractCost +
338  !Ext0->hasOneUse() * Extract0Cost +
339  !Ext1->hasOneUse() * Extract1Cost;
340  }
341 
342  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
343  if (ConvertToShuffle) {
344  if (IsBinOp && DisableBinopExtractShuffle)
345  return true;
346 
347  // If we are extracting from 2 different indexes, then one operand must be
348  // shuffled before performing the vector operation. The shuffle mask is
349  // undefined except for 1 lane that is being translated to the remaining
350  // extraction lane. Therefore, it is a splat shuffle. Ex:
351  // ShufMask = { undef, undef, 0, undef }
352  // TODO: The cost model has an option for a "broadcast" shuffle
353  // (splat-from-element-0), but no option for a more general splat.
354  NewCost +=
355       TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
356   }
357 
358  // Aggressively form a vector op if the cost is equal because the transform
359  // may enable further optimization.
360  // Codegen can reverse this transform (scalarize) if it was not profitable.
361  return OldCost < NewCost;
362 }
363 
364 /// Create a shuffle that translates (shifts) 1 element from the input vector
365 /// to a new element location.
366 static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
367  unsigned NewIndex, IRBuilder<> &Builder) {
368  // The shuffle mask is undefined except for 1 lane that is being translated
369  // to the new element index. Example for OldIndex == 2 and NewIndex == 0:
370  // ShufMask = { 2, undef, undef, undef }
371  auto *VecTy = cast<FixedVectorType>(Vec->getType());
372  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
373  ShufMask[NewIndex] = OldIndex;
374  return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
375 }
376 
377 /// Given an extract element instruction with constant index operand, shuffle
378 /// the source vector (shift the scalar element) to a NewIndex for extraction.
379 /// Return null if the input can be constant folded, so that we are not creating
380 /// unnecessary instructions.
381 static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
382                                             unsigned NewIndex,
383  IRBuilder<> &Builder) {
384  // If the extract can be constant-folded, this code is unsimplified. Defer
385  // to other passes to handle that.
386  Value *X = ExtElt->getVectorOperand();
387  Value *C = ExtElt->getIndexOperand();
388  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
389  if (isa<Constant>(X))
390  return nullptr;
391 
392  Value *Shuf = createShiftShuffle(X, cast<ConstantInt>(C)->getZExtValue(),
393  NewIndex, Builder);
394  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
395 }
396 
397 /// Try to reduce extract element costs by converting scalar compares to vector
398 /// compares followed by extract.
399 /// cmp (ext0 V0, C), (ext1 V1, C)
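/// Illustrative example (hypothetical values, not from the source):
///   %e0 = extractelement <4 x i32> %v0, i32 1
///   %e1 = extractelement <4 x i32> %v1, i32 1
///   %r  = icmp slt i32 %e0, %e1
/// -->
///   %vc = icmp slt <4 x i32> %v0, %v1
///   %r  = extractelement <4 x i1> %vc, i32 1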
400 void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
401  ExtractElementInst *Ext1, Instruction &I) {
402  assert(isa<CmpInst>(&I) && "Expected a compare");
403  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
404  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
405  "Expected matching constant extract indexes");
406 
407  // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
408  ++NumVecCmp;
409  CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
410  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
411  Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
412  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
413  replaceValue(I, *NewExt);
414 }
415 
416 /// Try to reduce extract element costs by converting scalar binops to vector
417 /// binops followed by extract.
418 /// bo (ext0 V0, C), (ext1 V1, C)
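/// Illustrative example (hypothetical values, not from the source):
///   %e0 = extractelement <4 x float> %v0, i32 2
///   %e1 = extractelement <4 x float> %v1, i32 2
///   %r  = fadd float %e0, %e1
/// -->
///   %vb = fadd <4 x float> %v0, %v1
///   %r  = extractelement <4 x float> %vb, i32 2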
419 void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
420  ExtractElementInst *Ext1, Instruction &I) {
421  assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
422  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
423  cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
424  "Expected matching constant extract indexes");
425 
426  // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
427  ++NumVecBO;
428  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
429  Value *VecBO =
430  Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
431 
432  // All IR flags are safe to back-propagate because any potential poison
433  // created in unused vector elements is discarded by the extract.
434  if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
435  VecBOInst->copyIRFlags(&I);
436 
437  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
438  replaceValue(I, *NewExt);
439 }
440 
441 /// Match an instruction with extracted vector operands.
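/// When the two extracts use different constant indexes, one source is first
/// shuffled so both extracts read the same lane. Illustrative example
/// (hypothetical values; assumes the cost model chooses to shuffle Ext1):
///   %e0 = extractelement <4 x i32> %v0, i32 0
///   %e1 = extractelement <4 x i32> %v1, i32 3
///   %r  = add i32 %e0, %e1
/// -->
///   %s  = shufflevector <4 x i32> %v1, <4 x i32> undef,
///         <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
///   %vb = add <4 x i32> %v0, %s
///   %r  = extractelement <4 x i32> %vb, i32 0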
442 bool VectorCombine::foldExtractExtract(Instruction &I) {
443  // It is not safe to transform things like div, urem, etc. because we may
444  // create undefined behavior when executing those on unknown vector elements.
445   if (!isSafeToSpeculativelyExecute(&I))
446     return false;
447 
448  Instruction *I0, *I1;
449   CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
450   if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
451  !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
452  return false;
453 
454  Value *V0, *V1;
455  uint64_t C0, C1;
456  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
457  !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
458  V0->getType() != V1->getType())
459  return false;
460 
461  // If the scalar value 'I' is going to be re-inserted into a vector, then try
462  // to create an extract to that same element. The extract/insert can be
463  // reduced to a "select shuffle".
464  // TODO: If we add a larger pattern match that starts from an insert, this
465  // probably becomes unnecessary.
466  auto *Ext0 = cast<ExtractElementInst>(I0);
467  auto *Ext1 = cast<ExtractElementInst>(I1);
468  uint64_t InsertIndex = InvalidIndex;
469  if (I.hasOneUse())
470  match(I.user_back(),
471  m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
472 
473  ExtractElementInst *ExtractToChange;
474  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), ExtractToChange,
475  InsertIndex))
476  return false;
477 
478  if (ExtractToChange) {
479  unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
480  ExtractElementInst *NewExtract =
481  translateExtract(ExtractToChange, CheapExtractIdx, Builder);
482  if (!NewExtract)
483  return false;
484  if (ExtractToChange == Ext0)
485  Ext0 = NewExtract;
486  else
487  Ext1 = NewExtract;
488  }
489 
490  if (Pred != CmpInst::BAD_ICMP_PREDICATE)
491  foldExtExtCmp(Ext0, Ext1, I);
492  else
493  foldExtExtBinop(Ext0, Ext1, I);
494 
495  return true;
496 }
497 
498 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
499 /// destination type followed by shuffle. This can enable further transforms by
500 /// moving bitcasts or shuffles together.
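/// Illustrative example where the cast narrows the elements (hypothetical
/// values, not from the source):
///   %s = shufflevector <4 x i32> %v, <4 x i32> undef,
///        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
///   %r = bitcast <4 x i32> %s to <8 x i16>
/// -->
///   %b = bitcast <4 x i32> %v to <8 x i16>
///   %r = shufflevector <8 x i16> %b, <8 x i16> undef,
///        <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>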
501 bool VectorCombine::foldBitcastShuf(Instruction &I) {
502  Value *V;
503   ArrayRef<int> Mask;
504   if (!match(&I, m_BitCast(
505                      m_OneUse(m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))))))
506     return false;
507 
508  // 1) Do not fold bitcast shuffle for scalable type. First, shuffle cost for
509  // scalable type is unknown; Second, we cannot reason if the narrowed shuffle
510  // mask for scalable type is a splat or not.
511  // 2) Disallow non-vector casts and length-changing shuffles.
512  // TODO: We could allow any shuffle.
513  auto *DestTy = dyn_cast<FixedVectorType>(I.getType());
514  auto *SrcTy = dyn_cast<FixedVectorType>(V->getType());
515  if (!SrcTy || !DestTy || I.getOperand(0)->getType() != SrcTy)
516  return false;
517 
518  unsigned DestNumElts = DestTy->getNumElements();
519  unsigned SrcNumElts = SrcTy->getNumElements();
520  SmallVector<int, 16> NewMask;
521  if (SrcNumElts <= DestNumElts) {
522  // The bitcast is from wide to narrow/equal elements. The shuffle mask can
523  // always be expanded to the equivalent form choosing narrower elements.
524  assert(DestNumElts % SrcNumElts == 0 && "Unexpected shuffle mask");
525  unsigned ScaleFactor = DestNumElts / SrcNumElts;
526  narrowShuffleMaskElts(ScaleFactor, Mask, NewMask);
527  } else {
528  // The bitcast is from narrow elements to wide elements. The shuffle mask
529  // must choose consecutive elements to allow casting first.
530  assert(SrcNumElts % DestNumElts == 0 && "Unexpected shuffle mask");
531  unsigned ScaleFactor = SrcNumElts / DestNumElts;
532  if (!widenShuffleMaskElts(ScaleFactor, Mask, NewMask))
533  return false;
534  }
535 
536  // The new shuffle must not cost more than the old shuffle. The bitcast is
537  // moved ahead of the shuffle, so assume that it has the same cost as before.
538   InstructionCost DestCost = TTI.getShuffleCost(
539       TargetTransformInfo::SK_PermuteSingleSrc, DestTy, NewMask);
540   InstructionCost SrcCost =
541       TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, SrcTy, Mask);
542   if (DestCost > SrcCost || !DestCost.isValid())
543  return false;
544 
545  // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
546  ++NumShufOfBitcast;
547  Value *CastV = Builder.CreateBitCast(V, DestTy);
548  Value *Shuf = Builder.CreateShuffleVector(CastV, NewMask);
549  replaceValue(I, *Shuf);
550  return true;
551 }
552 
553 /// Match a vector binop or compare instruction with at least one inserted
554 /// scalar operand and convert to scalar binop/cmp followed by insertelement.
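/// Illustrative example (hypothetical values and constants):
///   %i = insertelement <4 x i32> zeroinitializer, i32 %x, i32 2
///   %r = add <4 x i32> %i, <i32 1, i32 2, i32 3, i32 4>
/// -->
///   %s = add i32 %x, 3
///   %r = insertelement <4 x i32> <i32 1, i32 2, i32 3, i32 4>, i32 %s, i32 2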
555 bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
556   CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
557   Value *Ins0, *Ins1;
558  if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
559  !match(&I, m_Cmp(Pred, m_Value(Ins0), m_Value(Ins1))))
560  return false;
561 
562  // Do not convert the vector condition of a vector select into a scalar
563  // condition. That may cause problems for codegen because of differences in
564  // boolean formats and register-file transfers.
565  // TODO: Can we account for that in the cost model?
566  bool IsCmp = Pred != CmpInst::Predicate::BAD_ICMP_PREDICATE;
567  if (IsCmp)
568  for (User *U : I.users())
569  if (match(U, m_Select(m_Specific(&I), m_Value(), m_Value())))
570  return false;
571 
572  // Match against one or both scalar values being inserted into constant
573  // vectors:
574  // vec_op VecC0, (inselt VecC1, V1, Index)
575  // vec_op (inselt VecC0, V0, Index), VecC1
576  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index)
577  // TODO: Deal with mismatched index constants and variable indexes?
578  Constant *VecC0 = nullptr, *VecC1 = nullptr;
579  Value *V0 = nullptr, *V1 = nullptr;
580  uint64_t Index0 = 0, Index1 = 0;
581  if (!match(Ins0, m_InsertElt(m_Constant(VecC0), m_Value(V0),
582  m_ConstantInt(Index0))) &&
583  !match(Ins0, m_Constant(VecC0)))
584  return false;
585  if (!match(Ins1, m_InsertElt(m_Constant(VecC1), m_Value(V1),
586  m_ConstantInt(Index1))) &&
587  !match(Ins1, m_Constant(VecC1)))
588  return false;
589 
590  bool IsConst0 = !V0;
591  bool IsConst1 = !V1;
592  if (IsConst0 && IsConst1)
593  return false;
594  if (!IsConst0 && !IsConst1 && Index0 != Index1)
595  return false;
596 
597  // Bail for single insertion if it is a load.
598  // TODO: Handle this once getVectorInstrCost can cost for load/stores.
599  auto *I0 = dyn_cast_or_null<Instruction>(V0);
600  auto *I1 = dyn_cast_or_null<Instruction>(V1);
601  if ((IsConst0 && I1 && I1->mayReadFromMemory()) ||
602  (IsConst1 && I0 && I0->mayReadFromMemory()))
603  return false;
604 
605  uint64_t Index = IsConst0 ? Index1 : Index0;
606  Type *ScalarTy = IsConst0 ? V1->getType() : V0->getType();
607  Type *VecTy = I.getType();
608  assert(VecTy->isVectorTy() &&
609  (IsConst0 || IsConst1 || V0->getType() == V1->getType()) &&
610  (ScalarTy->isIntegerTy() || ScalarTy->isFloatingPointTy() ||
611  ScalarTy->isPointerTy()) &&
612  "Unexpected types for insert element into binop or cmp");
613 
614  unsigned Opcode = I.getOpcode();
615  InstructionCost ScalarOpCost, VectorOpCost;
616  if (IsCmp) {
617  ScalarOpCost = TTI.getCmpSelInstrCost(Opcode, ScalarTy);
618  VectorOpCost = TTI.getCmpSelInstrCost(Opcode, VecTy);
619  } else {
620  ScalarOpCost = TTI.getArithmeticInstrCost(Opcode, ScalarTy);
621  VectorOpCost = TTI.getArithmeticInstrCost(Opcode, VecTy);
622  }
623 
624  // Get cost estimate for the insert element. This cost will factor into
625  // both sequences.
626  InstructionCost InsertCost =
627  TTI.getVectorInstrCost(Instruction::InsertElement, VecTy, Index);
628  InstructionCost OldCost =
629  (IsConst0 ? 0 : InsertCost) + (IsConst1 ? 0 : InsertCost) + VectorOpCost;
630  InstructionCost NewCost = ScalarOpCost + InsertCost +
631  (IsConst0 ? 0 : !Ins0->hasOneUse() * InsertCost) +
632  (IsConst1 ? 0 : !Ins1->hasOneUse() * InsertCost);
633 
634  // We want to scalarize unless the vector variant actually has lower cost.
635  if (OldCost < NewCost || !NewCost.isValid())
636  return false;
637 
638  // vec_op (inselt VecC0, V0, Index), (inselt VecC1, V1, Index) -->
639  // inselt NewVecC, (scalar_op V0, V1), Index
640  if (IsCmp)
641  ++NumScalarCmp;
642  else
643  ++NumScalarBO;
644 
645   // For constant cases, extract the scalar element; this should constant fold.
646  if (IsConst0)
647  V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
648  if (IsConst1)
649  V1 = ConstantExpr::getExtractElement(VecC1, Builder.getInt64(Index));
650 
651  Value *Scalar =
652  IsCmp ? Builder.CreateCmp(Pred, V0, V1)
653  : Builder.CreateBinOp((Instruction::BinaryOps)Opcode, V0, V1);
654 
655  Scalar->setName(I.getName() + ".scalar");
656 
657  // All IR flags are safe to back-propagate. There is no potential for extra
658  // poison to be created by the scalar instruction.
659  if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
660  ScalarInst->copyIRFlags(&I);
661 
662  // Fold the vector constants in the original vectors into a new base vector.
663  Constant *NewVecC = IsCmp ? ConstantExpr::getCompare(Pred, VecC0, VecC1)
664  : ConstantExpr::get(Opcode, VecC0, VecC1);
665  Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, Index);
666  replaceValue(I, *Insert);
667  return true;
668 }
669 
670 /// Try to combine a scalar binop + 2 scalar compares of extracted elements of
671 /// a vector into vector operations followed by extract. Note: The SLP pass
672 /// may miss this pattern because of implementation problems.
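/// Illustrative example (hypothetical values; assumes the extract at index 3
/// is the one rebuilt as a shuffle):
///   %e0 = extractelement <4 x i32> %x, i32 0
///   %e1 = extractelement <4 x i32> %x, i32 3
///   %c0 = icmp sgt i32 %e0, 10
///   %c1 = icmp sgt i32 %e1, 20
///   %r  = and i1 %c0, %c1
/// -->
///   %vc = icmp sgt <4 x i32> %x, <i32 10, i32 undef, i32 undef, i32 20>
///   %sh = shufflevector <4 x i1> %vc, <4 x i1> undef,
///         <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
///   %vb = and <4 x i1> %vc, %sh
///   %r  = extractelement <4 x i1> %vb, i32 0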
673 bool VectorCombine::foldExtractedCmps(Instruction &I) {
674  // We are looking for a scalar binop of booleans.
675  // binop i1 (cmp Pred I0, C0), (cmp Pred I1, C1)
676  if (!I.isBinaryOp() || !I.getType()->isIntegerTy(1))
677  return false;
678 
679  // The compare predicates should match, and each compare should have a
680  // constant operand.
681  // TODO: Relax the one-use constraints.
682  Value *B0 = I.getOperand(0), *B1 = I.getOperand(1);
683  Instruction *I0, *I1;
684  Constant *C0, *C1;
685  CmpInst::Predicate P0, P1;
686  if (!match(B0, m_OneUse(m_Cmp(P0, m_Instruction(I0), m_Constant(C0)))) ||
687  !match(B1, m_OneUse(m_Cmp(P1, m_Instruction(I1), m_Constant(C1)))) ||
688  P0 != P1)
689  return false;
690 
691  // The compare operands must be extracts of the same vector with constant
692  // extract indexes.
693  // TODO: Relax the one-use constraints.
694  Value *X;
695  uint64_t Index0, Index1;
696  if (!match(I0, m_OneUse(m_ExtractElt(m_Value(X), m_ConstantInt(Index0)))) ||
697       !match(I1, m_OneUse(m_ExtractElt(m_Specific(X), m_ConstantInt(Index1)))))
698     return false;
699 
700  auto *Ext0 = cast<ExtractElementInst>(I0);
701  auto *Ext1 = cast<ExtractElementInst>(I1);
702  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
703  if (!ConvertToShuf)
704  return false;
705 
706  // The original scalar pattern is:
707  // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
708  CmpInst::Predicate Pred = P0;
709  unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
710  : Instruction::ICmp;
711  auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
712  if (!VecTy)
713  return false;
714 
715  InstructionCost OldCost =
716  TTI.getVectorInstrCost(Ext0->getOpcode(), VecTy, Index0);
717  OldCost += TTI.getVectorInstrCost(Ext1->getOpcode(), VecTy, Index1);
718  OldCost += TTI.getCmpSelInstrCost(CmpOpcode, I0->getType()) * 2;
719  OldCost += TTI.getArithmeticInstrCost(I.getOpcode(), I.getType());
720 
721  // The proposed vector pattern is:
722  // vcmp = cmp Pred X, VecC
723  // ext (binop vNi1 vcmp, (shuffle vcmp, Index1)), Index0
724  int CheapIndex = ConvertToShuf == Ext0 ? Index1 : Index0;
725  int ExpensiveIndex = ConvertToShuf == Ext0 ? Index0 : Index1;
726  auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
727  InstructionCost NewCost = TTI.getCmpSelInstrCost(CmpOpcode, X->getType());
728  SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
729  ShufMask[CheapIndex] = ExpensiveIndex;
730   NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
731                                 ShufMask);
732  NewCost += TTI.getArithmeticInstrCost(I.getOpcode(), CmpTy);
733  NewCost += TTI.getVectorInstrCost(Ext0->getOpcode(), CmpTy, CheapIndex);
734 
735  // Aggressively form vector ops if the cost is equal because the transform
736  // may enable further optimization.
737  // Codegen can reverse this transform (scalarize) if it was not profitable.
738  if (OldCost < NewCost || !NewCost.isValid())
739  return false;
740 
741  // Create a vector constant from the 2 scalar constants.
742  SmallVector<Constant *, 32> CmpC(VecTy->getNumElements(),
743  UndefValue::get(VecTy->getElementType()));
744  CmpC[Index0] = C0;
745  CmpC[Index1] = C1;
746  Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
747 
748  Value *Shuf = createShiftShuffle(VCmp, ExpensiveIndex, CheapIndex, Builder);
749  Value *VecLogic = Builder.CreateBinOp(cast<BinaryOperator>(I).getOpcode(),
750  VCmp, Shuf);
751  Value *NewExt = Builder.CreateExtractElement(VecLogic, CheapIndex);
752  replaceValue(I, *NewExt);
753  ++NumVecCmpBO;
754  return true;
755 }
756 
757 /// This is the entry point for all transforms. Pass manager differences are
758 /// handled in the callers of this function.
759 bool VectorCombine::run() {
760   if (DisableVectorCombine)
761     return false;
762 
763  // Don't attempt vectorization if the target does not support vectors.
764  if (!TTI.getNumberOfRegisters(TTI.getRegisterClassForType(/*Vector*/ true)))
765  return false;
766 
767  bool MadeChange = false;
768  for (BasicBlock &BB : F) {
769  // Ignore unreachable basic blocks.
770  if (!DT.isReachableFromEntry(&BB))
771  continue;
772  // Do not delete instructions under here and invalidate the iterator.
773  // Walk the block forwards to enable simple iterative chains of transforms.
774  // TODO: It could be more efficient to remove dead instructions
775  // iteratively in this loop rather than waiting until the end.
776  for (Instruction &I : BB) {
777  if (isa<DbgInfoIntrinsic>(I))
778  continue;
779  Builder.SetInsertPoint(&I);
780  MadeChange |= vectorizeLoadInsert(I);
781  MadeChange |= foldExtractExtract(I);
782  MadeChange |= foldBitcastShuf(I);
783  MadeChange |= scalarizeBinopOrCmp(I);
784  MadeChange |= foldExtractedCmps(I);
785  }
786  }
787 
788  // We're done with transforms, so remove dead instructions.
789  if (MadeChange)
790     for (BasicBlock &BB : F)
791       SimplifyInstructionsInBlock(&BB);
792 
793  return MadeChange;
794 }
795 
796 // Pass manager boilerplate below here.
797 
798 namespace {
799 class VectorCombineLegacyPass : public FunctionPass {
800 public:
801  static char ID;
802  VectorCombineLegacyPass() : FunctionPass(ID) {
803     initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
804   }
805 
806  void getAnalysisUsage(AnalysisUsage &AU) const override {
807     AU.addRequired<DominatorTreeWrapperPass>();
808     AU.addRequired<TargetTransformInfoWrapperPass>();
809     AU.setPreservesCFG();
810     AU.addPreserved<DominatorTreeWrapperPass>();
811     AU.addPreserved<GlobalsAAWrapperPass>();
812     AU.addPreserved<AAResultsWrapperPass>();
813     AU.addPreserved<BasicAAWrapperPass>();
814     FunctionPass::getAnalysisUsage(AU);
815   }
816 
817  bool runOnFunction(Function &F) override {
818  if (skipFunction(F))
819  return false;
820  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
821  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
822  VectorCombine Combiner(F, TTI, DT);
823  return Combiner.run();
824  }
825 };
826 } // namespace
827 
828 char VectorCombineLegacyPass::ID = 0;
829 INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
830                       "Optimize scalar/vector ops", false,
831                       false)
832 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
833 INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
834                     "Optimize scalar/vector ops", false, false)
835 Pass *llvm::createVectorCombinePass() {
836   return new VectorCombineLegacyPass();
837 }
838 
839 PreservedAnalyses VectorCombinePass::run(Function &F,
840                                          FunctionAnalysisManager &FAM) {
841   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
842   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
843   VectorCombine Combiner(F, TTI, DT);
844   if (!Combiner.run())
845     return PreservedAnalyses::all();
846   PreservedAnalyses PA;
847   PA.preserveSet<CFGAnalyses>();
848  PA.preserve<GlobalsAA>();
849  PA.preserve<AAManager>();
850  PA.preserve<BasicAA>();
851  return PA;
852 }