// LLVM 13.0.0git
// InstCombineVectorOps.cpp
// Source text extracted from the Doxygen documentation of this file.
//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//
13 
#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
44 
45 using namespace llvm;
46 using namespace PatternMatch;
47 
48 #define DEBUG_TYPE "instcombine"
49 
50 STATISTIC(NumAggregateReconstructionsSimplified,
51  "Number of aggregate reconstructions turned into reuse of the "
52  "original aggregate");
53 
54 /// Return true if the value is cheaper to scalarize than it is to leave as a
55 /// vector operation. IsConstantExtractIndex indicates whether we are extracting
56 /// one known element from a vector constant.
57 ///
58 /// FIXME: It's possible to create more instructions than previously existed.
59 static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex) {
60  // If we can pick a scalar constant value out of a vector, that is free.
61  if (auto *C = dyn_cast<Constant>(V))
62  return IsConstantExtractIndex || C->getSplatValue();
63 
64  // An insertelement to the same constant index as our extract will simplify
65  // to the scalar inserted element. An insertelement to a different constant
66  // index is irrelevant to our extract.
68  return IsConstantExtractIndex;
69 
70  if (match(V, m_OneUse(m_Load(m_Value()))))
71  return true;
72 
73  if (match(V, m_OneUse(m_UnOp())))
74  return true;
75 
76  Value *V0, *V1;
77  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
78  if (cheapToScalarize(V0, IsConstantExtractIndex) ||
79  cheapToScalarize(V1, IsConstantExtractIndex))
80  return true;
81 
82  CmpInst::Predicate UnusedPred;
83  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
84  if (cheapToScalarize(V0, IsConstantExtractIndex) ||
85  cheapToScalarize(V1, IsConstantExtractIndex))
86  return true;
87 
88  return false;
89 }
90 
91 // If we have a PHI node with a vector type that is only used to feed
92 // itself and be an operand of extractelement at a constant location,
93 // try to replace the PHI of the vector type with a PHI of a scalar type.
94 Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
95  PHINode *PN) {
97  // The users we want the PHI to have are:
98  // 1) The EI ExtractElement (we already know this)
99  // 2) Possibly more ExtractElements with the same index.
100  // 3) Another operand, which will feed back into the PHI.
101  Instruction *PHIUser = nullptr;
102  for (auto U : PN->users()) {
103  if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
104  if (EI.getIndexOperand() == EU->getIndexOperand())
105  Extracts.push_back(EU);
106  else
107  return nullptr;
108  } else if (!PHIUser) {
109  PHIUser = cast<Instruction>(U);
110  } else {
111  return nullptr;
112  }
113  }
114 
115  if (!PHIUser)
116  return nullptr;
117 
118  // Verify that this PHI user has one use, which is the PHI itself,
119  // and that it is a binary operation which is cheap to scalarize.
120  // otherwise return nullptr.
121  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
122  !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true))
123  return nullptr;
124 
125  // Create a scalar PHI node that will replace the vector PHI node
126  // just before the current PHI node.
127  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
128  PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
129  // Scalarize each PHI operand.
130  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
131  Value *PHIInVal = PN->getIncomingValue(i);
132  BasicBlock *inBB = PN->getIncomingBlock(i);
133  Value *Elt = EI.getIndexOperand();
134  // If the operand is the PHI induction variable:
135  if (PHIInVal == PHIUser) {
136  // Scalarize the binary operation. Its first operand is the
137  // scalar PHI, and the second operand is extracted from the other
138  // vector operand.
139  BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
140  unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
141  Value *Op = InsertNewInstWith(
142  ExtractElementInst::Create(B0->getOperand(opId), Elt,
143  B0->getOperand(opId)->getName() + ".Elt"),
144  *B0);
145  Value *newPHIUser = InsertNewInstWith(
147  scalarPHI, Op, B0), *B0);
148  scalarPHI->addIncoming(newPHIUser, inBB);
149  } else {
150  // Scalarize PHI input:
151  Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
152  // Insert the new instruction into the predecessor basic block.
153  Instruction *pos = dyn_cast<Instruction>(PHIInVal);
154  BasicBlock::iterator InsertPos;
155  if (pos && !isa<PHINode>(pos)) {
156  InsertPos = ++pos->getIterator();
157  } else {
158  InsertPos = inBB->getFirstInsertionPt();
159  }
160 
161  InsertNewInstWith(newEI, *InsertPos);
162 
163  scalarPHI->addIncoming(newEI, inBB);
164  }
165  }
166 
167  for (auto E : Extracts)
168  replaceInstUsesWith(*E, scalarPHI);
169 
170  return &EI;
171 }
172 
175  bool IsBigEndian) {
176  Value *X;
177  uint64_t ExtIndexC;
178  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
179  !X->getType()->isVectorTy() ||
180  !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
181  return nullptr;
182 
183  // If this extractelement is using a bitcast from a vector of the same number
184  // of elements, see if we can find the source element from the source vector:
185  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
186  auto *SrcTy = cast<VectorType>(X->getType());
187  Type *DestTy = Ext.getType();
188  ElementCount NumSrcElts = SrcTy->getElementCount();
189  ElementCount NumElts =
190  cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
191  if (NumSrcElts == NumElts)
192  if (Value *Elt = findScalarElement(X, ExtIndexC))
193  return new BitCastInst(Elt, DestTy);
194 
195  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
196  "Src and Dst must be the same sort of vector type");
197 
198  // If the source elements are wider than the destination, try to shift and
199  // truncate a subset of scalar bits of an insert op.
200  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
201  Value *Scalar;
202  uint64_t InsIndexC;
203  if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar),
204  m_ConstantInt(InsIndexC))))
205  return nullptr;
206 
207  // The extract must be from the subset of vector elements that we inserted
208  // into. Example: if we inserted element 1 of a <2 x i64> and we are
209  // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
210  // of elements 4-7 of the bitcasted vector.
211  unsigned NarrowingRatio =
212  NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
213  if (ExtIndexC / NarrowingRatio != InsIndexC)
214  return nullptr;
215 
216  // We are extracting part of the original scalar. How that scalar is
217  // inserted into the vector depends on the endian-ness. Example:
218  // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
219  // +--+--+--+--+--+--+--+--+
220  // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
221  // extelt <4 x i16> V', 3: | |S2|S3|
222  // +--+--+--+--+--+--+--+--+
223  // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
224  // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
225  // In this example, we must right-shift little-endian. Big-endian is just a
226  // truncate.
227  unsigned Chunk = ExtIndexC % NarrowingRatio;
228  if (IsBigEndian)
229  Chunk = NarrowingRatio - 1 - Chunk;
230 
231  // Bail out if this is an FP vector to FP vector sequence. That would take
232  // more instructions than we started with unless there is no shift, and it
233  // may not be handled as well in the backend.
234  bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
235  bool NeedDestBitcast = DestTy->isFloatingPointTy();
236  if (NeedSrcBitcast && NeedDestBitcast)
237  return nullptr;
238 
239  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
240  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
241  unsigned ShAmt = Chunk * DestWidth;
242 
243  // TODO: This limitation is more strict than necessary. We could sum the
244  // number of new instructions and subtract the number eliminated to know if
245  // we can proceed.
246  if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
247  if (NeedSrcBitcast || NeedDestBitcast)
248  return nullptr;
249 
250  if (NeedSrcBitcast) {
251  Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
252  Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
253  }
254 
255  if (ShAmt) {
256  // Bail out if we could end with more instructions than we started with.
257  if (!Ext.getVectorOperand()->hasOneUse())
258  return nullptr;
259  Scalar = Builder.CreateLShr(Scalar, ShAmt);
260  }
261 
262  if (NeedDestBitcast) {
263  Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
264  return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
265  }
266  return new TruncInst(Scalar, DestTy);
267  }
268 
269  return nullptr;
270 }
271 
272 /// Find elements of V demanded by UserInstr.
274  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
275 
276  // Conservatively assume that all elements are needed.
277  APInt UsedElts(APInt::getAllOnesValue(VWidth));
278 
279  switch (UserInstr->getOpcode()) {
280  case Instruction::ExtractElement: {
281  ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
282  assert(EEI->getVectorOperand() == V);
283  ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
284  if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
285  UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
286  }
287  break;
288  }
289  case Instruction::ShuffleVector: {
290  ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
291  unsigned MaskNumElts =
292  cast<FixedVectorType>(UserInstr->getType())->getNumElements();
293 
294  UsedElts = APInt(VWidth, 0);
295  for (unsigned i = 0; i < MaskNumElts; i++) {
296  unsigned MaskVal = Shuffle->getMaskValue(i);
297  if (MaskVal == -1u || MaskVal >= 2 * VWidth)
298  continue;
299  if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
300  UsedElts.setBit(MaskVal);
301  if (Shuffle->getOperand(1) == V &&
302  ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
303  UsedElts.setBit(MaskVal - VWidth);
304  }
305  break;
306  }
307  default:
308  break;
309  }
310  return UsedElts;
311 }
312 
313 /// Find union of elements of V demanded by all its users.
314 /// If it is known by querying findDemandedEltsBySingleUser that
315 /// no user demands an element of V, then the corresponding bit
316 /// remains unset in the returned value.
318  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
319 
320  APInt UnionUsedElts(VWidth, 0);
321  for (const Use &U : V->uses()) {
322  if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
323  UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
324  } else {
325  UnionUsedElts = APInt::getAllOnesValue(VWidth);
326  break;
327  }
328 
329  if (UnionUsedElts.isAllOnesValue())
330  break;
331  }
332 
333  return UnionUsedElts;
334 }
335 
337  Value *SrcVec = EI.getVectorOperand();
338  Value *Index = EI.getIndexOperand();
339  if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
340  SQ.getWithInstruction(&EI)))
341  return replaceInstUsesWith(EI, V);
342 
343  // If extracting a specified index from the vector, see if we can recursively
344  // find a previously computed scalar that was inserted into the vector.
345  auto *IndexC = dyn_cast<ConstantInt>(Index);
346  if (IndexC) {
348  unsigned NumElts = EC.getKnownMinValue();
349 
350  // InstSimplify should handle cases where the index is invalid.
351  // For fixed-length vector, it's invalid to extract out-of-range element.
352  if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
353  return nullptr;
354 
355  // This instruction only demands the single element from the input vector.
356  // Skip for scalable type, the number of elements is unknown at
357  // compile-time.
358  if (!EC.isScalable() && NumElts != 1) {
359  // If the input vector has a single use, simplify it based on this use
360  // property.
361  if (SrcVec->hasOneUse()) {
362  APInt UndefElts(NumElts, 0);
363  APInt DemandedElts(NumElts, 0);
364  DemandedElts.setBit(IndexC->getZExtValue());
365  if (Value *V =
366  SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
367  return replaceOperand(EI, 0, V);
368  } else {
369  // If the input vector has multiple uses, simplify it based on a union
370  // of all elements used.
371  APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
372  if (!DemandedElts.isAllOnesValue()) {
373  APInt UndefElts(NumElts, 0);
374  if (Value *V = SimplifyDemandedVectorElts(
375  SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
376  true /* AllowMultipleUsers */)) {
377  if (V != SrcVec) {
378  SrcVec->replaceAllUsesWith(V);
379  return &EI;
380  }
381  }
382  }
383  }
384  }
385  if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
386  return I;
387 
388  // If there's a vector PHI feeding a scalar use through this extractelement
389  // instruction, try to scalarize the PHI.
390  if (auto *Phi = dyn_cast<PHINode>(SrcVec))
391  if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
392  return ScalarPHI;
393  }
394 
395  // TODO come up with a n-ary matcher that subsumes both unary and
396  // binary matchers.
397  UnaryOperator *UO;
398  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, IndexC)) {
399  // extelt (unop X), Index --> unop (extelt X, Index)
400  Value *X = UO->getOperand(0);
401  Value *E = Builder.CreateExtractElement(X, Index);
403  }
404 
405  BinaryOperator *BO;
406  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, IndexC)) {
407  // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
408  Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
409  Value *E0 = Builder.CreateExtractElement(X, Index);
410  Value *E1 = Builder.CreateExtractElement(Y, Index);
411  return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
412  }
413 
414  Value *X, *Y;
415  CmpInst::Predicate Pred;
416  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
417  cheapToScalarize(SrcVec, IndexC)) {
418  // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
419  Value *E0 = Builder.CreateExtractElement(X, Index);
420  Value *E1 = Builder.CreateExtractElement(Y, Index);
421  return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
422  }
423 
424  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
425  if (auto *IE = dyn_cast<InsertElementInst>(I)) {
426  // Extracting the inserted element?
427  if (IE->getOperand(2) == Index)
428  return replaceInstUsesWith(EI, IE->getOperand(1));
429  // If the inserted and extracted elements are constants, they must not
430  // be the same value, extract from the pre-inserted value instead.
431  if (isa<Constant>(IE->getOperand(2)) && IndexC)
432  return replaceOperand(EI, 0, IE->getOperand(0));
433  } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
434  // If this is extracting an element from a shufflevector, figure out where
435  // it came from and extract from the appropriate input element instead.
436  // Restrict the following transformation to fixed-length vector.
437  if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
438  int SrcIdx =
439  SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
440  Value *Src;
441  unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
442  ->getNumElements();
443 
444  if (SrcIdx < 0)
445  return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
446  if (SrcIdx < (int)LHSWidth)
447  Src = SVI->getOperand(0);
448  else {
449  SrcIdx -= LHSWidth;
450  Src = SVI->getOperand(1);
451  }
454  Src, ConstantInt::get(Int32Ty, SrcIdx, false));
455  }
456  } else if (auto *CI = dyn_cast<CastInst>(I)) {
457  // Canonicalize extractelement(cast) -> cast(extractelement).
458  // Bitcasts can change the number of vector elements, and they cost
459  // nothing.
460  if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
461  Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
462  return CastInst::Create(CI->getOpcode(), EE, EI.getType());
463  }
464  }
465  }
466  return nullptr;
467 }
468 
469 /// If V is a shuffle of values that ONLY returns elements from either LHS or
470 /// RHS, return the shuffle mask and true. Otherwise, return false.
471 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
473  assert(LHS->getType() == RHS->getType() &&
474  "Invalid CollectSingleShuffleElements");
475  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
476 
477  if (match(V, m_Undef())) {
478  Mask.assign(NumElts, -1);
479  return true;
480  }
481 
482  if (V == LHS) {
483  for (unsigned i = 0; i != NumElts; ++i)
484  Mask.push_back(i);
485  return true;
486  }
487 
488  if (V == RHS) {
489  for (unsigned i = 0; i != NumElts; ++i)
490  Mask.push_back(i + NumElts);
491  return true;
492  }
493 
494  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
495  // If this is an insert of an extract from some other vector, include it.
496  Value *VecOp = IEI->getOperand(0);
497  Value *ScalarOp = IEI->getOperand(1);
498  Value *IdxOp = IEI->getOperand(2);
499 
500  if (!isa<ConstantInt>(IdxOp))
501  return false;
502  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
503 
504  if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
505  // We can handle this if the vector we are inserting into is
506  // transitively ok.
507  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
508  // If so, update the mask to reflect the inserted undef.
509  Mask[InsertedIdx] = -1;
510  return true;
511  }
512  } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
513  if (isa<ConstantInt>(EI->getOperand(1))) {
514  unsigned ExtractedIdx =
515  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
516  unsigned NumLHSElts =
517  cast<FixedVectorType>(LHS->getType())->getNumElements();
518 
519  // This must be extracting from either LHS or RHS.
520  if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
521  // We can handle this if the vector we are inserting into is
522  // transitively ok.
523  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
524  // If so, update the mask to reflect the inserted value.
525  if (EI->getOperand(0) == LHS) {
526  Mask[InsertedIdx % NumElts] = ExtractedIdx;
527  } else {
528  assert(EI->getOperand(0) == RHS);
529  Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
530  }
531  return true;
532  }
533  }
534  }
535  }
536  }
537 
538  return false;
539 }
540 
541 /// If we have insertion into a vector that is wider than the vector that we
542 /// are extracting from, try to widen the source vector to allow a single
543 /// shufflevector to replace one or more insert/extract pairs.
545  ExtractElementInst *ExtElt,
546  InstCombinerImpl &IC) {
547  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
548  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
549  unsigned NumInsElts = InsVecType->getNumElements();
550  unsigned NumExtElts = ExtVecType->getNumElements();
551 
552  // The inserted-to vector must be wider than the extracted-from vector.
553  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
554  NumExtElts >= NumInsElts)
555  return;
556 
557  // Create a shuffle mask to widen the extended-from vector using undefined
558  // values. The mask selects all of the values of the original vector followed
559  // by as many undefined values as needed to create a vector of the same length
560  // as the inserted-to vector.
561  SmallVector<int, 16> ExtendMask;
562  for (unsigned i = 0; i < NumExtElts; ++i)
563  ExtendMask.push_back(i);
564  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
565  ExtendMask.push_back(-1);
566 
567  Value *ExtVecOp = ExtElt->getVectorOperand();
568  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
569  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
570  ? ExtVecOpInst->getParent()
571  : ExtElt->getParent();
572 
573  // TODO: This restriction matches the basic block check below when creating
574  // new extractelement instructions. If that limitation is removed, this one
575  // could also be removed. But for now, we just bail out to ensure that we
576  // will replace the extractelement instruction that is feeding our
577  // insertelement instruction. This allows the insertelement to then be
578  // replaced by a shufflevector. If the insertelement is not replaced, we can
579  // induce infinite looping because there's an optimization for extractelement
580  // that will delete our widening shuffle. This would trigger another attempt
581  // here to create that shuffle, and we spin forever.
582  if (InsertionBlock != InsElt->getParent())
583  return;
584 
585  // TODO: This restriction matches the check in visitInsertElementInst() and
586  // prevents an infinite loop caused by not turning the extract/insert pair
587  // into a shuffle. We really should not need either check, but we're lacking
588  // folds for shufflevectors because we're afraid to generate shuffle masks
589  // that the backend can't handle.
590  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
591  return;
592 
593  auto *WideVec =
594  new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType), ExtendMask);
595 
596  // Insert the new shuffle after the vector operand of the extract is defined
597  // (as long as it's not a PHI) or at the start of the basic block of the
598  // extract, so any subsequent extracts in the same basic block can use it.
599  // TODO: Insert before the earliest ExtractElementInst that is replaced.
600  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
601  WideVec->insertAfter(ExtVecOpInst);
602  else
603  IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
604 
605  // Replace extracts from the original narrow vector with extracts from the new
606  // wide vector.
607  for (User *U : ExtVecOp->users()) {
608  ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
609  if (!OldExt || OldExt->getParent() != WideVec->getParent())
610  continue;
611  auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
612  NewExt->insertAfter(OldExt);
613  IC.replaceInstUsesWith(*OldExt, NewExt);
614  }
615 }
616 
617 /// We are building a shuffle to create V, which is a sequence of insertelement,
618 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
619 /// not rely on the second vector source. Return a std::pair containing the
620 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
621 /// parameter as required.
622 ///
623 /// Note: we intentionally don't try to fold earlier shuffles since they have
624 /// often been chosen carefully to be efficiently implementable on the target.
625 using ShuffleOps = std::pair<Value *, Value *>;
626 
628  Value *PermittedRHS,
629  InstCombinerImpl &IC) {
630  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
631  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
632 
633  if (match(V, m_Undef())) {
634  Mask.assign(NumElts, -1);
635  return std::make_pair(
636  PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
637  }
638 
639  if (isa<ConstantAggregateZero>(V)) {
640  Mask.assign(NumElts, 0);
641  return std::make_pair(V, nullptr);
642  }
643 
644  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
645  // If this is an insert of an extract from some other vector, include it.
646  Value *VecOp = IEI->getOperand(0);
647  Value *ScalarOp = IEI->getOperand(1);
648  Value *IdxOp = IEI->getOperand(2);
649 
650  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
651  if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
652  unsigned ExtractedIdx =
653  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
654  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
655 
656  // Either the extracted from or inserted into vector must be RHSVec,
657  // otherwise we'd end up with a shuffle of three inputs.
658  if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
659  Value *RHS = EI->getOperand(0);
660  ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
661  assert(LR.second == nullptr || LR.second == RHS);
662 
663  if (LR.first->getType() != RHS->getType()) {
664  // Although we are giving up for now, see if we can create extracts
665  // that match the inserts for another round of combining.
666  replaceExtractElements(IEI, EI, IC);
667 
668  // We tried our best, but we can't find anything compatible with RHS
669  // further up the chain. Return a trivial shuffle.
670  for (unsigned i = 0; i < NumElts; ++i)
671  Mask[i] = i;
672  return std::make_pair(V, nullptr);
673  }
674 
675  unsigned NumLHSElts =
676  cast<FixedVectorType>(RHS->getType())->getNumElements();
677  Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
678  return std::make_pair(LR.first, RHS);
679  }
680 
681  if (VecOp == PermittedRHS) {
682  // We've gone as far as we can: anything on the other side of the
683  // extractelement will already have been converted into a shuffle.
684  unsigned NumLHSElts =
685  cast<FixedVectorType>(EI->getOperand(0)->getType())
686  ->getNumElements();
687  for (unsigned i = 0; i != NumElts; ++i)
688  Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
689  return std::make_pair(EI->getOperand(0), PermittedRHS);
690  }
691 
692  // If this insertelement is a chain that comes from exactly these two
693  // vectors, return the vector and the effective shuffle.
694  if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
695  collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
696  Mask))
697  return std::make_pair(EI->getOperand(0), PermittedRHS);
698  }
699  }
700  }
701 
702  // Otherwise, we can't do anything fancy. Return an identity vector.
703  for (unsigned i = 0; i != NumElts; ++i)
704  Mask.push_back(i);
705  return std::make_pair(V, nullptr);
706 }
707 
708 /// Look for chain of insertvalue's that fully define an aggregate, and trace
709 /// back the values inserted, see if they are all were extractvalue'd from
710 /// the same source aggregate from the exact same element indexes.
711 /// If they were, just reuse the source aggregate.
712 /// This potentially deals with PHI indirections.
714  InsertValueInst &OrigIVI) {
715  Type *AggTy = OrigIVI.getType();
716  unsigned NumAggElts;
717  switch (AggTy->getTypeID()) {
718  case Type::StructTyID:
719  NumAggElts = AggTy->getStructNumElements();
720  break;
721  case Type::ArrayTyID:
722  NumAggElts = AggTy->getArrayNumElements();
723  break;
724  default:
725  llvm_unreachable("Unhandled aggregate type?");
726  }
727 
728  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
729  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
730  // FIXME: any interesting patterns to be caught with larger limit?
731  assert(NumAggElts > 0 && "Aggregate should have elements.");
732  if (NumAggElts > 2)
733  return nullptr;
734 
735  static constexpr auto NotFound = None;
736  static constexpr auto FoundMismatch = nullptr;
737 
738  // Try to find a value of each element of an aggregate.
739  // FIXME: deal with more complex, not one-dimensional, aggregate types
740  SmallVector<Optional<Value *>, 2> AggElts(NumAggElts, NotFound);
741 
742  // Do we know values for each element of the aggregate?
743  auto KnowAllElts = [&AggElts]() {
744  return all_of(AggElts,
745  [](Optional<Value *> Elt) { return Elt != NotFound; });
746  };
747 
748  int Depth = 0;
749 
750  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
751  // every element being overwritten twice, which should never happen.
752  static const int DepthLimit = 2 * NumAggElts;
753 
754  // Recurse up the chain of `insertvalue` aggregate operands until either we've
755  // reconstructed full initializer or can't visit any more `insertvalue`'s.
756  for (InsertValueInst *CurrIVI = &OrigIVI;
757  Depth < DepthLimit && CurrIVI && !KnowAllElts();
758  CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
759  ++Depth) {
760  Value *InsertedValue = CurrIVI->getInsertedValueOperand();
761  ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
762 
763  // Don't bother with more than single-level aggregates.
764  if (Indices.size() != 1)
765  return nullptr; // FIXME: deal with more complex aggregates?
766 
767  // Now, we may have already previously recorded the value for this element
768  // of an aggregate. If we did, that means the CurrIVI will later be
769  // overwritten with the already-recorded value. But if not, let's record it!
770  Optional<Value *> &Elt = AggElts[Indices.front()];
771  Elt = Elt.getValueOr(InsertedValue);
772 
773  // FIXME: should we handle chain-terminating undef base operand?
774  }
775 
776  // Was that sufficient to deduce the full initializer for the aggregate?
777  if (!KnowAllElts())
778  return nullptr; // Give up then.
779 
780  // We now want to find the source[s] of the aggregate elements we've found.
781  // And with "source" we mean the original aggregate[s] from which
782  // the inserted elements were extracted. This may require PHI translation.
783 
784  enum class AggregateDescription {
785  /// When analyzing the value that was inserted into an aggregate, we did
786  /// not manage to find defining `extractvalue` instruction to analyze.
787  NotFound,
788  /// When analyzing the value that was inserted into an aggregate, we did
789  /// manage to find defining `extractvalue` instruction[s], and everything
790  /// matched perfectly - aggregate type, element insertion/extraction index.
791  Found,
792  /// When analyzing the value that was inserted into an aggregate, we did
793  /// manage to find defining `extractvalue` instruction, but there was
794  /// a mismatch: either the source type from which the extraction was didn't
795  /// match the aggregate type into which the insertion was,
796  /// or the extraction/insertion channels mismatched,
797  /// or different elements had different source aggregates.
798  FoundMismatch
799  };
800  auto Describe = [](Optional<Value *> SourceAggregate) {
801  if (SourceAggregate == NotFound)
803  if (*SourceAggregate == FoundMismatch)
804  return AggregateDescription::FoundMismatch;
805  return AggregateDescription::Found;
806  };
807 
808  // Given the value \p Elt that was being inserted into element \p EltIdx of an
809  // aggregate AggTy, see if \p Elt was originally defined by an
810  // appropriate extractvalue (same element index, same aggregate type).
811  // If found, return the source aggregate from which the extraction was.
812  // If \p PredBB is provided, does PHI translation of an \p Elt first.
813  auto FindSourceAggregate =
814  [&](Value *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
816  // For now(?), only deal with, at most, a single level of PHI indirection.
817  if (UseBB && PredBB)
818  Elt = Elt->DoPHITranslation(*UseBB, *PredBB);
819  // FIXME: deal with multiple levels of PHI indirection?
820 
821  // Did we find an extraction?
822  auto *EVI = dyn_cast<ExtractValueInst>(Elt);
823  if (!EVI)
824  return NotFound;
825 
826  Value *SourceAggregate = EVI->getAggregateOperand();
827 
828  // Is the extraction from the same type into which the insertion was?
829  if (SourceAggregate->getType() != AggTy)
830  return FoundMismatch;
831  // And the element index doesn't change between extraction and insertion?
832  if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
833  return FoundMismatch;
834 
835  return SourceAggregate; // AggregateDescription::Found
836  };
837 
838  // Given elements AggElts that were constructing an aggregate OrigIVI,
839  // see if we can find appropriate source aggregate for each of the elements,
840  // and see it's the same aggregate for each element. If so, return it.
841  auto FindCommonSourceAggregate =
842  [&](Optional<BasicBlock *> UseBB,
844  Optional<Value *> SourceAggregate;
845 
846  for (auto I : enumerate(AggElts)) {
847  assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
848  "We don't store nullptr in SourceAggregate!");
849  assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
850  (I.index() != 0) &&
851  "SourceAggregate should be valid after the the first element,");
852 
853  // For this element, is there a plausible source aggregate?
854  // FIXME: we could special-case undef element, IFF we know that in the
855  // source aggregate said element isn't poison.
856  Optional<Value *> SourceAggregateForElement =
857  FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
858 
859  // Okay, what have we found? Does that correlate with previous findings?
860 
861  // Regardless of whether or not we have previously found source
862  // aggregate for previous elements (if any), if we didn't find one for
863  // this element, passthrough whatever we have just found.
864  if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
865  return SourceAggregateForElement;
866 
867  // Okay, we have found source aggregate for this element.
868  // Let's see what we already know from previous elements, if any.
869  switch (Describe(SourceAggregate)) {
871  // This is apparently the first element that we have examined.
872  SourceAggregate = SourceAggregateForElement; // Record the aggregate!
873  continue; // Great, now look at next element.
874  case AggregateDescription::Found:
875  // We have previously already successfully examined other elements.
876  // Is this the same source aggregate we've found for other elements?
877  if (*SourceAggregateForElement != *SourceAggregate)
878  return FoundMismatch;
879  continue; // Still the same aggregate, look at next element.
880  case AggregateDescription::FoundMismatch:
881  llvm_unreachable("Can't happen. We would have early-exited then.");
882  };
883  }
884 
885  assert(Describe(SourceAggregate) == AggregateDescription::Found &&
886  "Must be a valid Value");
887  return *SourceAggregate;
888  };
889 
890  Optional<Value *> SourceAggregate;
891 
892  // Can we find the source aggregate without looking at predecessors?
893  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None);
894  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
895  if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
896  return nullptr; // Conflicting source aggregates!
897  ++NumAggregateReconstructionsSimplified;
898  return replaceInstUsesWith(OrigIVI, *SourceAggregate);
899  }
900 
901  // Okay, apparently we need to look at predecessors.
902 
903  // We should be smart about picking the "use" basic block, which will be the
904  // merge point for aggregate, where we'll insert the final PHI that will be
905  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
906  // We should look in which blocks each of the AggElts is being defined,
907  // they all should be defined in the same basic block.
908  BasicBlock *UseBB = nullptr;
909 
910  for (const Optional<Value *> &Elt : AggElts) {
911  // If this element's value was not defined by an instruction, ignore it.
912  auto *I = dyn_cast<Instruction>(*Elt);
913  if (!I)
914  continue;
915  // Otherwise, in which basic block is this instruction located?
916  BasicBlock *BB = I->getParent();
917  // If it's the first instruction we've encountered, record the basic block.
918  if (!UseBB) {
919  UseBB = BB;
920  continue;
921  }
922  // Otherwise, this must be the same basic block we've seen previously.
923  if (UseBB != BB)
924  return nullptr;
925  }
926 
927  // If *all* of the elements are basic-block-independent, meaning they are
928  // either function arguments, or constant expressions, then if we didn't
929  // handle them without predecessor-aware handling, we won't handle them now.
930  if (!UseBB)
931  return nullptr;
932 
933  // If we didn't manage to find source aggregate without looking at
934  // predecessors, and there are no predecessors to look at, then we're done.
935  if (pred_empty(UseBB))
936  return nullptr;
937 
938  // Arbitrary predecessor count limit.
939  static const int PredCountLimit = 64;
940 
941  // Cache the (non-uniqified!) list of predecessors in a vector,
942  // checking the limit at the same time for efficiency.
943  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
944  for (BasicBlock *Pred : predecessors(UseBB)) {
945  // Don't bother if there are too many predecessors.
946  if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
947  return nullptr;
948  Preds.emplace_back(Pred);
949  }
950 
951  // For each predecessor, what is the source aggregate,
952  // from which all the elements were originally extracted from?
953  // Note that we want for the map to have stable iteration order!
955  for (BasicBlock *Pred : Preds) {
956  std::pair<decltype(SourceAggregates)::iterator, bool> IV =
957  SourceAggregates.insert({Pred, nullptr});
958  // Did we already evaluate this predecessor?
959  if (!IV.second)
960  continue;
961 
962  // Let's hope that when coming from predecessor Pred, all elements of the
963  // aggregate produced by OrigIVI must have been originally extracted from
964  // the same aggregate. Is that so? Can we find said original aggregate?
965  SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
966  if (Describe(SourceAggregate) != AggregateDescription::Found)
967  return nullptr; // Give up.
968  IV.first->second = *SourceAggregate;
969  }
970 
971  // All good! Now we just need to thread the source aggregates here.
972  // Note that we have to insert the new PHI here, ourselves, because we can't
973  // rely on InstCombinerImpl::run() inserting it into the right basic block.
974  // Note that the same block can be a predecessor more than once,
975  // and we need to preserve that invariant for the PHI node.
977  Builder.SetInsertPoint(UseBB->getFirstNonPHI());
978  auto *PHI =
979  Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
980  for (BasicBlock *Pred : Preds)
981  PHI->addIncoming(SourceAggregates[Pred], Pred);
982 
983  ++NumAggregateReconstructionsSimplified;
984  return replaceInstUsesWith(OrigIVI, PHI);
985 }
986 
987 /// Try to find redundant insertvalue instructions, like the following ones:
988 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
989 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
990 /// Here the second instruction inserts values at the same indices, as the
991 /// first one, making the first one redundant.
992 /// It should be transformed to:
993 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
995  bool IsRedundant = false;
996  ArrayRef<unsigned int> FirstIndices = I.getIndices();
997 
998  // If there is a chain of insertvalue instructions (each of them except the
999  // last one has only one use and it's another insertvalue insn from this
1000  // chain), check if any of the 'children' uses the same indices as the first
1001  // instruction. In this case, the first one is redundant.
1002  Value *V = &I;
1003  unsigned Depth = 0;
1004  while (V->hasOneUse() && Depth < 10) {
1005  User *U = V->user_back();
1006  auto UserInsInst = dyn_cast<InsertValueInst>(U);
1007  if (!UserInsInst || U->getOperand(0) != V)
1008  break;
1009  if (UserInsInst->getIndices() == FirstIndices) {
1010  IsRedundant = true;
1011  break;
1012  }
1013  V = UserInsInst;
1014  Depth++;
1015  }
1016 
1017  if (IsRedundant)
1018  return replaceInstUsesWith(I, I.getOperand(0));
1019 
1020  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
1021  return NewI;
1022 
1023  return nullptr;
1024 }
1025 
1027  // Can not analyze scalable type, the number of elements is not a compile-time
1028  // constant.
1029  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
1030  return false;
1031 
1032  int MaskSize = Shuf.getShuffleMask().size();
1033  int VecSize =
1034  cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1035 
1036  // A vector select does not change the size of the operands.
1037  if (MaskSize != VecSize)
1038  return false;
1039 
1040  // Each mask element must be undefined or choose a vector element from one of
1041  // the source operands without crossing vector lanes.
1042  for (int i = 0; i != MaskSize; ++i) {
1043  int Elt = Shuf.getMaskValue(i);
1044  if (Elt != -1 && Elt != i && Elt != i + VecSize)
1045  return false;
1046  }
1047 
1048  return true;
1049 }
1050 
1051 /// Turn a chain of inserts that splats a value into an insert + shuffle:
1052 /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1053 /// shufflevector(insertelt(X, %k, 0), undef, zero)
1055  // We are interested in the last insert in a chain. So if this insert has a
1056  // single user and that user is an insert, bail.
1057  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1058  return nullptr;
1059 
1060  VectorType *VecTy = InsElt.getType();
1061  // Can not handle scalable type, the number of elements is not a compile-time
1062  // constant.
1063  if (isa<ScalableVectorType>(VecTy))
1064  return nullptr;
1065  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1066 
1067  // Do not try to do this for a one-element vector, since that's a nop,
1068  // and will cause an inf-loop.
1069  if (NumElements == 1)
1070  return nullptr;
1071 
1072  Value *SplatVal = InsElt.getOperand(1);
1073  InsertElementInst *CurrIE = &InsElt;
1074  SmallBitVector ElementPresent(NumElements, false);
1075  InsertElementInst *FirstIE = nullptr;
1076 
1077  // Walk the chain backwards, keeping track of which indices we inserted into,
1078  // until we hit something that isn't an insert of the splatted value.
1079  while (CurrIE) {
1080  auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1081  if (!Idx || CurrIE->getOperand(1) != SplatVal)
1082  return nullptr;
1083 
1084  auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1085  // Check none of the intermediate steps have any additional uses, except
1086  // for the root insertelement instruction, which can be re-used, if it
1087  // inserts at position 0.
1088  if (CurrIE != &InsElt &&
1089  (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1090  return nullptr;
1091 
1092  ElementPresent[Idx->getZExtValue()] = true;
1093  FirstIE = CurrIE;
1094  CurrIE = NextIE;
1095  }
1096 
1097  // If this is just a single insertelement (not a sequence), we are done.
1098  if (FirstIE == &InsElt)
1099  return nullptr;
1100 
1101  // If we are not inserting into an undef vector, make sure we've seen an
1102  // insert into every element.
1103  // TODO: If the base vector is not undef, it might be better to create a splat
1104  // and then a select-shuffle (blend) with the base vector.
1105  if (!match(FirstIE->getOperand(0), m_Undef()))
1106  if (!ElementPresent.all())
1107  return nullptr;
1108 
1109  // Create the insert + shuffle.
1110  Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
1111  UndefValue *UndefVec = UndefValue::get(VecTy);
1112  Constant *Zero = ConstantInt::get(Int32Ty, 0);
1113  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1114  FirstIE = InsertElementInst::Create(UndefVec, SplatVal, Zero, "", &InsElt);
1115 
1116  // Splat from element 0, but replace absent elements with undef in the mask.
1117  SmallVector<int, 16> Mask(NumElements, 0);
1118  for (unsigned i = 0; i != NumElements; ++i)
1119  if (!ElementPresent[i])
1120  Mask[i] = -1;
1121 
1122  return new ShuffleVectorInst(FirstIE, UndefVec, Mask);
1123 }
1124 
1125 /// Try to fold an insert element into an existing splat shuffle by changing
1126 /// the shuffle's mask to include the index of this insert element.
1128  // Check if the vector operand of this insert is a canonical splat shuffle.
1129  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1130  if (!Shuf || !Shuf->isZeroEltSplat())
1131  return nullptr;
1132 
1133  // Bail out early if shuffle is scalable type. The number of elements in
1134  // shuffle mask is unknown at compile-time.
1135  if (isa<ScalableVectorType>(Shuf->getType()))
1136  return nullptr;
1137 
1138  // Check for a constant insertion index.
1139  uint64_t IdxC;
1140  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1141  return nullptr;
1142 
1143  // Check if the splat shuffle's input is the same as this insert's scalar op.
1144  Value *X = InsElt.getOperand(1);
1145  Value *Op0 = Shuf->getOperand(0);
1146  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1147  return nullptr;
1148 
1149  // Replace the shuffle mask element at the index of this insert with a zero.
1150  // For example:
1151  // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
1152  // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
1153  unsigned NumMaskElts =
1154  cast<FixedVectorType>(Shuf->getType())->getNumElements();
1155  SmallVector<int, 16> NewMask(NumMaskElts);
1156  for (unsigned i = 0; i != NumMaskElts; ++i)
1157  NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1158 
1159  return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
1160 }
1161 
1162 /// Try to fold an extract+insert element into an existing identity shuffle by
1163 /// changing the shuffle's mask to include the index of this insert element.
1165  // Check if the vector operand of this insert is an identity shuffle.
1166  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1167  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
1168  !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1169  return nullptr;
1170 
1171  // Bail out early if shuffle is scalable type. The number of elements in
1172  // shuffle mask is unknown at compile-time.
1173  if (isa<ScalableVectorType>(Shuf->getType()))
1174  return nullptr;
1175 
1176  // Check for a constant insertion index.
1177  uint64_t IdxC;
1178  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1179  return nullptr;
1180 
1181  // Check if this insert's scalar op is extracted from the identity shuffle's
1182  // input vector.
1183  Value *Scalar = InsElt.getOperand(1);
1184  Value *X = Shuf->getOperand(0);
1185  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1186  return nullptr;
1187 
1188  // Replace the shuffle mask element at the index of this extract+insert with
1189  // that same index value.
1190  // For example:
1191  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1192  unsigned NumMaskElts =
1193  cast<FixedVectorType>(Shuf->getType())->getNumElements();
1194  SmallVector<int, 16> NewMask(NumMaskElts);
1195  ArrayRef<int> OldMask = Shuf->getShuffleMask();
1196  for (unsigned i = 0; i != NumMaskElts; ++i) {
1197  if (i != IdxC) {
1198  // All mask elements besides the inserted element remain the same.
1199  NewMask[i] = OldMask[i];
1200  } else if (OldMask[i] == (int)IdxC) {
1201  // If the mask element was already set, there's nothing to do
1202  // (demanded elements analysis may unset it later).
1203  return nullptr;
1204  } else {
1205  assert(OldMask[i] == UndefMaskElem &&
1206  "Unexpected shuffle mask element for identity shuffle");
1207  NewMask[i] = IdxC;
1208  }
1209  }
1210 
1211  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1212 }
1213 
1214 /// If we have an insertelement instruction feeding into another insertelement
1215 /// and the 2nd is inserting a constant into the vector, canonicalize that
1216 /// constant insertion before the insertion of a variable:
1217 ///
1218 /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1219 /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1220 ///
1221 /// This has the potential of eliminating the 2nd insertelement instruction
1222 /// via constant folding of the scalar constant into a vector constant.
1225  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1226  if (!InsElt1 || !InsElt1->hasOneUse())
1227  return nullptr;
1228 
1229  Value *X, *Y;
1230  Constant *ScalarC;
1231  ConstantInt *IdxC1, *IdxC2;
1232  if (match(InsElt1->getOperand(0), m_Value(X)) &&
1233  match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1234  match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1235  match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1236  match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1237  Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1238  return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1239  }
1240 
1241  return nullptr;
1242 }
1243 
1244 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1245 /// --> shufflevector X, CVec', Mask'
1247  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1248  // Bail out if the parent has more than one use. In that case, we'd be
1249  // replacing the insertelt with a shuffle, and that's not a clear win.
1250  if (!Inst || !Inst->hasOneUse())
1251  return nullptr;
1252  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1253  // The shuffle must have a constant vector operand. The insertelt must have
1254  // a constant scalar being inserted at a constant position in the vector.
1255  Constant *ShufConstVec, *InsEltScalar;
1256  uint64_t InsEltIndex;
1257  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1258  !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1259  !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1260  return nullptr;
1261 
1262  // Adding an element to an arbitrary shuffle could be expensive, but a
1263  // shuffle that selects elements from vectors without crossing lanes is
1264  // assumed cheap.
1265  // If we're just adding a constant into that shuffle, it will still be
1266  // cheap.
1267  if (!isShuffleEquivalentToSelect(*Shuf))
1268  return nullptr;
1269 
1270  // From the above 'select' check, we know that the mask has the same number
1271  // of elements as the vector input operands. We also know that each constant
1272  // input element is used in its lane and can not be used more than once by
1273  // the shuffle. Therefore, replace the constant in the shuffle's constant
1274  // vector with the insertelt constant. Replace the constant in the shuffle's
1275  // mask vector with the insertelt index plus the length of the vector
1276  // (because the constant vector operand of a shuffle is always the 2nd
1277  // operand).
1278  ArrayRef<int> Mask = Shuf->getShuffleMask();
1279  unsigned NumElts = Mask.size();
1280  SmallVector<Constant *, 16> NewShufElts(NumElts);
1281  SmallVector<int, 16> NewMaskElts(NumElts);
1282  for (unsigned I = 0; I != NumElts; ++I) {
1283  if (I == InsEltIndex) {
1284  NewShufElts[I] = InsEltScalar;
1285  NewMaskElts[I] = InsEltIndex + NumElts;
1286  } else {
1287  // Copy over the existing values.
1288  NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1289  NewMaskElts[I] = Mask[I];
1290  }
1291  }
1292 
1293  // Create new operands for a shuffle that includes the constant of the
1294  // original insertelt. The old shuffle will be dead now.
1295  return new ShuffleVectorInst(Shuf->getOperand(0),
1296  ConstantVector::get(NewShufElts), NewMaskElts);
1297  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1298  // Transform sequences of insertelements ops with constant data/indexes into
1299  // a single shuffle op.
1300  // Can not handle scalable type, the number of elements needed to create
1301  // shuffle mask is not a compile-time constant.
1302  if (isa<ScalableVectorType>(InsElt.getType()))
1303  return nullptr;
1304  unsigned NumElts =
1305  cast<FixedVectorType>(InsElt.getType())->getNumElements();
1306 
1307  uint64_t InsertIdx[2];
1308  Constant *Val[2];
1309  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1310  !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1311  !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1312  !match(IEI->getOperand(1), m_Constant(Val[1])))
1313  return nullptr;
1314  SmallVector<Constant *, 16> Values(NumElts);
1315  SmallVector<int, 16> Mask(NumElts);
1316  auto ValI = std::begin(Val);
1317  // Generate new constant vector and mask.
1318  // We have 2 values/masks from the insertelements instructions. Insert them
1319  // into new value/mask vectors.
1320  for (uint64_t I : InsertIdx) {
1321  if (!Values[I]) {
1322  Values[I] = *ValI;
1323  Mask[I] = NumElts + I;
1324  }
1325  ++ValI;
1326  }
1327  // Remaining values are filled with 'undef' values.
1328  for (unsigned I = 0; I < NumElts; ++I) {
1329  if (!Values[I]) {
1330  Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1331  Mask[I] = I;
1332  }
1333  }
1334  // Create new operands for a shuffle that includes the constant of the
1335  // original insertelt.
1336  return new ShuffleVectorInst(IEI->getOperand(0),
1337  ConstantVector::get(Values), Mask);
1338  }
1339  return nullptr;
1340 }
1341 
1343  Value *VecOp = IE.getOperand(0);
1344  Value *ScalarOp = IE.getOperand(1);
1345  Value *IdxOp = IE.getOperand(2);
1346 
1347  if (auto *V = SimplifyInsertElementInst(
1348  VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1349  return replaceInstUsesWith(IE, V);
1350 
1351  // If the scalar is bitcast and inserted into undef, do the insert in the
1352  // source type followed by bitcast.
1353  // TODO: Generalize for insert into any constant, not just undef?
1354  Value *ScalarSrc;
1355  if (match(VecOp, m_Undef()) &&
1356  match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1357  (ScalarSrc->getType()->isIntegerTy() ||
1358  ScalarSrc->getType()->isFloatingPointTy())) {
1359  // inselt undef, (bitcast ScalarSrc), IdxOp -->
1360  // bitcast (inselt undef, ScalarSrc, IdxOp)
1361  Type *ScalarTy = ScalarSrc->getType();
1362  Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1363  UndefValue *NewUndef = UndefValue::get(VecTy);
1364  Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1365  return new BitCastInst(NewInsElt, IE.getType());
1366  }
1367 
1368  // If the vector and scalar are both bitcast from the same element type, do
1369  // the insert in that source type followed by bitcast.
1370  Value *VecSrc;
1371  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1372  match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1373  (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1374  VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1375  cast<VectorType>(VecSrc->getType())->getElementType() ==
1376  ScalarSrc->getType()) {
1377  // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1378  // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1379  Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1380  return new BitCastInst(NewInsElt, IE.getType());
1381  }
1382 
1383  // If the inserted element was extracted from some other fixed-length vector
1384  // and both indexes are valid constants, try to turn this into a shuffle.
1385  // Can not handle scalable vector type, the number of elements needed to
1386  // create shuffle mask is not a compile-time constant.
1387  uint64_t InsertedIdx, ExtractedIdx;
1388  Value *ExtVecOp;
1389  if (isa<FixedVectorType>(IE.getType()) &&
1390  match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1391  match(ScalarOp,
1392  m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1393  isa<FixedVectorType>(ExtVecOp->getType()) &&
1394  ExtractedIdx <
1395  cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1396  // TODO: Looking at the user(s) to determine if this insert is a
1397  // fold-to-shuffle opportunity does not match the usual instcombine
1398  // constraints. We should decide if the transform is worthy based only
1399  // on this instruction and its operands, but that may not work currently.
1400  //
1401  // Here, we are trying to avoid creating shuffles before reaching
1402  // the end of a chain of extract-insert pairs. This is complicated because
1403  // we do not generally form arbitrary shuffle masks in instcombine
1404  // (because those may codegen poorly), but collectShuffleElements() does
1405  // exactly that.
1406  //
1407  // The rules for determining what is an acceptable target-independent
1408  // shuffle mask are fuzzy because they evolve based on the backend's
1409  // capabilities and real-world impact.
1410  auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1411  if (!Insert.hasOneUse())
1412  return true;
1413  auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1414  if (!InsertUser)
1415  return true;
1416  return false;
1417  };
1418 
1419  // Try to form a shuffle from a chain of extract-insert ops.
1420  if (isShuffleRootCandidate(IE)) {
1422  ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1423 
1424  // The proposed shuffle may be trivial, in which case we shouldn't
1425  // perform the combine.
1426  if (LR.first != &IE && LR.second != &IE) {
1427  // We now have a shuffle of LHS, RHS, Mask.
1428  if (LR.second == nullptr)
1429  LR.second = UndefValue::get(LR.first->getType());
1430  return new ShuffleVectorInst(LR.first, LR.second, Mask);
1431  }
1432  }
1433  }
1434 
1435  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1436  unsigned VWidth = VecTy->getNumElements();
1437  APInt UndefElts(VWidth, 0);
1438  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
1439  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1440  if (V != &IE)
1441  return replaceInstUsesWith(IE, V);
1442  return &IE;
1443  }
1444  }
1445 
1447  return Shuf;
1448 
1449  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1450  return NewInsElt;
1451 
1452  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1453  return Broadcast;
1454 
1455  if (Instruction *Splat = foldInsEltIntoSplat(IE))
1456  return Splat;
1457 
1458  if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1459  return IdentityShuf;
1460 
1461  return nullptr;
1462 }
1463 
1464 /// Return true if we can evaluate the specified expression tree if the vector
1465 /// elements were shuffled in a different order.
1467  unsigned Depth = 5) {
1468  // We can always reorder the elements of a constant.
1469  if (isa<Constant>(V))
1470  return true;
1471 
1472  // We won't reorder vector arguments. No IPO here.
1473  Instruction *I = dyn_cast<Instruction>(V);
1474  if (!I) return false;
1475 
1476  // Two users may expect different orders of the elements. Don't try it.
1477  if (!I->hasOneUse())
1478  return false;
1479 
1480  if (Depth == 0) return false;
1481 
1482  switch (I->getOpcode()) {
1483  case Instruction::UDiv:
1484  case Instruction::SDiv:
1485  case Instruction::URem:
1486  case Instruction::SRem:
1487  // Propagating an undefined shuffle mask element to integer div/rem is not
1488  // allowed because those opcodes can create immediate undefined behavior
1489  // from an undefined element in an operand.
1490  if (llvm::is_contained(Mask, -1))
1491  return false;
1493  case Instruction::Add:
1494  case Instruction::FAdd:
1495  case Instruction::Sub:
1496  case Instruction::FSub:
1497  case Instruction::Mul:
1498  case Instruction::FMul:
1499  case Instruction::FDiv:
1500  case Instruction::FRem:
1501  case Instruction::Shl:
1502  case Instruction::LShr:
1503  case Instruction::AShr:
1504  case Instruction::And:
1505  case Instruction::Or:
1506  case Instruction::Xor:
1507  case Instruction::ICmp:
1508  case Instruction::FCmp:
1509  case Instruction::Trunc:
1510  case Instruction::ZExt:
1511  case Instruction::SExt:
1512  case Instruction::FPToUI:
1513  case Instruction::FPToSI:
1514  case Instruction::UIToFP:
1515  case Instruction::SIToFP:
1516  case Instruction::FPTrunc:
1517  case Instruction::FPExt:
1518  case Instruction::GetElementPtr: {
1519  // Bail out if we would create longer vector ops. We could allow creating
1520  // longer vector ops, but that may result in more expensive codegen.
1521  Type *ITy = I->getType();
1522  if (ITy->isVectorTy() &&
1523  Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1524  return false;
1525  for (Value *Operand : I->operands()) {
1526  if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1527  return false;
1528  }
1529  return true;
1530  }
1531  case Instruction::InsertElement: {
1532  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1533  if (!CI) return false;
1534  int ElementNumber = CI->getLimitedValue();
1535 
1536  // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1537  // can't put an element into multiple indices.
1538  bool SeenOnce = false;
1539  for (int i = 0, e = Mask.size(); i != e; ++i) {
1540  if (Mask[i] == ElementNumber) {
1541  if (SeenOnce)
1542  return false;
1543  SeenOnce = true;
1544  }
1545  }
1546  return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1547  }
1548  }
1549  return false;
1550 }
1551 
1552 /// Rebuild a new instruction just like 'I' but with the new operands given.
1553 /// In the event of type mismatch, the type of the operands is correct.
// NOTE(review): the doxygen export dropped the signature line (orig 1554) —
// presumably `static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {`;
// confirm against the upstream LLVM source.
1555  // We don't want to use the IRBuilder here because we want the replacement
1556  // instructions to appear next to 'I', not the builder's insertion point.
1557  switch (I->getOpcode()) {
1558  case Instruction::Add:
1559  case Instruction::FAdd:
1560  case Instruction::Sub:
1561  case Instruction::FSub:
1562  case Instruction::Mul:
1563  case Instruction::FMul:
1564  case Instruction::UDiv:
1565  case Instruction::SDiv:
1566  case Instruction::FDiv:
1567  case Instruction::URem:
1568  case Instruction::SRem:
1569  case Instruction::FRem:
1570  case Instruction::Shl:
1571  case Instruction::LShr:
1572  case Instruction::AShr:
1573  case Instruction::And:
1574  case Instruction::Or:
1575  case Instruction::Xor: {
1576  BinaryOperator *BO = cast<BinaryOperator>(I);
1577  assert(NewOps.size() == 2 && "binary operator with #ops != 2");
// The new binop is inserted directly before 'BO' (the last Create argument),
// and the wrap/exact/fast-math flags of the original are copied over so no
// poison-related information is lost.
1578  BinaryOperator *New =
1579  BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1580  NewOps[0], NewOps[1], "", BO);
1581  if (isa<OverflowingBinaryOperator>(BO)) {
1582  New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1583  New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1584  }
1585  if (isa<PossiblyExactOperator>(BO)) {
1586  New->setIsExact(BO->isExact());
1587  }
1588  if (isa<FPMathOperator>(BO))
1589  New->copyFastMathFlags(I);
1590  return New;
1591  }
1592  case Instruction::ICmp:
1593  assert(NewOps.size() == 2 && "icmp with #ops != 2");
1594  return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1595  NewOps[0], NewOps[1]);
1596  case Instruction::FCmp:
1597  assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1598  return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1599  NewOps[0], NewOps[1]);
1600  case Instruction::Trunc:
1601  case Instruction::ZExt:
1602  case Instruction::SExt:
1603  case Instruction::FPToUI:
1604  case Instruction::FPToSI:
1605  case Instruction::UIToFP:
1606  case Instruction::SIToFP:
1607  case Instruction::FPTrunc:
1608  case Instruction::FPExt: {
1609  // It's possible that the mask has a different number of elements from
1610  // the original cast. We recompute the destination type to match the mask.
1611  Type *DestTy = VectorType::get(
1612  I->getType()->getScalarType(),
1613  cast<VectorType>(NewOps[0]->getType())->getElementCount());
1614  assert(NewOps.size() == 1 && "cast with #ops != 1");
1615  return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1616  "", I);
1617  }
1618  case Instruction::GetElementPtr: {
1619  Value *Ptr = NewOps[0];
1620  ArrayRef<Value*> Idx = NewOps.slice(1);
// NOTE(review): the export dropped a line here (orig 1621) — presumably
// `GetElementPtrInst *GEP = GetElementPtrInst::Create(`; confirm upstream.
1622  cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1623  GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1624  return GEP;
1625  }
1626  }
1627  llvm_unreachable("failed to rebuild vector instructions");
1628 }
1629 
// NOTE(review): the doxygen export dropped the signature line (orig 1630) —
// presumably `static Value *evaluateInDifferentElementOrder(Value *V,
// ArrayRef<int> Mask) {`; confirm against the upstream LLVM source.
// Recursively rewrites the vector expression 'V' so that its elements appear
// in the order given by 'Mask'. Legality was established beforehand by
// canEvaluateShuffled, so the switch below must cover the same opcodes.
1631  // Mask.size() does not need to be equal to the number of vector elements.
1632 
1633  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1634  Type *EltTy = V->getType()->getScalarType();
1635  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1636  if (match(V, m_Undef()))
1637  return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1638 
1639  if (isa<ConstantAggregateZero>(V))
1640  return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1641 
// Any other constant can be reordered by a constant-folded shufflevector.
1642  if (Constant *C = dyn_cast<Constant>(V))
1643  return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
1644  Mask);
1645 
1646  Instruction *I = cast<Instruction>(V);
1647  switch (I->getOpcode()) {
1648  case Instruction::Add:
1649  case Instruction::FAdd:
1650  case Instruction::Sub:
1651  case Instruction::FSub:
1652  case Instruction::Mul:
1653  case Instruction::FMul:
1654  case Instruction::UDiv:
1655  case Instruction::SDiv:
1656  case Instruction::FDiv:
1657  case Instruction::URem:
1658  case Instruction::SRem:
1659  case Instruction::FRem:
1660  case Instruction::Shl:
1661  case Instruction::LShr:
1662  case Instruction::AShr:
1663  case Instruction::And:
1664  case Instruction::Or:
1665  case Instruction::Xor:
1666  case Instruction::ICmp:
1667  case Instruction::FCmp:
1668  case Instruction::Trunc:
1669  case Instruction::ZExt:
1670  case Instruction::SExt:
1671  case Instruction::FPToUI:
1672  case Instruction::FPToSI:
1673  case Instruction::UIToFP:
1674  case Instruction::SIToFP:
1675  case Instruction::FPTrunc:
1676  case Instruction::FPExt:
1677  case Instruction::Select:
1678  case Instruction::GetElementPtr: {
1679  SmallVector<Value*, 8> NewOps;
// A rebuild is forced when the mask changes the vector length, even if every
// operand comes back unchanged from the recursion.
1680  bool NeedsRebuild =
1681  (Mask.size() !=
1682  cast<FixedVectorType>(I->getType())->getNumElements());
1683  for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1684  Value *V;
1685  // Recursively call evaluateInDifferentElementOrder on vector arguments
1686  // as well. E.g. GetElementPtr may have scalar operands even if the
1687  // return value is a vector, so we need to examine the operand type.
1688  if (I->getOperand(i)->getType()->isVectorTy())
1689  V = evaluateInDifferentElementOrder(I->getOperand(i), Mask)
1690  else
1691  V = I->getOperand(i);
1692  NewOps.push_back(V);
1693  NeedsRebuild |= (V != I->getOperand(i));
1694  }
1695  if (NeedsRebuild) {
1696  return buildNew(I, NewOps);
1697  }
1698  return I;
1699  }
1700  case Instruction::InsertElement: {
1701  int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1702 
1703  // The insertelement was inserting at Element. Figure out which element
1704  // that becomes after shuffling. The answer is guaranteed to be unique
1705  // by CanEvaluateShuffled.
1706  bool Found = false;
1707  int Index = 0;
1708  for (int e = Mask.size(); Index != e; ++Index) {
1709  if (Mask[Index] == Element) {
1710  Found = true;
1711  break;
1712  }
1713  }
1714 
1715  // If element is not in Mask, no need to handle the operand 1 (element to
1716  // be inserted). Just evaluate values in operand 0 according to Mask.
1717  if (!Found)
1718  return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1719 
1720  Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1721  return InsertElementInst::Create(V, I->getOperand(1),
1722  ConstantInt::get(I32Ty, Index), "", I);
1723  }
1724  }
1725  llvm_unreachable("failed to reorder elements of vector instruction!");
1726 }
1727 
1728 // Returns true if the shuffle is extracting a contiguous range of values from
1729 // LHS, for example:
1730 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1731 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1732 // Shuffles to: |EE|FF|GG|HH|
1733 // +--+--+--+--+
// NOTE(review): the export dropped the first signature line (orig 1734) —
// presumably `static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,`;
// confirm against the upstream LLVM source.
1735  ArrayRef<int> Mask) {
1736  unsigned LHSElems =
1737  cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
1738  unsigned MaskElems = Mask.size();
1739  unsigned BegIdx = Mask.front();
1740  unsigned EndIdx = Mask.back();
// Reject masks that are not an in-bounds, length-consistent window of LHS,
// then verify every element is strictly consecutive starting at BegIdx.
1741  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1742  return false;
1743  for (unsigned I = 0; I != MaskElems; ++I)
1744  if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1745  return false;
1746  return true;
1747 }
1748 
1749 /// These are the ingredients in an alternate form binary operator as described
1750 /// below.
1751 struct BinopElts {
// NOTE(review): the export dropped the member declarations (orig 1752-1755) —
// presumably `BinaryOperator::BinaryOps Opcode; Value *Op0; Value *Op1;` and
// the start of the constructor with a zero/default opcode; confirm upstream.
// An Opcode of 0 is the sentinel for "no alternate form" (see operator bool).
1756  Value *V0 = nullptr, Value *V1 = nullptr) :
1757  Opcode(Opc), Op0(V0), Op1(V1) {}
1758  operator bool() const { return Opcode != 0; }
1759 };
1760 
1761 /// Binops may be transformed into binops with different opcodes and operands.
1762 /// Reverse the usual canonicalization to enable folds with the non-canonical
1763 /// form of the binop. If a transform is possible, return the elements of the
1764 /// new binop. If not, return invalid elements.
// NOTE(review): the export dropped the signature line (orig 1765) — presumably
// `static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {`;
// confirm against the upstream LLVM source.
1766  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1767  Type *Ty = BO->getType();
1768  switch (BO->getOpcode()) {
1769  case Instruction::Shl: {
1770  // shl X, C --> mul X, (1 << C)
1771  Constant *C;
1772  if (match(BO1, m_Constant(C))) {
1773  Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1774  return { Instruction::Mul, BO0, ShlOne };
1775  }
1776  break;
1777  }
1778  case Instruction::Or: {
1779  // or X, C --> add X, C (when X and C have no common bits set)
1780  const APInt *C;
1781  if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1782  return { Instruction::Add, BO0, BO1 };
1783  break;
1784  }
1785  default:
1786  break;
1787  }
// Default-constructed BinopElts has Opcode == 0, i.e. "no alternate form".
1788  return {};
1789 }
1790 
// NOTE(review): the export dropped the signature line (orig 1791) — presumably
// a static helper taking `ShuffleVectorInst &Shuf` (called from
// foldSelectShuffle below); confirm against the upstream LLVM source.
// Folds a select-shuffle of a value with a binop of that same value and a
// constant into a single binop with a re-shuffled constant.
1792  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1793 
1794  // Are we shuffling together some value and that same value after it has been
1795  // modified by a binop with a constant?
1796  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1797  Constant *C;
1798  bool Op0IsBinop;
1799  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1800  Op0IsBinop = true;
1801  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1802  Op0IsBinop = false;
1803  else
1804  return nullptr;
1805 
1806  // The identity constant for a binop leaves a variable operand unchanged. For
1807  // a vector, this is a splat of something like 0, -1, or 1.
1808  // If there's no identity constant for this binop, we're done.
1809  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1810  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1811  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1812  if (!IdC)
1813  return nullptr;
1814 
1815  // Shuffle identity constants into the lanes that return the original value.
1816  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1817  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1818  // The existing binop constant vector remains in the same operand position.
// NOTE(review): the export dropped lines here (orig 1819 and 1821) —
// presumably `ArrayRef<int> Mask = Shuf.getShuffleMask();` and the second arm
// of the ternary, `ConstantExpr::getShuffleVector(IdC, C, Mask);`; confirm
// against the upstream LLVM source.
1820  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1822 
// NOTE(review): a dropped line (orig 1824) presumably tested
// `is_contained(Mask, UndefMaskElem) &&` as part of this condition.
1823  bool MightCreatePoisonOrUB =
1825  (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1826  if (MightCreatePoisonOrUB)
1827  NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
1828 
1829  // shuf (bop X, C), X, M --> bop X, C'
1830  // shuf X, (bop X, C), M --> bop X, C'
1831  Value *X = Op0IsBinop ? Op1 : Op0;
1832  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1833  NewBO->copyIRFlags(BO);
1834 
1835  // An undef shuffle mask element may propagate as an undef constant element in
1836  // the new binop. That would produce poison where the original code might not.
1837  // If we already made a safe constant, then there's no danger.
1838  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
1839  NewBO->dropPoisonGeneratingFlags();
1840  return NewBO;
1841 }
1842 
1843 /// If we have an insert of a scalar to a non-zero element of an undefined
1844 /// vector and then shuffle that value, that's the same as inserting to the zero
1845 /// element and shuffling. Splatting from the zero element is recognized as the
1846 /// canonical form of splat.
// NOTE(review): the export dropped the signature lines (orig 1847-1848) —
// presumably a static helper taking `ShuffleVectorInst &Shuf` and an
// `InstCombiner::BuilderTy &Builder`; confirm against the upstream LLVM source.
1849  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
// NOTE(review): a dropped line here (orig 1850) presumably declared
// `ArrayRef<int> Mask = Shuf.getShuffleMask();` — 'Mask' is used below.
1851  Value *X;
1852  uint64_t IndexC;
1853 
1854  // Match a shuffle that is a splat to a non-zero element.
1855  if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
1856  m_ConstantInt(IndexC)))) ||
1857  !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
1858  return nullptr;
1859 
1860  // Insert into element 0 of an undef vector.
1861  UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1862  Constant *Zero = Builder.getInt32(0);
1863  Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1864 
1865  // Splat from element 0. Any mask element that is undefined remains undefined.
1866  // For example:
1867  // shuf (inselt undef, X, 2), undef, <2,2,undef>
1868  // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1869  unsigned NumMaskElts =
1870  cast<FixedVectorType>(Shuf.getType())->getNumElements();
1871  SmallVector<int, 16> NewMask(NumMaskElts, 0);
1872  for (unsigned i = 0; i != NumMaskElts; ++i)
1873  if (Mask[i] == UndefMaskElem)
1874  NewMask[i] = Mask[i];
1875 
1876  return new ShuffleVectorInst(NewIns, UndefVec, NewMask);
1877 }
1878 
1879 /// Try to fold shuffles that are the equivalent of a vector select.
// NOTE(review): the export dropped part of the signature (orig 1880-1881) —
// presumably `static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
// InstCombiner::BuilderTy &Builder,` followed by the visible DataLayout
// parameter; confirm against the upstream LLVM source.
1882  const DataLayout &DL) {
1883  if (!Shuf.isSelect())
1884  return nullptr;
1885 
1886  // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
1887  // Commuting undef to operand 0 conflicts with another canonicalization.
1888  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
1889  if (!match(Shuf.getOperand(1), m_Undef()) &&
1890  Shuf.getMaskValue(0) >= (int)NumElts) {
1891  // TODO: Can we assert that both operands of a shuffle-select are not undef
1892  // (otherwise, it would have been folded by instsimplify?
1893  Shuf.commute();
1894  return &Shuf;
1895  }
1896 
// NOTE(review): a dropped line here (orig 1897) presumably called
// `foldSelectShuffleWith(Shuf)` (the helper defined above) into 'I'.
1898  return I;
1899 
1900  BinaryOperator *B0, *B1;
1901  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
1902  !match(Shuf.getOperand(1), m_BinOp(B1)))
1903  return nullptr;
1904 
1905  Value *X, *Y;
1906  Constant *C0, *C1;
1907  bool ConstantsAreOp1;
// NOTE(review): dropped lines (orig 1909 and 1912) presumably matched B1
// against `m_BinOp(m_Value(Y), m_Constant(C1))` / the commuted form; confirm.
1908  if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
1910  ConstantsAreOp1 = true;
1911  else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
1913  ConstantsAreOp1 = false;
1914  else
1915  return nullptr;
1916 
1917  // We need matching binops to fold the lanes together.
1918  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
1919  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
1920  bool DropNSW = false;
1921  if (ConstantsAreOp1 && Opc0 != Opc1) {
1922  // TODO: We drop "nsw" if shift is converted into multiply because it may
1923  // not be correct when the shift amount is BitWidth - 1. We could examine
1924  // each vector element to determine if it is safe to keep that flag.
1925  if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
1926  DropNSW = true;
1927  if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
1928  assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
1929  Opc0 = AltB0.Opcode;
1930  C0 = cast<Constant>(AltB0.Op1);
1931  } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
1932  assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
1933  Opc1 = AltB1.Opcode;
1934  C1 = cast<Constant>(AltB1.Op1);
1935  }
1936  }
1937 
1938  if (Opc0 != Opc1)
1939  return nullptr;
1940 
1941  // The opcodes must be the same. Use a new name to make that clear.
1942  BinaryOperator::BinaryOps BOpc = Opc0;
1943 
1944  // Select the constant elements needed for the single binop.
// NOTE(review): dropped lines (orig 1945-1946) presumably built 'Mask' from
// Shuf.getShuffleMask() and 'NewC' via ConstantExpr::getShuffleVector(C0, C1,
// Mask); both names are used below. Confirm against upstream.
1947 
1948  // We are moving a binop after a shuffle. When a shuffle has an undefined
1949  // mask element, the result is undefined, but it is not poison or undefined
1950  // behavior. That is not necessarily true for div/rem/shift.
// NOTE(review): dropped lines (orig 1952-1953, 1955) presumably completed this
// condition (undef-mask containment plus isIntDivRem/isShift) and the call to
// InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC, ...); confirm.
1951  bool MightCreatePoisonOrUB =
1954  if (MightCreatePoisonOrUB)
1956  ConstantsAreOp1);
1957 
1958  Value *V;
1959  if (X == Y) {
1960  // Remove a binop and the shuffle by rearranging the constant:
1961  // shuffle (op V, C0), (op V, C1), M --> op V, C'
1962  // shuffle (op C0, V), (op C1, V), M --> op C', V
1963  V = X;
1964  } else {
1965  // If there are 2 different variable operands, we must create a new shuffle
1966  // (select) first, so check uses to ensure that we don't end up with more
1967  // instructions than we started with.
1968  if (!B0->hasOneUse() && !B1->hasOneUse())
1969  return nullptr;
1970 
1971  // If we use the original shuffle mask and op1 is *variable*, we would be
1972  // putting an undef into operand 1 of div/rem/shift. This is either UB or
1973  // poison. We do not have to guard against UB when *constants* are op1
1974  // because safe constants guarantee that we do not overflow sdiv/srem (and
1975  // there's no danger for other opcodes).
1976  // TODO: To allow this case, create a new shuffle mask with no undefs.
1977  if (MightCreatePoisonOrUB && !ConstantsAreOp1)
1978  return nullptr;
1979 
1980  // Note: In general, we do not create new shuffles in InstCombine because we
1981  // do not know if a target can lower an arbitrary shuffle optimally. In this
1982  // case, the shuffle uses the existing mask, so there is no additional risk.
1983 
1984  // Select the variable vectors first, then perform the binop:
1985  // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
1986  // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
1987  V = Builder.CreateShuffleVector(X, Y, Mask);
1988  }
1989 
1990  Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
1991  BinaryOperator::Create(BOpc, NewC, V);
1992 
1993  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
1994  // 1. If we changed an opcode, poison conditions might have changed.
1995  // 2. If the shuffle had undef mask elements, the new binop might have undefs
1996  // where the original code did not. But if we already made a safe constant,
1997  // then there's no danger.
1998  NewBO->copyIRFlags(B0);
1999  NewBO->andIRFlags(B1);
2000  if (DropNSW)
2001  NewBO->setHasNoSignedWrap(false);
2002  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2003  NewBO->dropPoisonGeneratingFlags();
2004  return NewBO;
2005 }
2006 
2007 /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2008 /// Example (little endian):
2009 /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
// NOTE(review): the export dropped the first signature line (orig 2010) —
// presumably `static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,`;
// confirm against the upstream LLVM source.
2011  bool IsBigEndian) {
2012  // This must be a bitcasted shuffle of 1 vector integer operand.
2013  Type *DestType = Shuf.getType();
2014  Value *X;
2015  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2016  !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2017  return nullptr;
2018 
2019  // The source type must have the same number of elements as the shuffle,
2020  // and the source element type must be larger than the shuffle element type.
2021  Type *SrcType = X->getType();
2022  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2023  cast<FixedVectorType>(SrcType)->getNumElements() !=
2024  cast<FixedVectorType>(DestType)->getNumElements() ||
2025  SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2026  return nullptr;
2027 
2028  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2029  "Expected a shuffle that decreases length");
2030 
2031  // Last, check that the mask chooses the correct low bits for each narrow
2032  // element in the result.
2033  uint64_t TruncRatio =
2034  SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
// NOTE(review): a dropped line here (orig 2035) presumably declared
// `ArrayRef<int> Mask = Shuf.getShuffleMask();` — 'Mask' is used below.
2036  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2037  if (Mask[i] == UndefMaskElem)
2038  continue;
// On big-endian targets the low bits of a wide element live at the END of
// its group of narrow elements, hence the (i + 1) * TruncRatio - 1 index.
2039  uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2040  assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2041  if (Mask[i] != (int)LSBIndex)
2042  return nullptr;
2043  }
2044 
2045  return new TruncInst(X, DestType);
2046 }
2047 
2048 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2049 /// narrowing (concatenating with undef and extracting back to the original
2050 /// length). This allows replacing the wide select with a narrow select.
// NOTE(review): the export dropped the signature lines (orig 2051-2052) —
// presumably `static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
// InstCombiner::BuilderTy &Builder) {`; confirm against the upstream source.
2053  // This must be a narrowing identity shuffle. It extracts the 1st N elements
2054  // of the 1st vector operand of a shuffle.
2055  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2056  return nullptr;
2057 
2058  // The vector being shuffled must be a vector select that we can eliminate.
2059  // TODO: The one-use requirement could be eased if X and/or Y are constants.
2060  Value *Cond, *X, *Y;
// NOTE(review): a dropped line here (orig 2062) presumably completed this
// match with `m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))`.
2061  if (!match(Shuf.getOperand(0),
2063  return nullptr;
2064 
2065  // We need a narrow condition value. It must be extended with undef elements
2066  // and have the same number of elements as this shuffle.
2067  unsigned NarrowNumElts =
2068  cast<FixedVectorType>(Shuf.getType())->getNumElements();
2069  Value *NarrowCond;
2070  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2071  cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2072  NarrowNumElts ||
2073  !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2074  return nullptr;
2075 
2076  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2077  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2078  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2079  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2080  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2081 }
2082 
2083 /// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
// NOTE(review): the export dropped the signature line (orig 2084) — presumably
// `static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {`;
// confirm against the upstream LLVM source.
2085  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2086  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2087  return nullptr;
2088 
2089  Value *X, *Y;
// NOTE(review): a dropped line here (orig 2090) presumably declared
// `ArrayRef<int> Mask;` — it is bound by m_Mask below and used afterwards.
2091  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2092  return nullptr;
2093 
2094  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2095  // then combining may result in worse codegen.
2096  if (!Op0->hasOneUse())
2097  return nullptr;
2098 
2099  // We are extracting a subvector from a shuffle. Remove excess elements from
2100  // the 1st shuffle mask to eliminate the extract.
2101  //
2102  // This transform is conservatively limited to identity extracts because we do
2103  // not allow arbitrary shuffle mask creation as a target-independent transform
2104  // (because we can't guarantee that will lower efficiently).
2105  //
2106  // If the extracting shuffle has an undef mask element, it transfers to the
2107  // new shuffle mask. Otherwise, copy the original mask element. Example:
2108  // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2109  // shuf X, Y, <C0, undef, C2, undef>
2110  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2111  SmallVector<int, 16> NewMask(NumElts);
2112  assert(NumElts < Mask.size() &&
2113  "Identity with extract must have less elements than its inputs");
2114 
2115  for (unsigned i = 0; i != NumElts; ++i) {
2116  int ExtractMaskElt = Shuf.getMaskValue(i);
2117  int MaskElt = Mask[i];
2118  NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
2119  }
2120  return new ShuffleVectorInst(X, Y, NewMask);
2121 }
2122 
2123 /// Try to replace a shuffle with an insertelement or try to replace a shuffle
2124 /// operand with the operand of an insertelement.
// NOTE(review): the export dropped the first signature line (orig 2125) —
// presumably `static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,`;
// confirm against the upstream LLVM source.
2126  InstCombinerImpl &IC) {
2127  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
// NOTE(review): a dropped line here (orig 2128) presumably declared
// `SmallVector<int, 16> Mask;` — it is filled by getShuffleMask below.
2129  Shuf.getShuffleMask(Mask);
2130 
2131  // The shuffle must not change vector sizes.
2132  // TODO: This restriction could be removed if the insert has only one use
2133  // (because the transform would require a new length-changing shuffle).
2134  int NumElts = Mask.size();
2135  if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements()))
2136  return nullptr;
2137 
2138  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2139  // not be able to handle it there if the insertelement has >1 use.
2140  // If the shuffle has an insertelement operand but does not choose the
2141  // inserted scalar element from that value, then we can replace that shuffle
2142  // operand with the source vector of the insertelement.
2143  Value *X;
2144  uint64_t IdxC;
2145  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2146  // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2147  if (!is_contained(Mask, (int)IdxC))
2148  return IC.replaceOperand(Shuf, 0, X);
2149  }
2150  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2151  // Offset the index constant by the vector width because we are checking for
2152  // accesses to the 2nd vector input of the shuffle.
2153  IdxC += NumElts;
2154  // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2155  if (!is_contained(Mask, (int)IdxC))
2156  return IC.replaceOperand(Shuf, 1, X);
2157  }
2158 
2159  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2160  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2161  // We need an insertelement with a constant index.
2162  if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2163  m_ConstantInt(IndexC))))
2164  return false;
2165 
2166  // Test the shuffle mask to see if it splices the inserted scalar into the
2167  // operand 1 vector of the shuffle.
2168  int NewInsIndex = -1;
2169  for (int i = 0; i != NumElts; ++i) {
2170  // Ignore undef mask elements.
2171  if (Mask[i] == -1)
2172  continue;
2173 
2174  // The shuffle takes elements of operand 1 without lane changes.
2175  if (Mask[i] == NumElts + i)
2176  continue;
2177 
2178  // The shuffle must choose the inserted scalar exactly once.
2179  if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2180  return false;
2181 
2182  // The shuffle is placing the inserted scalar into element i.
2183  NewInsIndex = i;
2184  }
2185 
2186  assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2187 
2188  // Index is updated to the potentially translated insertion lane.
2189  IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
2190  return true;
2191  };
2192 
2193  // If the shuffle is unnecessary, insert the scalar operand directly into
2194  // operand 1 of the shuffle. Example:
2195  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2196  Value *Scalar;
2197  ConstantInt *IndexC;
2198  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2199  return InsertElementInst::Create(V1, Scalar, IndexC);
2200 
2201  // Try again after commuting shuffle. Example:
2202  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2203  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2204  std::swap(V0, V1);
// NOTE(review): a dropped line here (orig 2205) presumably commuted the mask,
// e.g. `ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);` — confirm.
2206  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2207  return InsertElementInst::Create(V1, Scalar, IndexC);
2208 
2209  return nullptr;
2210 }
2211 
// NOTE(review): the export dropped the signature line (orig 2212) — presumably
// `static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {`;
// confirm against the upstream LLVM source.
// Folds a shuffle of two identity-with-padding (concat-with-undef) shuffles
// into a single shuffle of the narrow source operands.
2213  // Match the operands as identity with padding (also known as concatenation
2214  // with undef) shuffles of the same source type. The backend is expected to
2215  // recreate these concatenations from a shuffle of narrow operands.
2216  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2217  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2218  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2219  !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2220  return nullptr;
2221 
2222  // We limit this transform to power-of-2 types because we expect that the
2223  // backend can convert the simplified IR patterns to identical nodes as the
2224  // original IR.
2225  // TODO: If we can verify the same behavior for arbitrary types, the
2226  // power-of-2 checks can be removed.
2227  Value *X = Shuffle0->getOperand(0);
2228  Value *Y = Shuffle1->getOperand(0);
2229  if (X->getType() != Y->getType() ||
2230  !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2231  !isPowerOf2_32(
2232  cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2233  !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2234  match(X, m_Undef()) || match(Y, m_Undef()))
2235  return nullptr;
2236  assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2237  match(Shuffle1->getOperand(1), m_Undef()) &&
2238  "Unexpected operand for identity shuffle");
2239 
2240  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2241  // operands directly by adjusting the shuffle mask to account for the narrower
2242  // types:
2243  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2244  int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2245  int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2246  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2247 
// NOTE(review): a dropped line here (orig 2248) presumably declared
// `ArrayRef<int> Mask = Shuf.getShuffleMask();` — 'Mask' is used below.
2249  SmallVector<int, 16> NewMask(Mask.size(), -1);
2250  for (int i = 0, e = Mask.size(); i != e; ++i) {
2251  if (Mask[i] == -1)
2252  continue;
2253 
2254  // If this shuffle is choosing an undef element from 1 of the sources, that
2255  // element is undef.
2256  if (Mask[i] < WideElts) {
2257  if (Shuffle0->getMaskValue(Mask[i]) == -1)
2258  continue;
2259  } else {
2260  if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2261  continue;
2262  }
2263 
2264  // If this shuffle is choosing from the 1st narrow op, the mask element is
2265  // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2266  // element is offset down to adjust for the narrow vector widths.
2267  if (Mask[i] < WideElts) {
2268  assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2269  NewMask[i] = Mask[i];
2270  } else {
2271  assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2272  NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2273  }
2274  }
2275  return new ShuffleVectorInst(X, Y, NewMask);
2276 }
2277 
2279  Value *LHS = SVI.getOperand(0);
2280  Value *RHS = SVI.getOperand(1);
2281  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2282  if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2283  SVI.getType(), ShufQuery))
2284  return replaceInstUsesWith(SVI, V);
2285 
2286  // Bail out for scalable vectors
2287  if (isa<ScalableVectorType>(LHS->getType()))
2288  return nullptr;
2289 
2290  unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2291  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2292 
2293  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2294  //
2295  // if X and Y are of the same (vector) type, and the element size is not
2296  // changed by the bitcasts, we can distribute the bitcasts through the
2297  // shuffle, hopefully reducing the number of instructions. We make sure that
2298  // at least one bitcast only has one use, so we don't *increase* the number of
2299  // instructions here.
2300  Value *X, *Y;
2301  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2302  X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2303  X->getType()->getScalarSizeInBits() ==
2304  SVI.getType()->getScalarSizeInBits() &&
2305  (LHS->hasOneUse() || RHS->hasOneUse())) {
2306  Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2307  SVI.getName() + ".uncasted");
2308  return new BitCastInst(V, SVI.getType());
2309  }
2310 
2313 
2314  // Peek through a bitcasted shuffle operand by scaling the mask. If the
2315  // simulated shuffle can simplify, then this shuffle is unnecessary:
2316  // shuf (bitcast X), undef, Mask --> bitcast X'
2317  // TODO: This could be extended to allow length-changing shuffles.
2318  // The transform might also be obsoleted if we allowed canonicalization
2319  // of bitcasted shuffles.
2320  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2321  X->getType()->isVectorTy() && VWidth == LHSWidth) {
2322  // Try to create a scaled mask constant.
2323  auto *XType = cast<FixedVectorType>(X->getType());
2324  unsigned XNumElts = XType->getNumElements();
2325  SmallVector<int, 16> ScaledMask;
2326  if (XNumElts >= VWidth) {
2327  assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2328  narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2329  } else {
2330  assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2331  if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2332  ScaledMask.clear();
2333  }
2334  if (!ScaledMask.empty()) {
2335  // If the shuffled source vector simplifies, cast that value to this
2336  // shuffle's type.
2337  if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
2338  ScaledMask, XType, ShufQuery))
2339  return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2340  }
2341  }
2342 
2343  // shuffle x, x, mask --> shuffle x, undef, mask'
2344  if (LHS == RHS) {
2345  assert(!match(RHS, m_Undef()) &&
2346  "Shuffle with 2 undef ops not simplified?");
2347  // Remap any references to RHS to use LHS.
2348  SmallVector<int, 16> Elts;
2349  for (unsigned i = 0; i != VWidth; ++i) {
2350  // Propagate undef elements or force mask to LHS.
2351  if (Mask[i] < 0)
2352  Elts.push_back(UndefMaskElem);
2353  else
2354  Elts.push_back(Mask[i] % LHSWidth);
2355  }
2356  return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts);
2357  }
2358 
2359  // shuffle undef, x, mask --> shuffle x, undef, mask'
2360  if (match(LHS, m_Undef())) {
2361  SVI.commute();
2362  return &SVI;
2363  }
2364 
2366  return I;
2367 
2368  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
2369  return I;
2370 
2371  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2372  return I;
2373 
2374  if (Instruction *I = narrowVectorSelect(SVI, Builder))
2375  return I;
2376 
2377  APInt UndefElts(VWidth, 0);
2378  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2379  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2380  if (V != &SVI)
2381  return replaceInstUsesWith(SVI, V);
2382  return &SVI;
2383  }
2384 
2386  return I;
2387 
2388  // These transforms have the potential to lose undef knowledge, so they are
2389  // intentionally placed after SimplifyDemandedVectorElts().
2390  if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2391  return I;
2393  return I;
2394 
2395  if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2397  return replaceInstUsesWith(SVI, V);
2398  }
2399 
2400  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2401  // a non-vector type. We can instead bitcast the original vector followed by
2402  // an extract of the desired element:
2403  //
2404  // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2405  // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2406  // %1 = bitcast <4 x i8> %sroa to i32
2407  // Becomes:
2408  // %bc = bitcast <16 x i8> %in to <4 x i32>
2409  // %ext = extractelement <4 x i32> %bc, i32 0
2410  //
2411  // If the shuffle is extracting a contiguous range of values from the input
2412  // vector then each use which is a bitcast of the extracted size can be
2413  // replaced. This will work if the vector types are compatible, and the begin
2414  // index is aligned to a value in the casted vector type. If the begin index
2415  // isn't aligned then we can shuffle the original vector (keeping the same
2416  // vector type) before extracting.
2417  //
2418  // This code will bail out if the target type is fundamentally incompatible
2419  // with vectors of the source type.
2420  //
2421  // Example of <16 x i8>, target type i32:
2422  // Index range [4,8): v-----------v Will work.
2423  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2424  // <16 x i8>: | | | | | | | | | | | | | | | | |
2425  // <4 x i32>: | | | | |
2426  // +-----------+-----------+-----------+-----------+
2427  // Index range [6,10): ^-----------^ Needs an extra shuffle.
2428  // Target type i40: ^--------------^ Won't work, bail.
2429  bool MadeChange = false;
2430  if (isShuffleExtractingFromLHS(SVI, Mask)) {
2431  Value *V = LHS;
2432  unsigned MaskElems = Mask.size();
2433  auto *SrcTy = cast<FixedVectorType>(V->getType());
2434  unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize();
2435  unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2436  assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2437  unsigned SrcNumElems = SrcTy->getNumElements();
2440  for (User *U : SVI.users())
2441  if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2442  if (!BC->use_empty())
2443  // Only visit bitcasts that weren't previously handled.
2444  BCs.push_back(BC);
2445  for (BitCastInst *BC : BCs) {
2446  unsigned BegIdx = Mask.front();
2447  Type *TgtTy = BC->getDestTy();
2448  unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2449  if (!TgtElemBitWidth)
2450  continue;
2451  unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2452  bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2453  bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2454  if (!VecBitWidthsEqual)
2455  continue;
2456  if (!VectorType::isValidElementType(TgtTy))
2457  continue;
2458  auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2459  if (!BegIsAligned) {
2460  // Shuffle the input so [0,NumElements) contains the output, and
2461  // [NumElems,SrcNumElems) is undef.
2462  SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2463  for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2464  ShuffleMask[I] = Idx;
2465  V = Builder.CreateShuffleVector(V, ShuffleMask,
2466  SVI.getName() + ".extract");
2467  BegIdx = 0;
2468  }
2469  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2470  assert(SrcElemsPerTgtElem);
2471  BegIdx /= SrcElemsPerTgtElem;
2472  bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2473  auto *NewBC =
2474  BCAlreadyExists
2475  ? NewBCs[CastSrcTy]
2476  : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2477  if (!BCAlreadyExists)
2478  NewBCs[CastSrcTy] = NewBC;
2479  auto *Ext = Builder.CreateExtractElement(
2480  NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2481  // The shufflevector isn't being replaced: the bitcast that used it
2482  // is. InstCombine will visit the newly-created instructions.
2483  replaceInstUsesWith(*BC, Ext);
2484  MadeChange = true;
2485  }
2486  }
2487 
2488  // If the LHS is a shufflevector itself, see if we can combine it with this
2489  // one without producing an unusual shuffle.
2490  // Cases that might be simplified:
2491  // 1.
2492  // x1=shuffle(v1,v2,mask1)
2493  // x=shuffle(x1,undef,mask)
2494  // ==>
2495  // x=shuffle(v1,undef,newMask)
2496  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2497  // 2.
2498  // x1=shuffle(v1,undef,mask1)
2499  // x=shuffle(x1,x2,mask)
2500  // where v1.size() == mask1.size()
2501  // ==>
2502  // x=shuffle(v1,x2,newMask)
2503  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2504  // 3.
2505  // x2=shuffle(v2,undef,mask2)
2506  // x=shuffle(x1,x2,mask)
2507  // where v2.size() == mask2.size()
2508  // ==>
2509  // x=shuffle(x1,v2,newMask)
2510  // newMask[i] = (mask[i] < x1.size())
2511  // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2512  // 4.
2513  // x1=shuffle(v1,undef,mask1)
2514  // x2=shuffle(v2,undef,mask2)
2515  // x=shuffle(x1,x2,mask)
2516  // where v1.size() == v2.size()
2517  // ==>
2518  // x=shuffle(v1,v2,newMask)
2519  // newMask[i] = (mask[i] < x1.size())
2520  // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2521  //
2522  // Here we are really conservative:
2523  // we are absolutely afraid of producing a shuffle mask not in the input
2524  // program, because the code gen may not be smart enough to turn a merged
2525  // shuffle into two specific shuffles: it may produce worse code. As such,
2526  // we only merge two shuffles if the result is either a splat or one of the
2527  // input shuffle masks. In this case, merging the shuffles just removes
2528  // one instruction, which we know is safe. This is good for things like
2529  // turning: (splat(splat)) -> splat, or
2530  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2531  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2532  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2533  if (LHSShuffle)
2534  if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
2535  LHSShuffle = nullptr;
2536  if (RHSShuffle)
2537  if (!match(RHSShuffle->getOperand(1), m_Undef()))
2538  RHSShuffle = nullptr;
2539  if (!LHSShuffle && !RHSShuffle)
2540  return MadeChange ? &SVI : nullptr;
2541 
2542  Value* LHSOp0 = nullptr;
2543  Value* LHSOp1 = nullptr;
2544  Value* RHSOp0 = nullptr;
2545  unsigned LHSOp0Width = 0;
2546  unsigned RHSOp0Width = 0;
2547  if (LHSShuffle) {
2548  LHSOp0 = LHSShuffle->getOperand(0);
2549  LHSOp1 = LHSShuffle->getOperand(1);
2550  LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
2551  }
2552  if (RHSShuffle) {
2553  RHSOp0 = RHSShuffle->getOperand(0);
2554  RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
2555  }
2556  Value* newLHS = LHS;
2557  Value* newRHS = RHS;
2558  if (LHSShuffle) {
2559  // case 1
2560  if (match(RHS, m_Undef())) {
2561  newLHS = LHSOp0;
2562  newRHS = LHSOp1;
2563  }
2564  // case 2 or 4
2565  else if (LHSOp0Width == LHSWidth) {
2566  newLHS = LHSOp0;
2567  }
2568  }
2569  // case 3 or 4
2570  if (RHSShuffle && RHSOp0Width == LHSWidth) {
2571  newRHS = RHSOp0;
2572  }
2573  // case 4
2574  if (LHSOp0 == RHSOp0) {
2575  newLHS = LHSOp0;
2576  newRHS = nullptr;
2577  }
2578 
2579  if (newLHS == LHS && newRHS == RHS)
2580  return MadeChange ? &SVI : nullptr;
2581 
2582  ArrayRef<int> LHSMask;
2583  ArrayRef<int> RHSMask;
2584  if (newLHS != LHS)
2585  LHSMask = LHSShuffle->getShuffleMask();
2586  if (RHSShuffle && newRHS != RHS)
2587  RHSMask = RHSShuffle->getShuffleMask();
2588 
2589  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2590  SmallVector<int, 16> newMask;
2591  bool isSplat = true;
2592  int SplatElt = -1;
2593  // Create a new mask for the new ShuffleVectorInst so that the new
2594  // ShuffleVectorInst is equivalent to the original one.
2595  for (unsigned i = 0; i < VWidth; ++i) {
2596  int eltMask;
2597  if (Mask[i] < 0) {
2598  // This element is an undef value.
2599  eltMask = -1;
2600  } else if (Mask[i] < (int)LHSWidth) {
2601  // This element is from left hand side vector operand.
2602  //
2603  // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2604  // new mask value for the element.
2605  if (newLHS != LHS) {
2606  eltMask = LHSMask[Mask[i]];
2607  // If the value selected is an undef value, explicitly specify it
2608  // with a -1 mask value.
2609  if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2610  eltMask = -1;
2611  } else
2612  eltMask = Mask[i];
2613  } else {
2614  // This element is from right hand side vector operand
2615  //
2616  // If the value selected is an undef value, explicitly specify it
2617  // with a -1 mask value. (case 1)
2618  if (match(RHS, m_Undef()))
2619  eltMask = -1;
2620  // If RHS is going to be replaced (case 3 or 4), calculate the
2621  // new mask value for the element.
2622  else if (newRHS != RHS) {
2623  eltMask = RHSMask[Mask[i]-LHSWidth];
2624  // If the value selected is an undef value, explicitly specify it
2625  // with a -1 mask value.
2626  if (eltMask >= (int)RHSOp0Width) {
2627  assert(match(RHSShuffle->getOperand(1), m_Undef()) &&
2628  "should have been check above");
2629  eltMask = -1;
2630  }
2631  } else
2632  eltMask = Mask[i]-LHSWidth;
2633 
2634  // If LHS's width is changed, shift the mask value accordingly.
2635  // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2636  // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2637  // If newRHS == newLHS, we want to remap any references from newRHS to
2638  // newLHS so that we can properly identify splats that may occur due to
2639  // obfuscation across the two vectors.
2640  if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
2641  eltMask += newLHSWidth;
2642  }
2643 
2644  // Check if this could still be a splat.
2645  if (eltMask >= 0) {
2646  if (SplatElt >= 0 && SplatElt != eltMask)
2647  isSplat = false;
2648  SplatElt = eltMask;
2649  }
2650 
2651  newMask.push_back(eltMask);
2652  }
2653 
2654  // If the result mask is equal to one of the original shuffle masks,
2655  // or is a splat, do the replacement.
2656  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2657  if (!newRHS)
2658  newRHS = UndefValue::get(newLHS->getType());
2659  return new ShuffleVectorInst(newLHS, newRHS, newMask);
2660  }
2661 
2662  return MadeChange ? &SVI : nullptr;
2663 }
i
i
Definition: README.txt:29
llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition: Type.h:76
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
BinopElts::Opcode
BinaryOperator::BinaryOps Opcode
Definition: InstCombineVectorOps.cpp:1752
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
B1
llvm::ShuffleVectorInst::isIdentityWithExtract
bool isIdentityWithExtract() const
Return true if this shuffle extracts the first N elements of exactly one source vector.
Definition: Instructions.cpp:2274
llvm::SimplifyExtractElementInst
Value * SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q)
Given operands for an ExtractElementInst, fold the result or return null.
Definition: InstructionSimplify.cpp:4577
llvm
Definition: AllocatorList.h:23
llvm::MaskedValueIsZero
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if 'V & Mask' is known to be zero.
Definition: ValueTracking.cpp:359
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1519
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:447
InstCombiner.h
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:722
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::SimplifyQuery
Definition: InstructionSimplify.h:94
llvm::ShuffleVectorInst::getShuffleMask
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Definition: Instructions.cpp:2047
llvm::ElementCount
Definition: TypeSize.h:386
foldInsSequenceIntoSplat
static Instruction * foldInsSequenceIntoSplat(InsertElementInst &InsElt)
Turn a chain of inserts that splats a value into an insert + shuffle: insertelt(insertelt(insertelt(i...
Definition: InstCombineVectorOps.cpp:1054
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1850
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5136
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:317
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1489
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:131
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1923
llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition: PatternMatch.h:1568
ErrorHandling.h
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:2945
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::Instruction::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Definition: Instruction.cpp:132
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
llvm::ConstantInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:242
APInt.h
llvm::InsertElementInst::Create
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1928
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:34
llvm::ExtractElementInst::Create
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1863
buildNew
static Value * buildNew(Instruction *I, ArrayRef< Value * > NewOps)
Rebuild a new instruction just like 'I' but with the new operands given.
Definition: InstCombineVectorOps.cpp:1554
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:411
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
DenseMap.h
llvm::Instruction::isShift
bool isShift() const
Definition: Instruction.h:167
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
foldSelectShuffle
static Instruction * foldSelectShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder, const DataLayout &DL)
Try to fold shuffles that are the equivalent of a vector select.
Definition: InstCombineVectorOps.cpp:1880
llvm::InstCombinerImpl::visitExtractElementInst
Instruction * visitExtractElementInst(ExtractElementInst &EI)
Definition: InstCombineVectorOps.cpp:336
llvm::InstCombinerImpl::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombineInternal.h:386
llvm::Optional
Definition: APInt.h:33
isShuffleEquivalentToSelect
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:1026
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::InstCombinerImpl::visitShuffleVectorInst
Instruction * visitShuffleVectorInst(ShuffleVectorInst &SVI)
Definition: InstCombineVectorOps.cpp:2278
Operator.h
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:424
STLExtras.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::UnaryOperator
Definition: InstrTypes.h:103
llvm::PatternMatch::m_BitCast
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition: PatternMatch.h:1598
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:197
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
foldConstantInsEltIntoShuffle
static Instruction * foldConstantInsEltIntoShuffle(InsertElementInst &InsElt)
insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex --> shufflevector X,...
Definition: InstCombineVectorOps.cpp:1246
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:1974
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
llvm::SmallBitVector
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
Definition: SmallBitVector.h:34
Instruction.h
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:270
llvm::UnaryOperator::getOpcode
UnaryOps getOpcode() const
Definition: InstrTypes.h:173
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1505
llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition: Instructions.h:1746
cheapToScalarize
static bool cheapToScalarize(Value *V, bool IsConstantExtractIndex)
Return true if the value is cheaper to scalarize than it is to leave as a vector operation.
Definition: InstCombineVectorOps.cpp:59
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition: APInt.h:1442
llvm::ShuffleVectorInst::isSelect
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
Definition: Instructions.h:2159
InstCombineInternal.h
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1467
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2666
llvm::LegalizeActions::NotFound
@ NotFound
Sentinel value for when no action was found in the specified table.
Definition: LegalizerInfo.h:92
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::Instruction::setHasNoSignedWrap
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
Definition: Instruction.cpp:124
llvm::User
Definition: User.h:44
llvm::PatternMatch::m_UnOp
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
Definition: PatternMatch.h:79
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
isSplat
static bool isSplat(ArrayRef< Value * > VL)
Definition: SLPVectorizer.cpp:232
InstrTypes.h
llvm::FCmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1344
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
foldInsEltIntoIdentityShuffle
static Instruction * foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt)
Try to fold an extract+insert element into an existing identity shuffle by changing the shuffle's mas...
Definition: InstCombineVectorOps.cpp:1164
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1914
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:389
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:249
llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:629
llvm::SmallBitVector::all
bool all() const
Returns true if all bits are set.
Definition: SmallBitVector.h:213
llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition: Instructions.h:2032
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:395
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:147
ShuffleOps
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
Definition: InstCombineVectorOps.cpp:625
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::UnaryOperator::CreateWithCopiedFlags
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:158
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
llvm::InstCombinerImpl
Definition: InstCombineInternal.h:60
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
InstCombineWorklist.h
collectShuffleElements
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl< int > &Mask, Value *PermittedRHS, InstCombinerImpl &IC)
Definition: InstCombineVectorOps.cpp:627
llvm::InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse
Instruction * foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI)
Look for chain of insertvalue's that fully define an aggregate, and trace back the values inserted,...
Definition: InstCombineVectorOps.cpp:713
PatternMatch.h
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:212
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:644
llvm::Instruction::isIntDivRem
bool isIntDivRem() const
Definition: Instruction.h:166
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:193
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2662
llvm::None
const NoneType None
Definition: None.h:23
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2023
Type.h
llvm::APInt::isAllOnesValue
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:401
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1497
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::Type::getStructNumElements
unsigned getStructNumElements() const
Definition: DerivedTypes.h:350
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:593
llvm::Value::user_back
User * user_back()
Definition: Value.h:420
llvm::ConstantExpr::getShuffleVector
static Constant * getShuffleVector(Constant *V1, Constant *V2, ArrayRef< int > Mask, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2571
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:635
foldShuffleWithInsert
static Instruction * foldShuffleWithInsert(ShuffleVectorInst &Shuf, InstCombinerImpl &IC)
Try to replace a shuffle with an insertelement or try to replace a shuffle operand with the operand o...
Definition: InstCombineVectorOps.cpp:2125
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
llvm::Instruction::andIRFlags
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
Definition: Instruction.cpp:292
llvm::InstCombiner::getSafeVectorConstantForBinop
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition: InstCombiner.h:305
VectorUtils.h
llvm::Optional::getValueOr
constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION
Definition: Optional.h:295
BasicBlock.h
llvm::ShuffleVectorInst::increasesLength
bool increasesLength() const
Return true if this shuffle returns a vector with a greater number of elements than its source vector...
Definition: Instructions.h:2073
llvm::ShuffleVectorInst::commuteShuffleMask
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
Definition: Instructions.h:2285
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1178
llvm::ConstantExpr::getBinOpIdentity
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2786
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:91
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4691
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2720
replaceExtractElements
static void replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, InstCombinerImpl &IC)
If we have insertion into a vector that is wider than the vector that we are extracting from,...
Definition: InstCombineVectorOps.cpp:544
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::UndefValue
'undef' values are things that do not have specified contents.
Definition: Constants.h:1347
llvm::SimplifyShuffleVectorInst
Value * SimplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef< int > Mask, Type *RetTy, const SimplifyQuery &Q)
Given operands for a ShuffleVectorInst, fold the result or return null.
Definition: InstructionSimplify.cpp:4825
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:905
llvm::ConstantExpr::getShl
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2740
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1570
ArrayRef.h
canonicalizeInsertSplat
static Instruction * canonicalizeInsertSplat(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
If we have an insert of a scalar to a non-zero element of an undefined vector and then shuffle that v...
Definition: InstCombineVectorOps.cpp:1847
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Type::getArrayNumElements
uint64_t getArrayNumElements() const
Definition: DerivedTypes.h:386
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
BinopElts
These are the ingredients in an alternate form binary operator as described below.
Definition: InstCombineVectorOps.cpp:1751
llvm::ExtractElementInst::getVectorOperandType
VectorType * getVectorOperandType() const
Definition: Instructions.h:1884
llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1528
llvm::SimplifyQuery::getWithInstruction
SimplifyQuery getWithInstruction(Instruction *I) const
Definition: InstructionSimplify.h:121
collectSingleShuffleElements
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl< int > &Mask)
If V is a shuffle of values that ONLY returns elements from either LHS or RHS, return the shuffle mas...
Definition: InstCombineVectorOps.cpp:471
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:142
llvm::LinearPolySize::getKnownMinValue
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:297
foldIdentityExtractShuffle
static Instruction * foldIdentityExtractShuffle(ShuffleVectorInst &Shuf)
Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
Definition: InstCombineVectorOps.cpp:2084
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:649
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:931
foldBitcastExtElt
static Instruction * foldBitcastExtElt(ExtractElementInst &Ext, InstCombiner::BuilderTy &Builder, bool IsBigEndian)
Definition: InstCombineVectorOps.cpp:173
llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:880
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::BinaryOperator
Definition: InstrTypes.h:190
llvm::APInt::getAllOnesValue
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:567
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
evaluateInDifferentElementOrder
static Value * evaluateInDifferentElementOrder(Value *V, ArrayRef< int > Mask)
Definition: InstCombineVectorOps.cpp:1630
llvm::SimplifyInsertElementInst
Value * SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, const SimplifyQuery &Q)
Given operands for an InsertElement, fold the result or return null.
Definition: InstructionSimplify.cpp:4476
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::ExtractElementInst::getVectorOperand
Value * getVectorOperand()
Definition: Instructions.h:1879
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:421
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1556
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:527
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:952
llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:367
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::InstCombinerImpl::visitInsertValueInst
Instruction * visitInsertValueInst(InsertValueInst &IV)
Try to find redundant insertvalue instructions, like the following ones: %0 = insertvalue { i8,...
Definition: InstCombineVectorOps.cpp:994
llvm::Instruction::isExact
bool isExact() const
Determine whether the exact flag is set.
Definition: Instruction.cpp:165
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1354
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1205
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:299
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:166
findDemandedEltsBySingleUser
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr)
Find elements of V demanded by UserInstr.
Definition: InstCombineVectorOps.cpp:273
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:140
llvm::InstCombinerImpl::visitInsertElementInst
Instruction * visitInsertElementInst(InsertElementInst &IE)
Definition: InstCombineVectorOps.cpp:1342
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:163
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition: VectorUtils.cpp:432
Constant.h
llvm::PPC::getPredicate
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
foldTruncShuffle
static Instruction * foldTruncShuffle(ShuffleVectorInst &Shuf, bool IsBigEndian)
Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
Definition: InstCombineVectorOps.cpp:2010
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:201
llvm::InstCombinerImpl::InsertNewInstWith
Instruction * InsertNewInstWith(Instruction *New, Instruction &Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombineInternal.h:375
narrowVectorSelect
static Instruction * narrowVectorSelect(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Match a shuffle-select-shuffle pattern where the shuffles are widening and narrowing (concatenating w...
Definition: InstCombineVectorOps.cpp:2051
isShuffleExtractingFromLHS
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, ArrayRef< int > Mask)
Definition: InstCombineVectorOps.cpp:1734
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2612
hoistInsEltConst
static Instruction * hoistInsEltConst(InsertElementInst &InsElt2, InstCombiner::BuilderTy &Builder)
If we have an insertelement instruction feeding into another insertelement and the 2nd is inserting a...
Definition: InstCombineVectorOps.cpp:1223
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:208
Casting.h
foldInsEltIntoSplat
static Instruction * foldInsEltIntoSplat(InsertElementInst &InsElt)
Try to fold an insert element into an existing splat shuffle by changing the shuffle's mask to includ...
Definition: InstCombineVectorOps.cpp:1127
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:184
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition: VectorUtils.cpp:270
llvm::CmpInst::Create
static CmpInst * Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name="", Instruction *InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Definition: Instructions.cpp:3616
llvm::InstCombinerImpl::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombineInternal.h:407
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:768
llvm::InsertElementInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:1947
llvm::Type::StructTyID
@ StructTyID
Structures.
Definition: Type.h:75
canEvaluateShuffled
static bool canEvaluateShuffled(Value *V, ArrayRef< int > Mask, unsigned Depth=5)
Return true if we can evaluate the specified expression tree if the vector elements were shuffled in ...
Definition: InstCombineVectorOps.cpp:1466
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:1986
Instructions.h
SmallVector.h
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:802
User.h
findDemandedEltsByAllUsers
static APInt findDemandedEltsByAllUsers(Value *V)
Find union of elements of V demanded by all its users.
Definition: InstCombineVectorOps.cpp:317
llvm::Value::DoPHITranslation
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:944
llvm::Instruction::hasNoSignedWrap
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
Definition: Instruction.cpp:136
SmallBitVector.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
InstructionSimplify.h
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2686
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::PHINode
Definition: Instructions.h:2572
llvm::ExtractElementInst::getIndexOperand
Value * getIndexOperand()
Definition: Instructions.h:1880
llvm::Instruction::copyIRFlags
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
Definition: Instruction.cpp:268
llvm::SmallVectorImpl< int >
foldIdentityPaddedShuffles
static Instruction * foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:2212
DerivedTypes.h
BinopElts::Op0
Value * Op0
Definition: InstCombineVectorOps.cpp:1753
getAlternateBinop
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL)
Binops may be transformed into binops with different opcodes and operands.
Definition: InstCombineVectorOps.cpp:1765
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
foldSelectShuffleWith1Binop
static Instruction * foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:1791
llvm::ConstantAggregateZero::get
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1649
llvm::InsertValueInst
This instruction inserts a struct field of array element value into an aggregate value.
Definition: Instructions.h:2429
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2549
Value.h
llvm::BinaryOperator::CreateWithCopiedFlags
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO, const Twine &Name="")
Definition: InstrTypes.h:251
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
BinopElts::BinopElts
BinopElts(BinaryOperator::BinaryOps Opc=(BinaryOperator::BinaryOps) 0, Value *V0=nullptr, Value *V1=nullptr)
Definition: InstCombineVectorOps.cpp:1755
BinopElts::Op1
Value * Op1
Definition: InstCombineVectorOps.cpp:1754
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:628
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:434
llvm::Instruction::dropPoisonGeneratingFlags
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
Definition: Instruction.cpp:140
llvm::ShuffleVectorInst::changesLength
bool changesLength() const
Return true if this shuffle returns a vector with a different number of elements than its source vect...
Definition: Instructions.h:2062
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:122
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::ShuffleVectorInst::commute
void commute()
Swap the operands and adjust the mask to preserve the semantics of the instruction.
Definition: Instructions.cpp:1967