LLVM  14.0.0git
InstCombineVectorOps.cpp
Go to the documentation of this file.
1 //===- InstCombineVectorOps.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements instcombine for ExtractElement, InsertElement and
10 // ShuffleVector.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/ArrayRef.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/ADT/Statistic.h"
24 #include "llvm/IR/BasicBlock.h"
25 #include "llvm/IR/Constant.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DerivedTypes.h"
28 #include "llvm/IR/InstrTypes.h"
29 #include "llvm/IR/Instruction.h"
30 #include "llvm/IR/Instructions.h"
31 #include "llvm/IR/Operator.h"
32 #include "llvm/IR/PatternMatch.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/User.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/Support/Casting.h"
40 #include <cassert>
41 #include <cstdint>
42 #include <iterator>
43 #include <utility>
44 
45 using namespace llvm;
46 using namespace PatternMatch;
47 
48 #define DEBUG_TYPE "instcombine"
49 
50 STATISTIC(NumAggregateReconstructionsSimplified,
51  "Number of aggregate reconstructions turned into reuse of the "
52  "original aggregate");
53 
54 /// Return true if the value is cheaper to scalarize than it is to leave as a
55 /// vector operation. If the extract index \p EI is a constant integer then
56 /// some operations may be cheap to scalarize.
57 ///
58 /// FIXME: It's possible to create more instructions than previously existed.
59 static bool cheapToScalarize(Value *V, Value *EI) {
60  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
61 
62  // If we can pick a scalar constant value out of a vector, that is free.
63  if (auto *C = dyn_cast<Constant>(V))
64  return CEI || C->getSplatValue();
65 
66  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
67  ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
68  // Index needs to be lower than the minimum size of the vector, because
69  // for scalable vector, the vector size is known at run time.
70  return CEI->getValue().ult(EC.getKnownMinValue());
71  }
72 
73  // An insertelement to the same constant index as our extract will simplify
74  // to the scalar inserted element. An insertelement to a different constant
75  // index is irrelevant to our extract.
77  return CEI;
78 
79  if (match(V, m_OneUse(m_Load(m_Value()))))
80  return true;
81 
82  if (match(V, m_OneUse(m_UnOp())))
83  return true;
84 
85  Value *V0, *V1;
86  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
87  if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
88  return true;
89 
90  CmpInst::Predicate UnusedPred;
91  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
92  if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
93  return true;
94 
95  return false;
96 }
97 
98 // If we have a PHI node with a vector type that is only used to feed
99 // itself and be an operand of extractelement at a constant location,
100 // try to replace the PHI of the vector type with a PHI of a scalar type.
101 Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
102  PHINode *PN) {
104  // The users we want the PHI to have are:
105  // 1) The EI ExtractElement (we already know this)
106  // 2) Possibly more ExtractElements with the same index.
107  // 3) Another operand, which will feed back into the PHI.
108  Instruction *PHIUser = nullptr;
109  for (auto U : PN->users()) {
110  if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
111  if (EI.getIndexOperand() == EU->getIndexOperand())
112  Extracts.push_back(EU);
113  else
114  return nullptr;
115  } else if (!PHIUser) {
116  PHIUser = cast<Instruction>(U);
117  } else {
118  return nullptr;
119  }
120  }
121 
122  if (!PHIUser)
123  return nullptr;
124 
125  // Verify that this PHI user has one use, which is the PHI itself,
126  // and that it is a binary operation which is cheap to scalarize.
127  // otherwise return nullptr.
128  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
129  !(isa<BinaryOperator>(PHIUser)) ||
130  !cheapToScalarize(PHIUser, EI.getIndexOperand()))
131  return nullptr;
132 
133  // Create a scalar PHI node that will replace the vector PHI node
134  // just before the current PHI node.
135  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
136  PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
137  // Scalarize each PHI operand.
138  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
139  Value *PHIInVal = PN->getIncomingValue(i);
140  BasicBlock *inBB = PN->getIncomingBlock(i);
141  Value *Elt = EI.getIndexOperand();
142  // If the operand is the PHI induction variable:
143  if (PHIInVal == PHIUser) {
144  // Scalarize the binary operation. Its first operand is the
145  // scalar PHI, and the second operand is extracted from the other
146  // vector operand.
147  BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
148  unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
149  Value *Op = InsertNewInstWith(
150  ExtractElementInst::Create(B0->getOperand(opId), Elt,
151  B0->getOperand(opId)->getName() + ".Elt"),
152  *B0);
153  Value *newPHIUser = InsertNewInstWith(
155  scalarPHI, Op, B0), *B0);
156  scalarPHI->addIncoming(newPHIUser, inBB);
157  } else {
158  // Scalarize PHI input:
159  Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
160  // Insert the new instruction into the predecessor basic block.
161  Instruction *pos = dyn_cast<Instruction>(PHIInVal);
162  BasicBlock::iterator InsertPos;
163  if (pos && !isa<PHINode>(pos)) {
164  InsertPos = ++pos->getIterator();
165  } else {
166  InsertPos = inBB->getFirstInsertionPt();
167  }
168 
169  InsertNewInstWith(newEI, *InsertPos);
170 
171  scalarPHI->addIncoming(newEI, inBB);
172  }
173  }
174 
175  for (auto E : Extracts)
176  replaceInstUsesWith(*E, scalarPHI);
177 
178  return &EI;
179 }
180 
183  bool IsBigEndian) {
184  Value *X;
185  uint64_t ExtIndexC;
186  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
187  !X->getType()->isVectorTy() ||
188  !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
189  return nullptr;
190 
191  // If this extractelement is using a bitcast from a vector of the same number
192  // of elements, see if we can find the source element from the source vector:
193  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
194  auto *SrcTy = cast<VectorType>(X->getType());
195  Type *DestTy = Ext.getType();
196  ElementCount NumSrcElts = SrcTy->getElementCount();
197  ElementCount NumElts =
198  cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
199  if (NumSrcElts == NumElts)
200  if (Value *Elt = findScalarElement(X, ExtIndexC))
201  return new BitCastInst(Elt, DestTy);
202 
203  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
204  "Src and Dst must be the same sort of vector type");
205 
206  // If the source elements are wider than the destination, try to shift and
207  // truncate a subset of scalar bits of an insert op.
208  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
209  Value *Scalar;
210  uint64_t InsIndexC;
211  if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar),
212  m_ConstantInt(InsIndexC))))
213  return nullptr;
214 
215  // The extract must be from the subset of vector elements that we inserted
216  // into. Example: if we inserted element 1 of a <2 x i64> and we are
217  // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
218  // of elements 4-7 of the bitcasted vector.
219  unsigned NarrowingRatio =
220  NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
221  if (ExtIndexC / NarrowingRatio != InsIndexC)
222  return nullptr;
223 
224  // We are extracting part of the original scalar. How that scalar is
225  // inserted into the vector depends on the endian-ness. Example:
226  // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
227  // +--+--+--+--+--+--+--+--+
228  // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
229  // extelt <4 x i16> V', 3: | |S2|S3|
230  // +--+--+--+--+--+--+--+--+
231  // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
232  // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
233  // In this example, we must right-shift little-endian. Big-endian is just a
234  // truncate.
235  unsigned Chunk = ExtIndexC % NarrowingRatio;
236  if (IsBigEndian)
237  Chunk = NarrowingRatio - 1 - Chunk;
238 
239  // Bail out if this is an FP vector to FP vector sequence. That would take
240  // more instructions than we started with unless there is no shift, and it
241  // may not be handled as well in the backend.
242  bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
243  bool NeedDestBitcast = DestTy->isFloatingPointTy();
244  if (NeedSrcBitcast && NeedDestBitcast)
245  return nullptr;
246 
247  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
248  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
249  unsigned ShAmt = Chunk * DestWidth;
250 
251  // TODO: This limitation is more strict than necessary. We could sum the
252  // number of new instructions and subtract the number eliminated to know if
253  // we can proceed.
254  if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
255  if (NeedSrcBitcast || NeedDestBitcast)
256  return nullptr;
257 
258  if (NeedSrcBitcast) {
259  Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
260  Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
261  }
262 
263  if (ShAmt) {
264  // Bail out if we could end with more instructions than we started with.
265  if (!Ext.getVectorOperand()->hasOneUse())
266  return nullptr;
267  Scalar = Builder.CreateLShr(Scalar, ShAmt);
268  }
269 
270  if (NeedDestBitcast) {
271  Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
272  return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
273  }
274  return new TruncInst(Scalar, DestTy);
275  }
276 
277  return nullptr;
278 }
279 
280 /// Find elements of V demanded by UserInstr.
282  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
283 
284  // Conservatively assume that all elements are needed.
285  APInt UsedElts(APInt::getAllOnes(VWidth));
286 
287  switch (UserInstr->getOpcode()) {
288  case Instruction::ExtractElement: {
289  ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
290  assert(EEI->getVectorOperand() == V);
291  ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
292  if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
293  UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
294  }
295  break;
296  }
297  case Instruction::ShuffleVector: {
298  ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
299  unsigned MaskNumElts =
300  cast<FixedVectorType>(UserInstr->getType())->getNumElements();
301 
302  UsedElts = APInt(VWidth, 0);
303  for (unsigned i = 0; i < MaskNumElts; i++) {
304  unsigned MaskVal = Shuffle->getMaskValue(i);
305  if (MaskVal == -1u || MaskVal >= 2 * VWidth)
306  continue;
307  if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
308  UsedElts.setBit(MaskVal);
309  if (Shuffle->getOperand(1) == V &&
310  ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
311  UsedElts.setBit(MaskVal - VWidth);
312  }
313  break;
314  }
315  default:
316  break;
317  }
318  return UsedElts;
319 }
320 
321 /// Find union of elements of V demanded by all its users.
322 /// If it is known by querying findDemandedEltsBySingleUser that
323 /// no user demands an element of V, then the corresponding bit
324 /// remains unset in the returned value.
326  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
327 
328  APInt UnionUsedElts(VWidth, 0);
329  for (const Use &U : V->uses()) {
330  if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
331  UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
332  } else {
333  UnionUsedElts = APInt::getAllOnes(VWidth);
334  break;
335  }
336 
337  if (UnionUsedElts.isAllOnes())
338  break;
339  }
340 
341  return UnionUsedElts;
342 }
343 
345  Value *SrcVec = EI.getVectorOperand();
346  Value *Index = EI.getIndexOperand();
347  if (Value *V = SimplifyExtractElementInst(SrcVec, Index,
348  SQ.getWithInstruction(&EI)))
349  return replaceInstUsesWith(EI, V);
350 
351  // If extracting a specified index from the vector, see if we can recursively
352  // find a previously computed scalar that was inserted into the vector.
353  auto *IndexC = dyn_cast<ConstantInt>(Index);
354  if (IndexC) {
356  unsigned NumElts = EC.getKnownMinValue();
357 
358  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
359  Intrinsic::ID IID = II->getIntrinsicID();
360  // Index needs to be lower than the minimum size of the vector, because
361  // for scalable vector, the vector size is known at run time.
362  if (IID == Intrinsic::experimental_stepvector &&
363  IndexC->getValue().ult(NumElts)) {
364  Type *Ty = EI.getType();
365  unsigned BitWidth = Ty->getIntegerBitWidth();
366  Value *Idx;
367  // Return index when its value does not exceed the allowed limit
368  // for the element type of the vector, otherwise return undefined.
369  if (IndexC->getValue().getActiveBits() <= BitWidth)
370  Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
371  else
372  Idx = UndefValue::get(Ty);
373  return replaceInstUsesWith(EI, Idx);
374  }
375  }
376 
377  // InstSimplify should handle cases where the index is invalid.
378  // For fixed-length vector, it's invalid to extract out-of-range element.
379  if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
380  return nullptr;
381 
382  // This instruction only demands the single element from the input vector.
383  // Skip for scalable type, the number of elements is unknown at
384  // compile-time.
385  if (!EC.isScalable() && NumElts != 1) {
386  // If the input vector has a single use, simplify it based on this use
387  // property.
388  if (SrcVec->hasOneUse()) {
389  APInt UndefElts(NumElts, 0);
390  APInt DemandedElts(NumElts, 0);
391  DemandedElts.setBit(IndexC->getZExtValue());
392  if (Value *V =
393  SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
394  return replaceOperand(EI, 0, V);
395  } else {
396  // If the input vector has multiple uses, simplify it based on a union
397  // of all elements used.
398  APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
399  if (!DemandedElts.isAllOnes()) {
400  APInt UndefElts(NumElts, 0);
401  if (Value *V = SimplifyDemandedVectorElts(
402  SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
403  true /* AllowMultipleUsers */)) {
404  if (V != SrcVec) {
405  SrcVec->replaceAllUsesWith(V);
406  return &EI;
407  }
408  }
409  }
410  }
411  }
412 
413  if (Instruction *I = foldBitcastExtElt(EI, Builder, DL.isBigEndian()))
414  return I;
415 
416  // If there's a vector PHI feeding a scalar use through this extractelement
417  // instruction, try to scalarize the PHI.
418  if (auto *Phi = dyn_cast<PHINode>(SrcVec))
419  if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
420  return ScalarPHI;
421  }
422 
423  // TODO come up with a n-ary matcher that subsumes both unary and
424  // binary matchers.
425  UnaryOperator *UO;
426  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
427  // extelt (unop X), Index --> unop (extelt X, Index)
428  Value *X = UO->getOperand(0);
429  Value *E = Builder.CreateExtractElement(X, Index);
431  }
432 
433  BinaryOperator *BO;
434  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
435  // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
436  Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
437  Value *E0 = Builder.CreateExtractElement(X, Index);
438  Value *E1 = Builder.CreateExtractElement(Y, Index);
439  return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
440  }
441 
442  Value *X, *Y;
443  CmpInst::Predicate Pred;
444  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
445  cheapToScalarize(SrcVec, Index)) {
446  // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
447  Value *E0 = Builder.CreateExtractElement(X, Index);
448  Value *E1 = Builder.CreateExtractElement(Y, Index);
449  return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
450  }
451 
452  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
453  if (auto *IE = dyn_cast<InsertElementInst>(I)) {
454  // Extracting the inserted element?
455  if (IE->getOperand(2) == Index)
456  return replaceInstUsesWith(EI, IE->getOperand(1));
457  // If the inserted and extracted elements are constants, they must not
458  // be the same value, extract from the pre-inserted value instead.
459  if (isa<Constant>(IE->getOperand(2)) && IndexC)
460  return replaceOperand(EI, 0, IE->getOperand(0));
461  } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
462  auto *VecType = cast<VectorType>(GEP->getType());
463  ElementCount EC = VecType->getElementCount();
464  uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
465  if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
466  // Find out why we have a vector result - these are a few examples:
467  // 1. We have a scalar pointer and a vector of indices, or
468  // 2. We have a vector of pointers and a scalar index, or
469  // 3. We have a vector of pointers and a vector of indices, etc.
470  // Here we only consider combining when there is exactly one vector
471  // operand, since the optimization is less obviously a win due to
472  // needing more than one extractelements.
473 
474  unsigned VectorOps =
475  llvm::count_if(GEP->operands(), [](const Value *V) {
476  return isa<VectorType>(V->getType());
477  });
478  if (VectorOps > 1)
479  return nullptr;
480  assert(VectorOps == 1 && "Expected exactly one vector GEP operand!");
481 
482  Value *NewPtr = GEP->getPointerOperand();
483  if (isa<VectorType>(NewPtr->getType()))
484  NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
485 
486  SmallVector<Value *> NewOps;
487  for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
488  Value *Op = GEP->getOperand(I);
489  if (isa<VectorType>(Op->getType()))
490  NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
491  else
492  NewOps.push_back(Op);
493  }
494 
496  cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr,
497  NewOps);
498  NewGEP->setIsInBounds(GEP->isInBounds());
499  return NewGEP;
500  }
501  return nullptr;
502  } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
503  // If this is extracting an element from a shufflevector, figure out where
504  // it came from and extract from the appropriate input element instead.
505  // Restrict the following transformation to fixed-length vector.
506  if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
507  int SrcIdx =
508  SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
509  Value *Src;
510  unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
511  ->getNumElements();
512 
513  if (SrcIdx < 0)
514  return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
515  if (SrcIdx < (int)LHSWidth)
516  Src = SVI->getOperand(0);
517  else {
518  SrcIdx -= LHSWidth;
519  Src = SVI->getOperand(1);
520  }
523  Src, ConstantInt::get(Int32Ty, SrcIdx, false));
524  }
525  } else if (auto *CI = dyn_cast<CastInst>(I)) {
526  // Canonicalize extractelement(cast) -> cast(extractelement).
527  // Bitcasts can change the number of vector elements, and they cost
528  // nothing.
529  if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
530  Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
531  return CastInst::Create(CI->getOpcode(), EE, EI.getType());
532  }
533  }
534  }
535  return nullptr;
536 }
537 
538 /// If V is a shuffle of values that ONLY returns elements from either LHS or
539 /// RHS, return the shuffle mask and true. Otherwise, return false.
540 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
542  assert(LHS->getType() == RHS->getType() &&
543  "Invalid CollectSingleShuffleElements");
544  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
545 
546  if (match(V, m_Undef())) {
547  Mask.assign(NumElts, -1);
548  return true;
549  }
550 
551  if (V == LHS) {
552  for (unsigned i = 0; i != NumElts; ++i)
553  Mask.push_back(i);
554  return true;
555  }
556 
557  if (V == RHS) {
558  for (unsigned i = 0; i != NumElts; ++i)
559  Mask.push_back(i + NumElts);
560  return true;
561  }
562 
563  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
564  // If this is an insert of an extract from some other vector, include it.
565  Value *VecOp = IEI->getOperand(0);
566  Value *ScalarOp = IEI->getOperand(1);
567  Value *IdxOp = IEI->getOperand(2);
568 
569  if (!isa<ConstantInt>(IdxOp))
570  return false;
571  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
572 
573  if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
574  // We can handle this if the vector we are inserting into is
575  // transitively ok.
576  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
577  // If so, update the mask to reflect the inserted undef.
578  Mask[InsertedIdx] = -1;
579  return true;
580  }
581  } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
582  if (isa<ConstantInt>(EI->getOperand(1))) {
583  unsigned ExtractedIdx =
584  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
585  unsigned NumLHSElts =
586  cast<FixedVectorType>(LHS->getType())->getNumElements();
587 
588  // This must be extracting from either LHS or RHS.
589  if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
590  // We can handle this if the vector we are inserting into is
591  // transitively ok.
592  if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
593  // If so, update the mask to reflect the inserted value.
594  if (EI->getOperand(0) == LHS) {
595  Mask[InsertedIdx % NumElts] = ExtractedIdx;
596  } else {
597  assert(EI->getOperand(0) == RHS);
598  Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
599  }
600  return true;
601  }
602  }
603  }
604  }
605  }
606 
607  return false;
608 }
609 
610 /// If we have insertion into a vector that is wider than the vector that we
611 /// are extracting from, try to widen the source vector to allow a single
612 /// shufflevector to replace one or more insert/extract pairs.
614  ExtractElementInst *ExtElt,
615  InstCombinerImpl &IC) {
616  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
617  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
618  unsigned NumInsElts = InsVecType->getNumElements();
619  unsigned NumExtElts = ExtVecType->getNumElements();
620 
621  // The inserted-to vector must be wider than the extracted-from vector.
622  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
623  NumExtElts >= NumInsElts)
624  return;
625 
626  // Create a shuffle mask to widen the extended-from vector using poison
627  // values. The mask selects all of the values of the original vector followed
628  // by as many poison values as needed to create a vector of the same length
629  // as the inserted-to vector.
630  SmallVector<int, 16> ExtendMask;
631  for (unsigned i = 0; i < NumExtElts; ++i)
632  ExtendMask.push_back(i);
633  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
634  ExtendMask.push_back(-1);
635 
636  Value *ExtVecOp = ExtElt->getVectorOperand();
637  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
638  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
639  ? ExtVecOpInst->getParent()
640  : ExtElt->getParent();
641 
642  // TODO: This restriction matches the basic block check below when creating
643  // new extractelement instructions. If that limitation is removed, this one
644  // could also be removed. But for now, we just bail out to ensure that we
645  // will replace the extractelement instruction that is feeding our
646  // insertelement instruction. This allows the insertelement to then be
647  // replaced by a shufflevector. If the insertelement is not replaced, we can
648  // induce infinite looping because there's an optimization for extractelement
649  // that will delete our widening shuffle. This would trigger another attempt
650  // here to create that shuffle, and we spin forever.
651  if (InsertionBlock != InsElt->getParent())
652  return;
653 
654  // TODO: This restriction matches the check in visitInsertElementInst() and
655  // prevents an infinite loop caused by not turning the extract/insert pair
656  // into a shuffle. We really should not need either check, but we're lacking
657  // folds for shufflevectors because we're afraid to generate shuffle masks
658  // that the backend can't handle.
659  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
660  return;
661 
662  auto *WideVec =
663  new ShuffleVectorInst(ExtVecOp, PoisonValue::get(ExtVecType), ExtendMask);
664 
665  // Insert the new shuffle after the vector operand of the extract is defined
666  // (as long as it's not a PHI) or at the start of the basic block of the
667  // extract, so any subsequent extracts in the same basic block can use it.
668  // TODO: Insert before the earliest ExtractElementInst that is replaced.
669  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
670  WideVec->insertAfter(ExtVecOpInst);
671  else
672  IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
673 
674  // Replace extracts from the original narrow vector with extracts from the new
675  // wide vector.
676  for (User *U : ExtVecOp->users()) {
677  ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
678  if (!OldExt || OldExt->getParent() != WideVec->getParent())
679  continue;
680  auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
681  NewExt->insertAfter(OldExt);
682  IC.replaceInstUsesWith(*OldExt, NewExt);
683  }
684 }
685 
686 /// We are building a shuffle to create V, which is a sequence of insertelement,
687 /// extractelement pairs. If PermittedRHS is set, then we must either use it or
688 /// not rely on the second vector source. Return a std::pair containing the
689 /// left and right vectors of the proposed shuffle (or 0), and set the Mask
690 /// parameter as required.
691 ///
692 /// Note: we intentionally don't try to fold earlier shuffles since they have
693 /// often been chosen carefully to be efficiently implementable on the target.
694 using ShuffleOps = std::pair<Value *, Value *>;
695 
697  Value *PermittedRHS,
698  InstCombinerImpl &IC) {
699  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
700  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
701 
702  if (match(V, m_Undef())) {
703  Mask.assign(NumElts, -1);
704  return std::make_pair(
705  PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
706  }
707 
708  if (isa<ConstantAggregateZero>(V)) {
709  Mask.assign(NumElts, 0);
710  return std::make_pair(V, nullptr);
711  }
712 
713  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
714  // If this is an insert of an extract from some other vector, include it.
715  Value *VecOp = IEI->getOperand(0);
716  Value *ScalarOp = IEI->getOperand(1);
717  Value *IdxOp = IEI->getOperand(2);
718 
719  if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
720  if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
721  unsigned ExtractedIdx =
722  cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
723  unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
724 
725  // Either the extracted from or inserted into vector must be RHSVec,
726  // otherwise we'd end up with a shuffle of three inputs.
727  if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
728  Value *RHS = EI->getOperand(0);
729  ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
730  assert(LR.second == nullptr || LR.second == RHS);
731 
732  if (LR.first->getType() != RHS->getType()) {
733  // Although we are giving up for now, see if we can create extracts
734  // that match the inserts for another round of combining.
735  replaceExtractElements(IEI, EI, IC);
736 
737  // We tried our best, but we can't find anything compatible with RHS
738  // further up the chain. Return a trivial shuffle.
739  for (unsigned i = 0; i < NumElts; ++i)
740  Mask[i] = i;
741  return std::make_pair(V, nullptr);
742  }
743 
744  unsigned NumLHSElts =
745  cast<FixedVectorType>(RHS->getType())->getNumElements();
746  Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
747  return std::make_pair(LR.first, RHS);
748  }
749 
750  if (VecOp == PermittedRHS) {
751  // We've gone as far as we can: anything on the other side of the
752  // extractelement will already have been converted into a shuffle.
753  unsigned NumLHSElts =
754  cast<FixedVectorType>(EI->getOperand(0)->getType())
755  ->getNumElements();
756  for (unsigned i = 0; i != NumElts; ++i)
757  Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
758  return std::make_pair(EI->getOperand(0), PermittedRHS);
759  }
760 
761  // If this insertelement is a chain that comes from exactly these two
762  // vectors, return the vector and the effective shuffle.
763  if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
764  collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
765  Mask))
766  return std::make_pair(EI->getOperand(0), PermittedRHS);
767  }
768  }
769  }
770 
771  // Otherwise, we can't do anything fancy. Return an identity vector.
772  for (unsigned i = 0; i != NumElts; ++i)
773  Mask.push_back(i);
774  return std::make_pair(V, nullptr);
775 }
776 
777 /// Look for chain of insertvalue's that fully define an aggregate, and trace
778 /// back the values inserted, see if they are all were extractvalue'd from
779 /// the same source aggregate from the exact same element indexes.
780 /// If they were, just reuse the source aggregate.
781 /// This potentially deals with PHI indirections.
783  InsertValueInst &OrigIVI) {
784  Type *AggTy = OrigIVI.getType();
785  unsigned NumAggElts;
786  switch (AggTy->getTypeID()) {
787  case Type::StructTyID:
788  NumAggElts = AggTy->getStructNumElements();
789  break;
790  case Type::ArrayTyID:
791  NumAggElts = AggTy->getArrayNumElements();
792  break;
793  default:
794  llvm_unreachable("Unhandled aggregate type?");
795  }
796 
797  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
798  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
799  // FIXME: any interesting patterns to be caught with larger limit?
800  assert(NumAggElts > 0 && "Aggregate should have elements.");
801  if (NumAggElts > 2)
802  return nullptr;
803 
804  static constexpr auto NotFound = None;
805  static constexpr auto FoundMismatch = nullptr;
806 
807  // Try to find a value of each element of an aggregate.
808  // FIXME: deal with more complex, not one-dimensional, aggregate types
809  SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
810 
811  // Do we know values for each element of the aggregate?
812  auto KnowAllElts = [&AggElts]() {
813  return all_of(AggElts,
814  [](Optional<Instruction *> Elt) { return Elt != NotFound; });
815  };
816 
817  int Depth = 0;
818 
819  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
820  // every element being overwritten twice, which should never happen.
821  static const int DepthLimit = 2 * NumAggElts;
822 
823  // Recurse up the chain of `insertvalue` aggregate operands until either we've
824  // reconstructed full initializer or can't visit any more `insertvalue`'s.
825  for (InsertValueInst *CurrIVI = &OrigIVI;
826  Depth < DepthLimit && CurrIVI && !KnowAllElts();
827  CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
828  ++Depth) {
829  auto *InsertedValue =
830  dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
831  if (!InsertedValue)
832  return nullptr; // Inserted value must be produced by an instruction.
833 
834  ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
835 
836  // Don't bother with more than single-level aggregates.
837  if (Indices.size() != 1)
838  return nullptr; // FIXME: deal with more complex aggregates?
839 
840  // Now, we may have already previously recorded the value for this element
841  // of an aggregate. If we did, that means the CurrIVI will later be
842  // overwritten with the already-recorded value. But if not, let's record it!
843  Optional<Instruction *> &Elt = AggElts[Indices.front()];
844  Elt = Elt.getValueOr(InsertedValue);
845 
846  // FIXME: should we handle chain-terminating undef base operand?
847  }
848 
849  // Was that sufficient to deduce the full initializer for the aggregate?
850  if (!KnowAllElts())
851  return nullptr; // Give up then.
852 
853  // We now want to find the source[s] of the aggregate elements we've found.
854  // And with "source" we mean the original aggregate[s] from which
855  // the inserted elements were extracted. This may require PHI translation.
856 
857  enum class AggregateDescription {
858  /// When analyzing the value that was inserted into an aggregate, we did
859  /// not manage to find defining `extractvalue` instruction to analyze.
860  NotFound,
861  /// When analyzing the value that was inserted into an aggregate, we did
862  /// manage to find defining `extractvalue` instruction[s], and everything
863  /// matched perfectly - aggregate type, element insertion/extraction index.
864  Found,
865  /// When analyzing the value that was inserted into an aggregate, we did
866  /// manage to find defining `extractvalue` instruction, but there was
867  /// a mismatch: either the source type from which the extraction was didn't
868  /// match the aggregate type into which the insertion was,
869  /// or the extraction/insertion channels mismatched,
870  /// or different elements had different source aggregates.
871  FoundMismatch
872  };
873  auto Describe = [](Optional<Value *> SourceAggregate) {
874  if (SourceAggregate == NotFound)
876  if (*SourceAggregate == FoundMismatch)
877  return AggregateDescription::FoundMismatch;
878  return AggregateDescription::Found;
879  };
880 
881  // Given the value \p Elt that was being inserted into element \p EltIdx of an
882  // aggregate AggTy, see if \p Elt was originally defined by an
883  // appropriate extractvalue (same element index, same aggregate type).
884  // If found, return the source aggregate from which the extraction was.
885  // If \p PredBB is provided, does PHI translation of an \p Elt first.
886  auto FindSourceAggregate =
887  [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
889  // For now(?), only deal with, at most, a single level of PHI indirection.
890  if (UseBB && PredBB)
891  Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
892  // FIXME: deal with multiple levels of PHI indirection?
893 
894  // Did we find an extraction?
895  auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
896  if (!EVI)
897  return NotFound;
898 
899  Value *SourceAggregate = EVI->getAggregateOperand();
900 
901  // Is the extraction from the same type into which the insertion was?
902  if (SourceAggregate->getType() != AggTy)
903  return FoundMismatch;
904  // And the element index doesn't change between extraction and insertion?
905  if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
906  return FoundMismatch;
907 
908  return SourceAggregate; // AggregateDescription::Found
909  };
910 
911  // Given elements AggElts that were constructing an aggregate OrigIVI,
912  // see if we can find appropriate source aggregate for each of the elements,
913  // and see it's the same aggregate for each element. If so, return it.
914  auto FindCommonSourceAggregate =
915  [&](Optional<BasicBlock *> UseBB,
917  Optional<Value *> SourceAggregate;
918 
919  for (auto I : enumerate(AggElts)) {
920  assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
921  "We don't store nullptr in SourceAggregate!");
922  assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
923  (I.index() != 0) &&
924  "SourceAggregate should be valid after the first element,");
925 
926  // For this element, is there a plausible source aggregate?
927  // FIXME: we could special-case undef element, IFF we know that in the
928  // source aggregate said element isn't poison.
929  Optional<Value *> SourceAggregateForElement =
930  FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
931 
932  // Okay, what have we found? Does that correlate with previous findings?
933 
934  // Regardless of whether or not we have previously found source
935  // aggregate for previous elements (if any), if we didn't find one for
936  // this element, passthrough whatever we have just found.
937  if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
938  return SourceAggregateForElement;
939 
940  // Okay, we have found source aggregate for this element.
941  // Let's see what we already know from previous elements, if any.
942  switch (Describe(SourceAggregate)) {
944  // This is apparently the first element that we have examined.
945  SourceAggregate = SourceAggregateForElement; // Record the aggregate!
946  continue; // Great, now look at next element.
947  case AggregateDescription::Found:
948  // We have previously already successfully examined other elements.
949  // Is this the same source aggregate we've found for other elements?
950  if (*SourceAggregateForElement != *SourceAggregate)
951  return FoundMismatch;
952  continue; // Still the same aggregate, look at next element.
953  case AggregateDescription::FoundMismatch:
954  llvm_unreachable("Can't happen. We would have early-exited then.");
955  };
956  }
957 
958  assert(Describe(SourceAggregate) == AggregateDescription::Found &&
959  "Must be a valid Value");
960  return *SourceAggregate;
961  };
962 
963  Optional<Value *> SourceAggregate;
964 
965  // Can we find the source aggregate without looking at predecessors?
966  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None);
967  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
968  if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
969  return nullptr; // Conflicting source aggregates!
970  ++NumAggregateReconstructionsSimplified;
971  return replaceInstUsesWith(OrigIVI, *SourceAggregate);
972  }
973 
974  // Okay, apparently we need to look at predecessors.
975 
976  // We should be smart about picking the "use" basic block, which will be the
977  // merge point for aggregate, where we'll insert the final PHI that will be
978  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
979  // We should look in which blocks each of the AggElts is being defined,
980  // they all should be defined in the same basic block.
981  BasicBlock *UseBB = nullptr;
982 
983  for (const Optional<Instruction *> &I : AggElts) {
984  BasicBlock *BB = (*I)->getParent();
985  // If it's the first instruction we've encountered, record the basic block.
986  if (!UseBB) {
987  UseBB = BB;
988  continue;
989  }
990  // Otherwise, this must be the same basic block we've seen previously.
991  if (UseBB != BB)
992  return nullptr;
993  }
994 
995  // If *all* of the elements are basic-block-independent, meaning they are
996  // either function arguments, or constant expressions, then if we didn't
997  // handle them without predecessor-aware handling, we won't handle them now.
998  if (!UseBB)
999  return nullptr;
1000 
1001  // If we didn't manage to find source aggregate without looking at
1002  // predecessors, and there are no predecessors to look at, then we're done.
1003  if (pred_empty(UseBB))
1004  return nullptr;
1005 
1006  // Arbitrary predecessor count limit.
1007  static const int PredCountLimit = 64;
1008 
1009  // Cache the (non-uniqified!) list of predecessors in a vector,
1010  // checking the limit at the same time for efficiency.
1011  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1012  for (BasicBlock *Pred : predecessors(UseBB)) {
1013  // Don't bother if there are too many predecessors.
1014  if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1015  return nullptr;
1016  Preds.emplace_back(Pred);
1017  }
1018 
1019  // For each predecessor, what is the source aggregate,
1020  // from which all the elements were originally extracted from?
1021  // Note that we want for the map to have stable iteration order!
1022  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
1023  for (BasicBlock *Pred : Preds) {
1024  std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1025  SourceAggregates.insert({Pred, nullptr});
1026  // Did we already evaluate this predecessor?
1027  if (!IV.second)
1028  continue;
1029 
1030  // Let's hope that when coming from predecessor Pred, all elements of the
1031  // aggregate produced by OrigIVI must have been originally extracted from
1032  // the same aggregate. Is that so? Can we find said original aggregate?
1033  SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1034  if (Describe(SourceAggregate) != AggregateDescription::Found)
1035  return nullptr; // Give up.
1036  IV.first->second = *SourceAggregate;
1037  }
1038 
1039  // All good! Now we just need to thread the source aggregates here.
1040  // Note that we have to insert the new PHI here, ourselves, because we can't
1041  // rely on InstCombinerImpl::run() inserting it into the right basic block.
1042  // Note that the same block can be a predecessor more than once,
1043  // and we need to preserve that invariant for the PHI node.
1045  Builder.SetInsertPoint(UseBB->getFirstNonPHI());
1046  auto *PHI =
1047  Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1048  for (BasicBlock *Pred : Preds)
1049  PHI->addIncoming(SourceAggregates[Pred], Pred);
1050 
1051  ++NumAggregateReconstructionsSimplified;
1052  return replaceInstUsesWith(OrigIVI, PHI);
1053 }
1054 
1055 /// Try to find redundant insertvalue instructions, like the following ones:
1056 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1057 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1058 /// Here the second instruction inserts values at the same indices, as the
1059 /// first one, making the first one redundant.
1060 /// It should be transformed to:
1061 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
1063  bool IsRedundant = false;
1064  ArrayRef<unsigned int> FirstIndices = I.getIndices();
1065 
1066  // If there is a chain of insertvalue instructions (each of them except the
1067  // last one has only one use and it's another insertvalue insn from this
1068  // chain), check if any of the 'children' uses the same indices as the first
1069  // instruction. In this case, the first one is redundant.
1070  Value *V = &I;
1071  unsigned Depth = 0;
1072  while (V->hasOneUse() && Depth < 10) {
1073  User *U = V->user_back();
1074  auto UserInsInst = dyn_cast<InsertValueInst>(U);
1075  if (!UserInsInst || U->getOperand(0) != V)
1076  break;
1077  if (UserInsInst->getIndices() == FirstIndices) {
1078  IsRedundant = true;
1079  break;
1080  }
1081  V = UserInsInst;
1082  Depth++;
1083  }
1084 
1085  if (IsRedundant)
1086  return replaceInstUsesWith(I, I.getOperand(0));
1087 
1088  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
1089  return NewI;
1090 
1091  return nullptr;
1092 }
1093 
1095  // Can not analyze scalable type, the number of elements is not a compile-time
1096  // constant.
1097  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
1098  return false;
1099 
1100  int MaskSize = Shuf.getShuffleMask().size();
1101  int VecSize =
1102  cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1103 
1104  // A vector select does not change the size of the operands.
1105  if (MaskSize != VecSize)
1106  return false;
1107 
1108  // Each mask element must be undefined or choose a vector element from one of
1109  // the source operands without crossing vector lanes.
1110  for (int i = 0; i != MaskSize; ++i) {
1111  int Elt = Shuf.getMaskValue(i);
1112  if (Elt != -1 && Elt != i && Elt != i + VecSize)
1113  return false;
1114  }
1115 
1116  return true;
1117 }
1118 
1119 /// Turn a chain of inserts that splats a value into an insert + shuffle:
1120 /// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1121 /// shufflevector(insertelt(X, %k, 0), poison, zero)
1123  // We are interested in the last insert in a chain. So if this insert has a
1124  // single user and that user is an insert, bail.
1125  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1126  return nullptr;
1127 
1128  VectorType *VecTy = InsElt.getType();
1129  // Can not handle scalable type, the number of elements is not a compile-time
1130  // constant.
1131  if (isa<ScalableVectorType>(VecTy))
1132  return nullptr;
1133  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1134 
1135  // Do not try to do this for a one-element vector, since that's a nop,
1136  // and will cause an inf-loop.
1137  if (NumElements == 1)
1138  return nullptr;
1139 
1140  Value *SplatVal = InsElt.getOperand(1);
1141  InsertElementInst *CurrIE = &InsElt;
1142  SmallBitVector ElementPresent(NumElements, false);
1143  InsertElementInst *FirstIE = nullptr;
1144 
1145  // Walk the chain backwards, keeping track of which indices we inserted into,
1146  // until we hit something that isn't an insert of the splatted value.
1147  while (CurrIE) {
1148  auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1149  if (!Idx || CurrIE->getOperand(1) != SplatVal)
1150  return nullptr;
1151 
1152  auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1153  // Check none of the intermediate steps have any additional uses, except
1154  // for the root insertelement instruction, which can be re-used, if it
1155  // inserts at position 0.
1156  if (CurrIE != &InsElt &&
1157  (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1158  return nullptr;
1159 
1160  ElementPresent[Idx->getZExtValue()] = true;
1161  FirstIE = CurrIE;
1162  CurrIE = NextIE;
1163  }
1164 
1165  // If this is just a single insertelement (not a sequence), we are done.
1166  if (FirstIE == &InsElt)
1167  return nullptr;
1168 
1169  // If we are not inserting into an undef vector, make sure we've seen an
1170  // insert into every element.
1171  // TODO: If the base vector is not undef, it might be better to create a splat
1172  // and then a select-shuffle (blend) with the base vector.
1173  if (!match(FirstIE->getOperand(0), m_Undef()))
1174  if (!ElementPresent.all())
1175  return nullptr;
1176 
1177  // Create the insert + shuffle.
1178  Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
1179  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
1180  Constant *Zero = ConstantInt::get(Int32Ty, 0);
1181  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1182  FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);
1183 
1184  // Splat from element 0, but replace absent elements with undef in the mask.
1185  SmallVector<int, 16> Mask(NumElements, 0);
1186  for (unsigned i = 0; i != NumElements; ++i)
1187  if (!ElementPresent[i])
1188  Mask[i] = -1;
1189 
1190  return new ShuffleVectorInst(FirstIE, PoisonVec, Mask);
1191 }
1192 
1193 /// Try to fold an insert element into an existing splat shuffle by changing
1194 /// the shuffle's mask to include the index of this insert element.
1196  // Check if the vector operand of this insert is a canonical splat shuffle.
1197  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1198  if (!Shuf || !Shuf->isZeroEltSplat())
1199  return nullptr;
1200 
1201  // Bail out early if shuffle is scalable type. The number of elements in
1202  // shuffle mask is unknown at compile-time.
1203  if (isa<ScalableVectorType>(Shuf->getType()))
1204  return nullptr;
1205 
1206  // Check for a constant insertion index.
1207  uint64_t IdxC;
1208  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1209  return nullptr;
1210 
1211  // Check if the splat shuffle's input is the same as this insert's scalar op.
1212  Value *X = InsElt.getOperand(1);
1213  Value *Op0 = Shuf->getOperand(0);
1214  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1215  return nullptr;
1216 
1217  // Replace the shuffle mask element at the index of this insert with a zero.
1218  // For example:
1219  // inselt (shuf (inselt undef, X, 0), undef, <0,undef,0,undef>), X, 1
1220  // --> shuf (inselt undef, X, 0), undef, <0,0,0,undef>
1221  unsigned NumMaskElts =
1222  cast<FixedVectorType>(Shuf->getType())->getNumElements();
1223  SmallVector<int, 16> NewMask(NumMaskElts);
1224  for (unsigned i = 0; i != NumMaskElts; ++i)
1225  NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1226 
1227  return new ShuffleVectorInst(Op0, UndefValue::get(Op0->getType()), NewMask);
1228 }
1229 
1230 /// Try to fold an extract+insert element into an existing identity shuffle by
1231 /// changing the shuffle's mask to include the index of this insert element.
1233  // Check if the vector operand of this insert is an identity shuffle.
1234  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1235  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
1236  !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1237  return nullptr;
1238 
1239  // Bail out early if shuffle is scalable type. The number of elements in
1240  // shuffle mask is unknown at compile-time.
1241  if (isa<ScalableVectorType>(Shuf->getType()))
1242  return nullptr;
1243 
1244  // Check for a constant insertion index.
1245  uint64_t IdxC;
1246  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1247  return nullptr;
1248 
1249  // Check if this insert's scalar op is extracted from the identity shuffle's
1250  // input vector.
1251  Value *Scalar = InsElt.getOperand(1);
1252  Value *X = Shuf->getOperand(0);
1253  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1254  return nullptr;
1255 
1256  // Replace the shuffle mask element at the index of this extract+insert with
1257  // that same index value.
1258  // For example:
1259  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1260  unsigned NumMaskElts =
1261  cast<FixedVectorType>(Shuf->getType())->getNumElements();
1262  SmallVector<int, 16> NewMask(NumMaskElts);
1263  ArrayRef<int> OldMask = Shuf->getShuffleMask();
1264  for (unsigned i = 0; i != NumMaskElts; ++i) {
1265  if (i != IdxC) {
1266  // All mask elements besides the inserted element remain the same.
1267  NewMask[i] = OldMask[i];
1268  } else if (OldMask[i] == (int)IdxC) {
1269  // If the mask element was already set, there's nothing to do
1270  // (demanded elements analysis may unset it later).
1271  return nullptr;
1272  } else {
1273  assert(OldMask[i] == UndefMaskElem &&
1274  "Unexpected shuffle mask element for identity shuffle");
1275  NewMask[i] = IdxC;
1276  }
1277  }
1278 
1279  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1280 }
1281 
1282 /// If we have an insertelement instruction feeding into another insertelement
1283 /// and the 2nd is inserting a constant into the vector, canonicalize that
1284 /// constant insertion before the insertion of a variable:
1285 ///
1286 /// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1287 /// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1288 ///
1289 /// This has the potential of eliminating the 2nd insertelement instruction
1290 /// via constant folding of the scalar constant into a vector constant.
1293  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1294  if (!InsElt1 || !InsElt1->hasOneUse())
1295  return nullptr;
1296 
1297  Value *X, *Y;
1298  Constant *ScalarC;
1299  ConstantInt *IdxC1, *IdxC2;
1300  if (match(InsElt1->getOperand(0), m_Value(X)) &&
1301  match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1302  match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1303  match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1304  match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1305  Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1306  return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1307  }
1308 
1309  return nullptr;
1310 }
1311 
1312 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1313 /// --> shufflevector X, CVec', Mask'
1315  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1316  // Bail out if the parent has more than one use. In that case, we'd be
1317  // replacing the insertelt with a shuffle, and that's not a clear win.
1318  if (!Inst || !Inst->hasOneUse())
1319  return nullptr;
1320  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1321  // The shuffle must have a constant vector operand. The insertelt must have
1322  // a constant scalar being inserted at a constant position in the vector.
1323  Constant *ShufConstVec, *InsEltScalar;
1324  uint64_t InsEltIndex;
1325  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1326  !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1327  !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1328  return nullptr;
1329 
1330  // Adding an element to an arbitrary shuffle could be expensive, but a
1331  // shuffle that selects elements from vectors without crossing lanes is
1332  // assumed cheap.
1333  // If we're just adding a constant into that shuffle, it will still be
1334  // cheap.
1335  if (!isShuffleEquivalentToSelect(*Shuf))
1336  return nullptr;
1337 
1338  // From the above 'select' check, we know that the mask has the same number
1339  // of elements as the vector input operands. We also know that each constant
1340  // input element is used in its lane and can not be used more than once by
1341  // the shuffle. Therefore, replace the constant in the shuffle's constant
1342  // vector with the insertelt constant. Replace the constant in the shuffle's
1343  // mask vector with the insertelt index plus the length of the vector
1344  // (because the constant vector operand of a shuffle is always the 2nd
1345  // operand).
1346  ArrayRef<int> Mask = Shuf->getShuffleMask();
1347  unsigned NumElts = Mask.size();
1348  SmallVector<Constant *, 16> NewShufElts(NumElts);
1349  SmallVector<int, 16> NewMaskElts(NumElts);
1350  for (unsigned I = 0; I != NumElts; ++I) {
1351  if (I == InsEltIndex) {
1352  NewShufElts[I] = InsEltScalar;
1353  NewMaskElts[I] = InsEltIndex + NumElts;
1354  } else {
1355  // Copy over the existing values.
1356  NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1357  NewMaskElts[I] = Mask[I];
1358  }
1359  }
1360 
1361  // Create new operands for a shuffle that includes the constant of the
1362  // original insertelt. The old shuffle will be dead now.
1363  return new ShuffleVectorInst(Shuf->getOperand(0),
1364  ConstantVector::get(NewShufElts), NewMaskElts);
1365  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1366  // Transform sequences of insertelements ops with constant data/indexes into
1367  // a single shuffle op.
1368  // Can not handle scalable type, the number of elements needed to create
1369  // shuffle mask is not a compile-time constant.
1370  if (isa<ScalableVectorType>(InsElt.getType()))
1371  return nullptr;
1372  unsigned NumElts =
1373  cast<FixedVectorType>(InsElt.getType())->getNumElements();
1374 
1375  uint64_t InsertIdx[2];
1376  Constant *Val[2];
1377  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1378  !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1379  !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1380  !match(IEI->getOperand(1), m_Constant(Val[1])))
1381  return nullptr;
1382  SmallVector<Constant *, 16> Values(NumElts);
1383  SmallVector<int, 16> Mask(NumElts);
1384  auto ValI = std::begin(Val);
1385  // Generate new constant vector and mask.
1386  // We have 2 values/masks from the insertelements instructions. Insert them
1387  // into new value/mask vectors.
1388  for (uint64_t I : InsertIdx) {
1389  if (!Values[I]) {
1390  Values[I] = *ValI;
1391  Mask[I] = NumElts + I;
1392  }
1393  ++ValI;
1394  }
1395  // Remaining values are filled with 'undef' values.
1396  for (unsigned I = 0; I < NumElts; ++I) {
1397  if (!Values[I]) {
1398  Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1399  Mask[I] = I;
1400  }
1401  }
1402  // Create new operands for a shuffle that includes the constant of the
1403  // original insertelt.
1404  return new ShuffleVectorInst(IEI->getOperand(0),
1405  ConstantVector::get(Values), Mask);
1406  }
1407  return nullptr;
1408 }
1409 
1410 /// If both the base vector and the inserted element are extended from the same
1411 /// type, do the insert element in the narrow source type followed by extend.
1412 /// TODO: This can be extended to include other cast opcodes, but particularly
1413 /// if we create a wider insertelement, make sure codegen is not harmed.
1416  // We are creating a vector extend. If the original vector extend has another
1417  // use, that would mean we end up with 2 vector extends, so avoid that.
1418  // TODO: We could ease the use-clause to "if at least one op has one use"
1419  // (assuming that the source types match - see next TODO comment).
1420  Value *Vec = InsElt.getOperand(0);
1421  if (!Vec->hasOneUse())
1422  return nullptr;
1423 
1424  Value *Scalar = InsElt.getOperand(1);
1425  Value *X, *Y;
1426  CastInst::CastOps CastOpcode;
1427  if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1428  CastOpcode = Instruction::FPExt;
1429  else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1430  CastOpcode = Instruction::SExt;
1431  else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1432  CastOpcode = Instruction::ZExt;
1433  else
1434  return nullptr;
1435 
1436  // TODO: We can allow mismatched types by creating an intermediate cast.
1437  if (X->getType()->getScalarType() != Y->getType())
1438  return nullptr;
1439 
1440  // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1441  Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1442  return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1443 }
1444 
1446  Value *VecOp = IE.getOperand(0);
1447  Value *ScalarOp = IE.getOperand(1);
1448  Value *IdxOp = IE.getOperand(2);
1449 
1450  if (auto *V = SimplifyInsertElementInst(
1451  VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1452  return replaceInstUsesWith(IE, V);
1453 
1454  // If the scalar is bitcast and inserted into undef, do the insert in the
1455  // source type followed by bitcast.
1456  // TODO: Generalize for insert into any constant, not just undef?
1457  Value *ScalarSrc;
1458  if (match(VecOp, m_Undef()) &&
1459  match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1460  (ScalarSrc->getType()->isIntegerTy() ||
1461  ScalarSrc->getType()->isFloatingPointTy())) {
1462  // inselt undef, (bitcast ScalarSrc), IdxOp -->
1463  // bitcast (inselt undef, ScalarSrc, IdxOp)
1464  Type *ScalarTy = ScalarSrc->getType();
1465  Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1466  UndefValue *NewUndef = UndefValue::get(VecTy);
1467  Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1468  return new BitCastInst(NewInsElt, IE.getType());
1469  }
1470 
1471  // If the vector and scalar are both bitcast from the same element type, do
1472  // the insert in that source type followed by bitcast.
1473  Value *VecSrc;
1474  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1475  match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1476  (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1477  VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1478  cast<VectorType>(VecSrc->getType())->getElementType() ==
1479  ScalarSrc->getType()) {
1480  // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1481  // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1482  Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1483  return new BitCastInst(NewInsElt, IE.getType());
1484  }
1485 
1486  // If the inserted element was extracted from some other fixed-length vector
1487  // and both indexes are valid constants, try to turn this into a shuffle.
1488  // Can not handle scalable vector type, the number of elements needed to
1489  // create shuffle mask is not a compile-time constant.
1490  uint64_t InsertedIdx, ExtractedIdx;
1491  Value *ExtVecOp;
1492  if (isa<FixedVectorType>(IE.getType()) &&
1493  match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1494  match(ScalarOp,
1495  m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1496  isa<FixedVectorType>(ExtVecOp->getType()) &&
1497  ExtractedIdx <
1498  cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1499  // TODO: Looking at the user(s) to determine if this insert is a
1500  // fold-to-shuffle opportunity does not match the usual instcombine
1501  // constraints. We should decide if the transform is worthy based only
1502  // on this instruction and its operands, but that may not work currently.
1503  //
1504  // Here, we are trying to avoid creating shuffles before reaching
1505  // the end of a chain of extract-insert pairs. This is complicated because
1506  // we do not generally form arbitrary shuffle masks in instcombine
1507  // (because those may codegen poorly), but collectShuffleElements() does
1508  // exactly that.
1509  //
1510  // The rules for determining what is an acceptable target-independent
1511  // shuffle mask are fuzzy because they evolve based on the backend's
1512  // capabilities and real-world impact.
1513  auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1514  if (!Insert.hasOneUse())
1515  return true;
1516  auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1517  if (!InsertUser)
1518  return true;
1519  return false;
1520  };
1521 
1522  // Try to form a shuffle from a chain of extract-insert ops.
1523  if (isShuffleRootCandidate(IE)) {
1525  ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1526 
1527  // The proposed shuffle may be trivial, in which case we shouldn't
1528  // perform the combine.
1529  if (LR.first != &IE && LR.second != &IE) {
1530  // We now have a shuffle of LHS, RHS, Mask.
1531  if (LR.second == nullptr)
1532  LR.second = UndefValue::get(LR.first->getType());
1533  return new ShuffleVectorInst(LR.first, LR.second, Mask);
1534  }
1535  }
1536  }
1537 
1538  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1539  unsigned VWidth = VecTy->getNumElements();
1540  APInt UndefElts(VWidth, 0);
1541  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1542  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1543  if (V != &IE)
1544  return replaceInstUsesWith(IE, V);
1545  return &IE;
1546  }
1547  }
1548 
1550  return Shuf;
1551 
1552  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1553  return NewInsElt;
1554 
1555  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1556  return Broadcast;
1557 
1558  if (Instruction *Splat = foldInsEltIntoSplat(IE))
1559  return Splat;
1560 
1561  if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1562  return IdentityShuf;
1563 
1565  return Ext;
1566 
1567  return nullptr;
1568 }
1569 
1570 /// Return true if we can evaluate the specified expression tree if the vector
1571 /// elements were shuffled in a different order.
// NOTE(review): the parameter-list line is truncated in this extraction; from
// the body, the function takes the value `V` being examined, the proposed
// shuffle `Mask`, and a recursion `Depth` that defaults to 5 — confirm against
// the original source.
1573  unsigned Depth = 5) {
1574  // We can always reorder the elements of a constant.
1575  if (isa<Constant>(V))
1576  return true;
1577 
1578  // We won't reorder vector arguments. No IPO here.
1579  Instruction *I = dyn_cast<Instruction>(V);
1580  if (!I) return false;
1581 
1582  // Two users may expect different orders of the elements. Don't try it.
1583  if (!I->hasOneUse())
1584  return false;
1585 
  // Bound the recursion to keep compile time in check.
1586  if (Depth == 0) return false;
1587 
1588  switch (I->getOpcode()) {
1589  case Instruction::UDiv:
1590  case Instruction::SDiv:
1591  case Instruction::URem:
1592  case Instruction::SRem:
1593  // Propagating an undefined shuffle mask element to integer div/rem is not
1594  // allowed because those opcodes can create immediate undefined behavior
1595  // from an undefined element in an operand.
1596  if (llvm::is_contained(Mask, -1))
1597  return false;
  // NOTE(review): the fallthrough annotation on the next line was lost in
  // this extraction; the div/rem cases deliberately fall through to the
  // generic operand check below.
1599  case Instruction::Add:
1600  case Instruction::FAdd:
1601  case Instruction::Sub:
1602  case Instruction::FSub:
1603  case Instruction::Mul:
1604  case Instruction::FMul:
1605  case Instruction::FDiv:
1606  case Instruction::FRem:
1607  case Instruction::Shl:
1608  case Instruction::LShr:
1609  case Instruction::AShr:
1610  case Instruction::And:
1611  case Instruction::Or:
1612  case Instruction::Xor:
1613  case Instruction::ICmp:
1614  case Instruction::FCmp:
1615  case Instruction::Trunc:
1616  case Instruction::ZExt:
1617  case Instruction::SExt:
1618  case Instruction::FPToUI:
1619  case Instruction::FPToSI:
1620  case Instruction::UIToFP:
1621  case Instruction::SIToFP:
1622  case Instruction::FPTrunc:
1623  case Instruction::FPExt:
1624  case Instruction::GetElementPtr: {
1625  // Bail out if we would create longer vector ops. We could allow creating
1626  // longer vector ops, but that may result in more expensive codegen.
1627  Type *ITy = I->getType();
1628  if (ITy->isVectorTy() &&
1629  Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1630  return false;
  // All operands must themselves be shufflable for the whole tree to be.
1631  for (Value *Operand : I->operands()) {
1632  if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1633  return false;
1634  }
1635  return true;
1636  }
1637  case Instruction::InsertElement: {
1638  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1639  if (!CI) return false;
1640  int ElementNumber = CI->getLimitedValue();
1641 
1642  // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1643  // can't put an element into multiple indices.
1644  bool SeenOnce = false;
1645  for (int i = 0, e = Mask.size(); i != e; ++i) {
1646  if (Mask[i] == ElementNumber) {
1647  if (SeenOnce)
1648  return false;
1649  SeenOnce = true;
1650  }
1651  }
  // Only the base vector (operand 0) needs to be re-evaluated; the inserted
  // scalar (operand 1) is position-independent.
1652  return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1653  }
1654  }
  // Any opcode not listed above is assumed unsafe to reorder.
1655  return false;
1656 }
1657 
1658 /// Rebuild a new instruction just like 'I' but with the new operands given.
1659 /// In the event of type mismatch, the type of the operands is correct.
// NOTE(review): the signature line is missing from this extraction; from the
// body, this takes the template instruction `I` and the replacement operand
// list `NewOps` — confirm against the original source.
1661  // We don't want to use the IRBuilder here because we want the replacement
1662  // instructions to appear next to 'I', not the builder's insertion point.
1663  switch (I->getOpcode()) {
1664  case Instruction::Add:
1665  case Instruction::FAdd:
1666  case Instruction::Sub:
1667  case Instruction::FSub:
1668  case Instruction::Mul:
1669  case Instruction::FMul:
1670  case Instruction::UDiv:
1671  case Instruction::SDiv:
1672  case Instruction::FDiv:
1673  case Instruction::URem:
1674  case Instruction::SRem:
1675  case Instruction::FRem:
1676  case Instruction::Shl:
1677  case Instruction::LShr:
1678  case Instruction::AShr:
1679  case Instruction::And:
1680  case Instruction::Or:
1681  case Instruction::Xor: {
1682  BinaryOperator *BO = cast<BinaryOperator>(I);
1683  assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1684  BinaryOperator *New =
1685  BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1686  NewOps[0], NewOps[1], "", BO);
  // Carry over the wrap/exact/fast-math flags so the rebuilt op keeps the
  // same poison-generating guarantees as the original.
1687  if (isa<OverflowingBinaryOperator>(BO)) {
1688  New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1689  New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1690  }
1691  if (isa<PossiblyExactOperator>(BO)) {
1692  New->setIsExact(BO->isExact());
1693  }
1694  if (isa<FPMathOperator>(BO))
1695  New->copyFastMathFlags(I);
1696  return New;
1697  }
1698  case Instruction::ICmp:
1699  assert(NewOps.size() == 2 && "icmp with #ops != 2");
1700  return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1701  NewOps[0], NewOps[1]);
1702  case Instruction::FCmp:
1703  assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1704  return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1705  NewOps[0], NewOps[1]);
1706  case Instruction::Trunc:
1707  case Instruction::ZExt:
1708  case Instruction::SExt:
1709  case Instruction::FPToUI:
1710  case Instruction::FPToSI:
1711  case Instruction::UIToFP:
1712  case Instruction::SIToFP:
1713  case Instruction::FPTrunc:
1714  case Instruction::FPExt: {
1715  // It's possible that the mask has a different number of elements from
1716  // the original cast. We recompute the destination type to match the mask.
1717  Type *DestTy = VectorType::get(
1718  I->getType()->getScalarType(),
1719  cast<VectorType>(NewOps[0]->getType())->getElementCount());
1720  assert(NewOps.size() == 1 && "cast with #ops != 1");
1721  return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1722  "", I);
1723  }
1724  case Instruction::GetElementPtr: {
1725  Value *Ptr = NewOps[0];
1726  ArrayRef<Value*> Idx = NewOps.slice(1);
  // NOTE(review): the `GetElementPtrInst::Create(` call head on the next
  // line was lost in this extraction — confirm against the original source.
1728  cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1729  GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1730  return GEP;
1731  }
1732  }
  // The caller is expected to have validated the opcode via
  // canEvaluateShuffled() before invoking this.
1733  llvm_unreachable("failed to rebuild vector instructions");
1734 }
1735 
// Evaluate the expression tree rooted at `V` as if its vector elements were
// reordered according to `Mask`, returning the rewritten value.
// NOTE(review): the function signature line is missing from this extraction;
// the parameter names above are inferred from the body — confirm against the
// original source.
1737  // Mask.size() does not need to be equal to the number of vector elements.
1738 
1739  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1740  Type *EltTy = V->getType()->getScalarType();
1741  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
  // Undef shuffles to undef of the (possibly different-length) result type.
1742  if (match(V, m_Undef()))
1743  return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1744 
1745  if (isa<ConstantAggregateZero>(V))
1746  return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1747 
  // Any other constant is reordered via a constant-folded shufflevector.
1748  if (Constant *C = dyn_cast<Constant>(V))
1749  return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1750  Mask);
1751 
1752  Instruction *I = cast<Instruction>(V);
1753  switch (I->getOpcode()) {
1754  case Instruction::Add:
1755  case Instruction::FAdd:
1756  case Instruction::Sub:
1757  case Instruction::FSub:
1758  case Instruction::Mul:
1759  case Instruction::FMul:
1760  case Instruction::UDiv:
1761  case Instruction::SDiv:
1762  case Instruction::FDiv:
1763  case Instruction::URem:
1764  case Instruction::SRem:
1765  case Instruction::FRem:
1766  case Instruction::Shl:
1767  case Instruction::LShr:
1768  case Instruction::AShr:
1769  case Instruction::And:
1770  case Instruction::Or:
1771  case Instruction::Xor:
1772  case Instruction::ICmp:
1773  case Instruction::FCmp:
1774  case Instruction::Trunc:
1775  case Instruction::ZExt:
1776  case Instruction::SExt:
1777  case Instruction::FPToUI:
1778  case Instruction::FPToSI:
1779  case Instruction::UIToFP:
1780  case Instruction::SIToFP:
1781  case Instruction::FPTrunc:
1782  case Instruction::FPExt:
1783  case Instruction::Select:
1784  case Instruction::GetElementPtr: {
1785  SmallVector<Value*, 8> NewOps;
  // A length-changing mask always forces a rebuild even if no operand
  // changed, because the result type differs from I's type.
1786  bool NeedsRebuild =
1787  (Mask.size() !=
1788  cast<FixedVectorType>(I->getType())->getNumElements());
1789  for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1790  Value *V;
1791  // Recursively call evaluateInDifferentElementOrder on vector arguments
1792  // as well. E.g. GetElementPtr may have scalar operands even if the
1793  // return value is a vector, so we need to examine the operand type.
1794  if (I->getOperand(i)->getType()->isVectorTy())
1795  V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1796  else
1797  V = I->getOperand(i);
1798  NewOps.push_back(V);
1799  NeedsRebuild |= (V != I->getOperand(i));
1800  }
1801  if (NeedsRebuild) {
1802  return buildNew(I, NewOps);
1803  }
1804  return I;
1805  }
1806  case Instruction::InsertElement: {
1807  int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1808 
1809  // The insertelement was inserting at Element. Figure out which element
1810  // that becomes after shuffling. The answer is guaranteed to be unique
1811  // by CanEvaluateShuffled.
1812  bool Found = false;
1813  int Index = 0;
1814  for (int e = Mask.size(); Index != e; ++Index) {
1815  if (Mask[Index] == Element) {
1816  Found = true;
1817  break;
1818  }
1819  }
1820 
1821  // If element is not in Mask, no need to handle the operand 1 (element to
1822  // be inserted). Just evaluate values in operand 0 according to Mask.
1823  if (!Found)
1824  return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1825 
  // Re-insert the same scalar at its post-shuffle lane.
1826  Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1827  return InsertElementInst::Create(V, I->getOperand(1),
1828  ConstantInt::get(I32Ty, Index), "", I);
1829  }
1830  }
1831  llvm_unreachable("failed to reorder elements of vector instruction!");
1832 }
1833 
1834 // Returns true if the shuffle is extracting a contiguous range of values from
1835 // LHS, for example:
1836 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1837 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1838 // Shuffles to: |EE|FF|GG|HH|
1839 // +--+--+--+--+
// NOTE(review): the first half of the signature is truncated in this
// extraction; the body uses SVI (the shuffle) and its mask — confirm
// against the original source.
1841  ArrayRef<int> Mask) {
1842  unsigned LHSElems =
1843  cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
1844  unsigned MaskElems = Mask.size();
1845  unsigned BegIdx = Mask.front();
1846  unsigned EndIdx = Mask.back();
  // The range must be in-bounds of LHS, ascending, and exactly MaskElems wide.
1847  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1848  return false;
  // Every mask element must be consecutive starting at BegIdx.
1849  for (unsigned I = 0; I != MaskElems; ++I)
1850  if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1851  return false;
1852  return true;
1853 }
1854 
1855 /// These are the ingredients in an alternate form binary operator as described
1856 /// below.
// NOTE(review): the member declarations (an opcode and two operand values,
// used as Opcode/Op0/Op1 by getAlternateBinop below) and the start of the
// constructor are missing from this extraction — confirm against the
// original source.
1857 struct BinopElts {
1862  Value *V0 = nullptr, Value *V1 = nullptr) :
1863  Opcode(Opc), Op0(V0), Op1(V1) {}
  // A default (zero) opcode marks an invalid/absent result.
1864  operator bool() const { return Opcode != 0; }
1865 };
1866 
1867 /// Binops may be transformed into binops with different opcodes and operands.
1868 /// Reverse the usual canonicalization to enable folds with the non-canonical
1869 /// form of the binop. If a transform is possible, return the elements of the
1870 /// new binop. If not, return invalid elements.
// NOTE(review): the signature line is missing from this extraction; the body
// uses the binop `BO` and the data layout `DL` — confirm against the
// original source.
1872  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1873  Type *Ty = BO->getType();
1874  switch (BO->getOpcode()) {
1875  case Instruction::Shl: {
1876  // shl X, C --> mul X, (1 << C)
1877  Constant *C;
1878  if (match(BO1, m_Constant(C))) {
1879  Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1880  return { Instruction::Mul, BO0, ShlOne };
1881  }
1882  break;
1883  }
1884  case Instruction::Or: {
1885  // or X, C --> add X, C (when X and C have no common bits set)
1886  const APInt *C;
1887  if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1888  return { Instruction::Add, BO0, BO1 };
1889  break;
1890  }
1891  default:
1892  break;
1893  }
  // No alternate form known; returns an invalid BinopElts (Opcode == 0).
1894  return {};
1895 }
1896 
// Fold a select-equivalent shuffle of a value and a binop of that same value
// with a constant into a single binop with a lane-blended constant.
// NOTE(review): the signature line is missing from this extraction — confirm
// against the original source.
1898  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1899 
1900  // Are we shuffling together some value and that same value after it has been
1901  // modified by a binop with a constant?
1902  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1903  Constant *C;
1904  bool Op0IsBinop;
1905  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1906  Op0IsBinop = true;
1907  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1908  Op0IsBinop = false;
1909  else
1910  return nullptr;
1911 
1912  // The identity constant for a binop leaves a variable operand unchanged. For
1913  // a vector, this is a splat of something like 0, -1, or 1.
1914  // If there's no identity constant for this binop, we're done.
1915  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1916  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1917  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1918  if (!IdC)
1919  return nullptr;
1920 
1921  // Shuffle identity constants into the lanes that return the original value.
1922  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1923  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1924  // The existing binop constant vector remains in the same operand position.
// NOTE(review): the second arm of this conditional expression (the case where
// the binop is Op1) is truncated in this extraction — confirm against the
// original source.
1926  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1928 
  // Div/rem/shift can create poison or immediate UB from the blended
  // constant, so sanitize it in that case.
1929  bool MightCreatePoisonOrUB =
1931  (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1932  if (MightCreatePoisonOrUB)
1933  NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
1934 
1935  // shuf (bop X, C), X, M --> bop X, C'
1936  // shuf X, (bop X, C), M --> bop X, C'
1937  Value *X = Op0IsBinop ? Op1 : Op0;
1938  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1939  NewBO->copyIRFlags(BO);
1940 
1941  // An undef shuffle mask element may propagate as an undef constant element in
1942  // the new binop. That would produce poison where the original code might not.
1943  // If we already made a safe constant, then there's no danger.
1944  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
1945  NewBO->dropPoisonGeneratingFlags();
1946  return NewBO;
1947 }
1948 
1949 /// If we have an insert of a scalar to a non-zero element of an undefined
1950 /// vector and then shuffle that value, that's the same as inserting to the zero
1951 /// element and shuffling. Splatting from the zero element is recognized as the
1952 /// canonical form of splat.
// NOTE(review): the signature lines are missing from this extraction; the
// body uses the shuffle `Shuf` and an IRBuilder `Builder` — confirm against
// the original source.
1955  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
// NOTE(review): the declaration of `Mask` on the next original line is
// missing from this extraction.
1957  Value *X;
1958  uint64_t IndexC;
1959 
1960  // Match a shuffle that is a splat to a non-zero element.
1961  if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
1962  m_ConstantInt(IndexC)))) ||
1963  !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
1964  return nullptr;
1965 
1966  // Insert into element 0 of an undef vector.
1967  UndefValue *UndefVec = UndefValue::get(Shuf.getType());
1968  Constant *Zero = Builder.getInt32(0);
1969  Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
1970 
1971  // Splat from element 0. Any mask element that is undefined remains undefined.
1972  // For example:
1973  // shuf (inselt undef, X, 2), undef, <2,2,undef>
1974  // --> shuf (inselt undef, X, 0), undef, <0,0,undef>
1975  unsigned NumMaskElts =
1976  cast<FixedVectorType>(Shuf.getType())->getNumElements();
1977  SmallVector<int, 16> NewMask(NumMaskElts, 0);
1978  for (unsigned i = 0; i != NumMaskElts; ++i)
1979  if (Mask[i] == UndefMaskElem)
1980  NewMask[i] = Mask[i];
1981 
1982  return new ShuffleVectorInst(NewIns, UndefVec, NewMask);
1983 }
1984 
1985 /// Try to fold shuffles that are the equivalent of a vector select.
// NOTE(review): the first signature line is missing from this extraction;
// visible parameters are the shuffle `Shuf`, a `Builder`, and the data
// layout `DL` — confirm against the original source.
1988  const DataLayout &DL) {
1989  if (!Shuf.isSelect())
1990  return nullptr;
1991 
1992  // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
1993  // Commuting undef to operand 0 conflicts with another canonicalization.
1994  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
1995  if (!match(Shuf.getOperand(1), m_Undef()) &&
1996  Shuf.getMaskValue(0) >= (int)NumElts) {
1997  // TODO: Can we assert that both operands of a shuffle-select are not undef
1998  // (otherwise, it would have been folded by instsimplify?
1999  Shuf.commute();
2000  return &Shuf;
2001  }
2002 
// NOTE(review): the call guarded here (presumably the single-binop fold
// defined above) is truncated in this extraction.
2004  return I;
2005 
2006  BinaryOperator *B0, *B1;
2007  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2008  !match(Shuf.getOperand(1), m_BinOp(B1)))
2009  return nullptr;
2010 
  // Both binops must have their constant in the same operand position.
2011  Value *X, *Y;
2012  Constant *C0, *C1;
2013  bool ConstantsAreOp1;
2014  if (match(B0, m_BinOp(m_Value(X), m_Constant(C0))) &&
2016  ConstantsAreOp1 = true;
2017  else if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2019  ConstantsAreOp1 = false;
2020  else
2021  return nullptr;
2022 
2023  // We need matching binops to fold the lanes together.
2024  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2025  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2026  bool DropNSW = false;
2027  if (ConstantsAreOp1 && Opc0 != Opc1) {
2028  // TODO: We drop "nsw" if shift is converted into multiply because it may
2029  // not be correct when the shift amount is BitWidth - 1. We could examine
2030  // each vector element to determine if it is safe to keep that flag.
2031  if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2032  DropNSW = true;
2033  if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2034  assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2035  Opc0 = AltB0.Opcode;
2036  C0 = cast<Constant>(AltB0.Op1);
2037  } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2038  assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2039  Opc1 = AltB1.Opcode;
2040  C1 = cast<Constant>(AltB1.Op1);
2041  }
2042  }
2043 
2044  if (Opc0 != Opc1)
2045  return nullptr;
2046 
2047  // The opcodes must be the same. Use a new name to make that clear.
2048  BinaryOperator::BinaryOps BOpc = Opc0;
2049 
2050  // Select the constant elements needed for the single binop.
// NOTE(review): the mask extraction and the construction of `NewC` from C0/C1
// are truncated in this extraction — confirm against the original source.
2053 
2054  // We are moving a binop after a shuffle. When a shuffle has an undefined
2055  // mask element, the result is undefined, but it is not poison or undefined
2056  // behavior. That is not necessarily true for div/rem/shift.
2057  bool MightCreatePoisonOrUB =
2060  if (MightCreatePoisonOrUB)
2062  ConstantsAreOp1);
2063 
2064  Value *V;
2065  if (X == Y) {
2066  // Remove a binop and the shuffle by rearranging the constant:
2067  // shuffle (op V, C0), (op V, C1), M --> op V, C'
2068  // shuffle (op C0, V), (op C1, V), M --> op C', V
2069  V = X;
2070  } else {
2071  // If there are 2 different variable operands, we must create a new shuffle
2072  // (select) first, so check uses to ensure that we don't end up with more
2073  // instructions than we started with.
2074  if (!B0->hasOneUse() && !B1->hasOneUse())
2075  return nullptr;
2076 
2077  // If we use the original shuffle mask and op1 is *variable*, we would be
2078  // putting an undef into operand 1 of div/rem/shift. This is either UB or
2079  // poison. We do not have to guard against UB when *constants* are op1
2080  // because safe constants guarantee that we do not overflow sdiv/srem (and
2081  // there's no danger for other opcodes).
2082  // TODO: To allow this case, create a new shuffle mask with no undefs.
2083  if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2084  return nullptr;
2085 
2086  // Note: In general, we do not create new shuffles in InstCombine because we
2087  // do not know if a target can lower an arbitrary shuffle optimally. In this
2088  // case, the shuffle uses the existing mask, so there is no additional risk.
2089 
2090  // Select the variable vectors first, then perform the binop:
2091  // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2092  // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2093  V = Builder.CreateShuffleVector(X, Y, Mask);
2094  }
2095 
2096  Instruction *NewBO = ConstantsAreOp1 ? BinaryOperator::Create(BOpc, V, NewC) :
2097  BinaryOperator::Create(BOpc, NewC, V);
2098 
2099  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2100  // 1. If we changed an opcode, poison conditions might have changed.
2101  // 2. If the shuffle had undef mask elements, the new binop might have undefs
2102  // where the original code did not. But if we already made a safe constant,
2103  // then there's no danger.
2104  NewBO->copyIRFlags(B0);
2105  NewBO->andIRFlags(B1);
2106  if (DropNSW)
2107  NewBO->setHasNoSignedWrap(false);
2108  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2109  NewBO->dropPoisonGeneratingFlags();
2110  return NewBO;
2111 }
2112 
2113 /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2114 /// Example (little endian):
2115 /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
// NOTE(review): the first half of the signature is truncated in this
// extraction; visible parameters are the shuffle `Shuf` and `IsBigEndian` —
// confirm against the original source.
2117  bool IsBigEndian) {
2118  // This must be a bitcasted shuffle of 1 vector integer operand.
2119  Type *DestType = Shuf.getType();
2120  Value *X;
2121  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2122  !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2123  return nullptr;
2124 
2125  // The source type must have the same number of elements as the shuffle,
2126  // and the source element type must be larger than the shuffle element type.
2127  Type *SrcType = X->getType();
2128  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2129  cast<FixedVectorType>(SrcType)->getNumElements() !=
2130  cast<FixedVectorType>(DestType)->getNumElements() ||
2131  SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2132  return nullptr;
2133 
2134  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2135  "Expected a shuffle that decreases length");
2136 
2137  // Last, check that the mask chooses the correct low bits for each narrow
2138  // element in the result.
2139  uint64_t TruncRatio =
2140  SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
// NOTE(review): the mask extraction line here is missing from this
// extraction.
2142  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2143  if (Mask[i] == UndefMaskElem)
2144  continue;
  // On big-endian targets the low bits live in the last sub-element of each
  // wide element, hence the (i + 1) * ratio - 1 index.
2145  uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2146  assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2147  if (Mask[i] != (int)LSBIndex)
2148  return nullptr;
2149  }
2150 
2151  return new TruncInst(X, DestType);
2152 }
2153 
2154 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2155 /// narrowing (concatenating with undef and extracting back to the original
2156 /// length). This allows replacing the wide select with a narrow select.
// NOTE(review): the signature lines are missing from this extraction; the
// body uses the shuffle `Shuf` and an IRBuilder `Builder` — confirm against
// the original source.
2159  // This must be a narrowing identity shuffle. It extracts the 1st N elements
2160  // of the 1st vector operand of a shuffle.
2161  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2162  return nullptr;
2163 
2164  // The vector being shuffled must be a vector select that we can eliminate.
2165  // TODO: The one-use requirement could be eased if X and/or Y are constants.
2166  Value *Cond, *X, *Y;
// NOTE(review): the select-matcher line is truncated in this extraction.
2167  if (!match(Shuf.getOperand(0),
2169  return nullptr;
2170 
2171  // We need a narrow condition value. It must be extended with undef elements
2172  // and have the same number of elements as this shuffle.
2173  unsigned NarrowNumElts =
2174  cast<FixedVectorType>(Shuf.getType())->getNumElements();
2175  Value *NarrowCond;
2176  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2177  cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2178  NarrowNumElts ||
2179  !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2180  return nullptr;
2181 
2182  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2183  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2184  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2185  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2186  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2187 }
2188 
2189 /// Try to fold an extract subvector operation.
// NOTE(review): the signature line is missing from this extraction; the body
// operates on the shuffle `Shuf` — confirm against the original source.
2191  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2192  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2193  return nullptr;
2194 
2195  // Check if we are extracting all bits of an inserted scalar:
2196  // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2197  Value *X;
2198  if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2199  X->getType()->getPrimitiveSizeInBits() ==
2200  Shuf.getType()->getPrimitiveSizeInBits())
2201  return new BitCastInst(X, Shuf.getType());
2202 
2203  // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2204  Value *Y;
// NOTE(review): the declaration of `Mask` on the next original line is
// missing from this extraction.
2206  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2207  return nullptr;
2208 
2209  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2210  // then combining may result in worse codegen.
2211  if (!Op0->hasOneUse())
2212  return nullptr;
2213 
2214  // We are extracting a subvector from a shuffle. Remove excess elements from
2215  // the 1st shuffle mask to eliminate the extract.
2216  //
2217  // This transform is conservatively limited to identity extracts because we do
2218  // not allow arbitrary shuffle mask creation as a target-independent transform
2219  // (because we can't guarantee that will lower efficiently).
2220  //
2221  // If the extracting shuffle has an undef mask element, it transfers to the
2222  // new shuffle mask. Otherwise, copy the original mask element. Example:
2223  // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2224  // shuf X, Y, <C0, undef, C2, undef>
2225  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2226  SmallVector<int, 16> NewMask(NumElts);
2227  assert(NumElts < Mask.size() &&
2228  "Identity with extract must have less elements than its inputs");
2229 
2230  for (unsigned i = 0; i != NumElts; ++i) {
2231  int ExtractMaskElt = Shuf.getMaskValue(i);
2232  int MaskElt = Mask[i];
2233  NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
2234  }
2235  return new ShuffleVectorInst(X, Y, NewMask);
2236 }
2237 
2238 /// Try to replace a shuffle with an insertelement or try to replace a shuffle
2239 /// operand with the operand of an insertelement.
// NOTE(review): the first half of the signature is truncated in this
// extraction; visible parameters are the shuffle `Shuf` and the combiner
// `IC` — confirm against the original source.
2241  InstCombinerImpl &IC) {
2242  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
// NOTE(review): the declaration of `Mask` before this line is missing from
// this extraction.
2244  Shuf.getShuffleMask(Mask);
2245 
2246  // The shuffle must not change vector sizes.
2247  // TODO: This restriction could be removed if the insert has only one use
2248  // (because the transform would require a new length-changing shuffle).
2249  int NumElts = Mask.size();
2250  if (NumElts != (int)(cast<FixedVectorType>(V0->getType())->getNumElements()))
2251  return nullptr;
2252 
2253  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2254  // not be able to handle it there if the insertelement has >1 use.
2255  // If the shuffle has an insertelement operand but does not choose the
2256  // inserted scalar element from that value, then we can replace that shuffle
2257  // operand with the source vector of the insertelement.
2258  Value *X;
2259  uint64_t IdxC;
2260  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2261  // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2262  if (!is_contained(Mask, (int)IdxC))
2263  return IC.replaceOperand(Shuf, 0, X);
2264  }
2265  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2266  // Offset the index constant by the vector width because we are checking for
2267  // accesses to the 2nd vector input of the shuffle.
2268  IdxC += NumElts;
2269  // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2270  if (!is_contained(Mask, (int)IdxC))
2271  return IC.replaceOperand(Shuf, 1, X);
2272  }
2273 
2274  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
  // The lambda captures V0/V1/Mask/NumElts by reference so it also works
  // after the commute below.
2275  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2276  // We need an insertelement with a constant index.
2277  if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2278  m_ConstantInt(IndexC))))
2279  return false;
2280 
2281  // Test the shuffle mask to see if it splices the inserted scalar into the
2282  // operand 1 vector of the shuffle.
2283  int NewInsIndex = -1;
2284  for (int i = 0; i != NumElts; ++i) {
2285  // Ignore undef mask elements.
2286  if (Mask[i] == -1)
2287  continue;
2288 
2289  // The shuffle takes elements of operand 1 without lane changes.
2290  if (Mask[i] == NumElts + i)
2291  continue;
2292 
2293  // The shuffle must choose the inserted scalar exactly once.
2294  if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2295  return false;
2296 
2297  // The shuffle is placing the inserted scalar into element i.
2298  NewInsIndex = i;
2299  }
2300 
2301  assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2302 
2303  // Index is updated to the potentially translated insertion lane.
2304  IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
2305  return true;
2306  };
2307 
2308  // If the shuffle is unnecessary, insert the scalar operand directly into
2309  // operand 1 of the shuffle. Example:
2310  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2311  Value *Scalar;
2312  ConstantInt *IndexC;
2313  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2314  return InsertElementInst::Create(V1, Scalar, IndexC);
2315 
2316  // Try again after commuting shuffle. Example:
2317  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2318  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2319  std::swap(V0, V1);
// NOTE(review): the mask-commuting line here is missing from this extraction.
2321  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2322  return InsertElementInst::Create(V1, Scalar, IndexC)__;
2323 
2324  return nullptr;
2325 }
2326 
// Fold a shuffle of two identity-with-padding (concat-with-undef) shuffles
// into a single shuffle of the narrow source operands.
// NOTE(review): the signature line is missing from this extraction; the body
// operates on the shuffle `Shuf` — confirm against the original source.
2328  // Match the operands as identity with padding (also known as concatenation
2329  // with undef) shuffles of the same source type. The backend is expected to
2330  // recreate these concatenations from a shuffle of narrow operands.
2331  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2332  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2333  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2334  !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2335  return nullptr;
2336 
2337  // We limit this transform to power-of-2 types because we expect that the
2338  // backend can convert the simplified IR patterns to identical nodes as the
2339  // original IR.
2340  // TODO: If we can verify the same behavior for arbitrary types, the
2341  // power-of-2 checks can be removed.
2342  Value *X = Shuffle0->getOperand(0);
2343  Value *Y = Shuffle1->getOperand(0);
2344  if (X->getType() != Y->getType() ||
2345  !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2346  !isPowerOf2_32(
2347  cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2348  !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2349  match(X, m_Undef()) || match(Y, m_Undef()))
2350  return nullptr;
2351  assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2352  match(Shuffle1->getOperand(1), m_Undef()) &&
2353  "Unexpected operand for identity shuffle");
2354 
2355  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2356  // operands directly by adjusting the shuffle mask to account for the narrower
2357  // types:
2358  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2359  int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2360  int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2361  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2362 
// NOTE(review): the mask extraction line before the next line is missing
// from this extraction.
2364  SmallVector<int, 16> NewMask(Mask.size(), -1);
2365  for (int i = 0, e = Mask.size(); i != e; ++i) {
2366  if (Mask[i] == -1)
2367  continue;
2368 
2369  // If this shuffle is choosing an undef element from 1 of the sources, that
2370  // element is undef.
2371  if (Mask[i] < WideElts) {
2372  if (Shuffle0->getMaskValue(Mask[i]) == -1)
2373  continue;
2374  } else {
2375  if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2376  continue;
2377  }
2378 
2379  // If this shuffle is choosing from the 1st narrow op, the mask element is
2380  // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2381  // element is offset down to adjust for the narrow vector widths.
2382  if (Mask[i] < WideElts) {
2383  assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2384  NewMask[i] = Mask[i];
2385  } else {
2386  assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2387  NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2388  }
2389  }
2390  return new ShuffleVectorInst(X, Y, NewMask);
2391 }
2392 
2394  Value *LHS = SVI.getOperand(0);
2395  Value *RHS = SVI.getOperand(1);
2396  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2397  if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2398  SVI.getType(), ShufQuery))
2399  return replaceInstUsesWith(SVI, V);
2400 
2401  // Bail out for scalable vectors
2402  if (isa<ScalableVectorType>(LHS->getType()))
2403  return nullptr;
2404 
2405  unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2406  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2407 
2408  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2409  //
2410  // if X and Y are of the same (vector) type, and the element size is not
2411  // changed by the bitcasts, we can distribute the bitcasts through the
2412  // shuffle, hopefully reducing the number of instructions. We make sure that
2413  // at least one bitcast only has one use, so we don't *increase* the number of
2414  // instructions here.
2415  Value *X, *Y;
2416  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2417  X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2418  X->getType()->getScalarSizeInBits() ==
2419  SVI.getType()->getScalarSizeInBits() &&
2420  (LHS->hasOneUse() || RHS->hasOneUse())) {
2421  Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2422  SVI.getName() + ".uncasted");
2423  return new BitCastInst(V, SVI.getType());
2424  }
2425 
2428 
2429  // Peek through a bitcasted shuffle operand by scaling the mask. If the
2430  // simulated shuffle can simplify, then this shuffle is unnecessary:
2431  // shuf (bitcast X), undef, Mask --> bitcast X'
2432  // TODO: This could be extended to allow length-changing shuffles.
2433  // The transform might also be obsoleted if we allowed canonicalization
2434  // of bitcasted shuffles.
2435  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2436  X->getType()->isVectorTy() && VWidth == LHSWidth) {
2437  // Try to create a scaled mask constant.
2438  auto *XType = cast<FixedVectorType>(X->getType());
2439  unsigned XNumElts = XType->getNumElements();
2440  SmallVector<int, 16> ScaledMask;
2441  if (XNumElts >= VWidth) {
2442  assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2443  narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2444  } else {
2445  assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2446  if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2447  ScaledMask.clear();
2448  }
2449  if (!ScaledMask.empty()) {
2450  // If the shuffled source vector simplifies, cast that value to this
2451  // shuffle's type.
2452  if (auto *V = SimplifyShuffleVectorInst(X, UndefValue::get(XType),
2453  ScaledMask, XType, ShufQuery))
2454  return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2455  }
2456  }
2457 
2458  // shuffle x, x, mask --> shuffle x, undef, mask'
2459  if (LHS == RHS) {
2460  assert(!match(RHS, m_Undef()) &&
2461  "Shuffle with 2 undef ops not simplified?");
2462  // Remap any references to RHS to use LHS.
2463  SmallVector<int, 16> Elts;
2464  for (unsigned i = 0; i != VWidth; ++i) {
2465  // Propagate undef elements or force mask to LHS.
2466  if (Mask[i] < 0)
2467  Elts.push_back(UndefMaskElem);
2468  else
2469  Elts.push_back(Mask[i] % LHSWidth);
2470  }
2471  return new ShuffleVectorInst(LHS, UndefValue::get(RHS->getType()), Elts);
2472  }
2473 
2474  // shuffle undef, x, mask --> shuffle x, undef, mask'
2475  if (match(LHS, m_Undef())) {
2476  SVI.commute();
2477  return &SVI;
2478  }
2479 
2481  return I;
2482 
2483  if (Instruction *I = foldSelectShuffle(SVI, Builder, DL))
2484  return I;
2485 
2486  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2487  return I;
2488 
2489  if (Instruction *I = narrowVectorSelect(SVI, Builder))
2490  return I;
2491 
2492  APInt UndefElts(VWidth, 0);
2493  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2494  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2495  if (V != &SVI)
2496  return replaceInstUsesWith(SVI, V);
2497  return &SVI;
2498  }
2499 
2501  return I;
2502 
2503  // These transforms have the potential to lose undef knowledge, so they are
2504  // intentionally placed after SimplifyDemandedVectorElts().
2505  if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2506  return I;
2508  return I;
2509 
2510  if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2512  return replaceInstUsesWith(SVI, V);
2513  }
2514 
2515  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2516  // a non-vector type. We can instead bitcast the original vector followed by
2517  // an extract of the desired element:
2518  //
2519  // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2520  // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2521  // %1 = bitcast <4 x i8> %sroa to i32
2522  // Becomes:
2523  // %bc = bitcast <16 x i8> %in to <4 x i32>
2524  // %ext = extractelement <4 x i32> %bc, i32 0
2525  //
2526  // If the shuffle is extracting a contiguous range of values from the input
2527  // vector then each use which is a bitcast of the extracted size can be
2528  // replaced. This will work if the vector types are compatible, and the begin
2529  // index is aligned to a value in the casted vector type. If the begin index
2530  // isn't aligned then we can shuffle the original vector (keeping the same
2531  // vector type) before extracting.
2532  //
2533  // This code will bail out if the target type is fundamentally incompatible
2534  // with vectors of the source type.
2535  //
2536  // Example of <16 x i8>, target type i32:
2537  // Index range [4,8): v-----------v Will work.
2538  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2539  // <16 x i8>: | | | | | | | | | | | | | | | | |
2540  // <4 x i32>: | | | | |
2541  // +-----------+-----------+-----------+-----------+
2542  // Index range [6,10): ^-----------^ Needs an extra shuffle.
2543  // Target type i40: ^--------------^ Won't work, bail.
2544  bool MadeChange = false;
2545  if (isShuffleExtractingFromLHS(SVI, Mask)) {
2546  Value *V = LHS;
2547  unsigned MaskElems = Mask.size();
2548  auto *SrcTy = cast<FixedVectorType>(V->getType());
2549  unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize();
2550  unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2551  assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2552  unsigned SrcNumElems = SrcTy->getNumElements();
2555  for (User *U : SVI.users())
2556  if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2557  if (!BC->use_empty())
2558  // Only visit bitcasts that weren't previously handled.
2559  BCs.push_back(BC);
2560  for (BitCastInst *BC : BCs) {
2561  unsigned BegIdx = Mask.front();
2562  Type *TgtTy = BC->getDestTy();
2563  unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2564  if (!TgtElemBitWidth)
2565  continue;
2566  unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2567  bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2568  bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2569  if (!VecBitWidthsEqual)
2570  continue;
2571  if (!VectorType::isValidElementType(TgtTy))
2572  continue;
2573  auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2574  if (!BegIsAligned) {
2575  // Shuffle the input so [0,NumElements) contains the output, and
2576  // [NumElems,SrcNumElems) is undef.
2577  SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2578  for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2579  ShuffleMask[I] = Idx;
2580  V = Builder.CreateShuffleVector(V, ShuffleMask,
2581  SVI.getName() + ".extract");
2582  BegIdx = 0;
2583  }
2584  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2585  assert(SrcElemsPerTgtElem);
2586  BegIdx /= SrcElemsPerTgtElem;
2587  bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2588  auto *NewBC =
2589  BCAlreadyExists
2590  ? NewBCs[CastSrcTy]
2591  : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2592  if (!BCAlreadyExists)
2593  NewBCs[CastSrcTy] = NewBC;
2594  auto *Ext = Builder.CreateExtractElement(
2595  NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2596  // The shufflevector isn't being replaced: the bitcast that used it
2597  // is. InstCombine will visit the newly-created instructions.
2598  replaceInstUsesWith(*BC, Ext);
2599  MadeChange = true;
2600  }
2601  }
2602 
2603  // If the LHS is a shufflevector itself, see if we can combine it with this
2604  // one without producing an unusual shuffle.
2605  // Cases that might be simplified:
2606  // 1.
2607  // x1=shuffle(v1,v2,mask1)
2608  // x=shuffle(x1,undef,mask)
2609  // ==>
2610  // x=shuffle(v1,undef,newMask)
2611  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2612  // 2.
2613  // x1=shuffle(v1,undef,mask1)
2614  // x=shuffle(x1,x2,mask)
2615  // where v1.size() == mask1.size()
2616  // ==>
2617  // x=shuffle(v1,x2,newMask)
2618  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2619  // 3.
2620  // x2=shuffle(v2,undef,mask2)
2621  // x=shuffle(x1,x2,mask)
2622  // where v2.size() == mask2.size()
2623  // ==>
2624  // x=shuffle(x1,v2,newMask)
2625  // newMask[i] = (mask[i] < x1.size())
2626  // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2627  // 4.
2628  // x1=shuffle(v1,undef,mask1)
2629  // x2=shuffle(v2,undef,mask2)
2630  // x=shuffle(x1,x2,mask)
2631  // where v1.size() == v2.size()
2632  // ==>
2633  // x=shuffle(v1,v2,newMask)
2634  // newMask[i] = (mask[i] < x1.size())
2635  // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2636  //
2637  // Here we are really conservative:
2638  // we are absolutely afraid of producing a shuffle mask not in the input
2639  // program, because the code gen may not be smart enough to turn a merged
2640  // shuffle into two specific shuffles: it may produce worse code. As such,
2641  // we only merge two shuffles if the result is either a splat or one of the
2642  // input shuffle masks. In this case, merging the shuffles just removes
2643  // one instruction, which we know is safe. This is good for things like
2644  // turning: (splat(splat)) -> splat, or
2645  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2646  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2647  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2648  if (LHSShuffle)
2649  if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
2650  LHSShuffle = nullptr;
2651  if (RHSShuffle)
2652  if (!match(RHSShuffle->getOperand(1), m_Undef()))
2653  RHSShuffle = nullptr;
2654  if (!LHSShuffle && !RHSShuffle)
2655  return MadeChange ? &SVI : nullptr;
2656 
2657  Value* LHSOp0 = nullptr;
2658  Value* LHSOp1 = nullptr;
2659  Value* RHSOp0 = nullptr;
2660  unsigned LHSOp0Width = 0;
2661  unsigned RHSOp0Width = 0;
2662  if (LHSShuffle) {
2663  LHSOp0 = LHSShuffle->getOperand(0);
2664  LHSOp1 = LHSShuffle->getOperand(1);
2665  LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
2666  }
2667  if (RHSShuffle) {
2668  RHSOp0 = RHSShuffle->getOperand(0);
2669  RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
2670  }
2671  Value* newLHS = LHS;
2672  Value* newRHS = RHS;
2673  if (LHSShuffle) {
2674  // case 1
2675  if (match(RHS, m_Undef())) {
2676  newLHS = LHSOp0;
2677  newRHS = LHSOp1;
2678  }
2679  // case 2 or 4
2680  else if (LHSOp0Width == LHSWidth) {
2681  newLHS = LHSOp0;
2682  }
2683  }
2684  // case 3 or 4
2685  if (RHSShuffle && RHSOp0Width == LHSWidth) {
2686  newRHS = RHSOp0;
2687  }
2688  // case 4
2689  if (LHSOp0 == RHSOp0) {
2690  newLHS = LHSOp0;
2691  newRHS = nullptr;
2692  }
2693 
2694  if (newLHS == LHS && newRHS == RHS)
2695  return MadeChange ? &SVI : nullptr;
2696 
2697  ArrayRef<int> LHSMask;
2698  ArrayRef<int> RHSMask;
2699  if (newLHS != LHS)
2700  LHSMask = LHSShuffle->getShuffleMask();
2701  if (RHSShuffle && newRHS != RHS)
2702  RHSMask = RHSShuffle->getShuffleMask();
2703 
2704  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2705  SmallVector<int, 16> newMask;
2706  bool isSplat = true;
2707  int SplatElt = -1;
2708  // Create a new mask for the new ShuffleVectorInst so that the new
2709  // ShuffleVectorInst is equivalent to the original one.
2710  for (unsigned i = 0; i < VWidth; ++i) {
2711  int eltMask;
2712  if (Mask[i] < 0) {
2713  // This element is an undef value.
2714  eltMask = -1;
2715  } else if (Mask[i] < (int)LHSWidth) {
2716  // This element is from left hand side vector operand.
2717  //
2718  // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2719  // new mask value for the element.
2720  if (newLHS != LHS) {
2721  eltMask = LHSMask[Mask[i]];
2722  // If the value selected is an undef value, explicitly specify it
2723  // with a -1 mask value.
2724  if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2725  eltMask = -1;
2726  } else
2727  eltMask = Mask[i];
2728  } else {
2729  // This element is from right hand side vector operand
2730  //
2731  // If the value selected is an undef value, explicitly specify it
2732  // with a -1 mask value. (case 1)
2733  if (match(RHS, m_Undef()))
2734  eltMask = -1;
2735  // If RHS is going to be replaced (case 3 or 4), calculate the
2736  // new mask value for the element.
2737  else if (newRHS != RHS) {
2738  eltMask = RHSMask[Mask[i]-LHSWidth];
2739  // If the value selected is an undef value, explicitly specify it
2740  // with a -1 mask value.
2741  if (eltMask >= (int)RHSOp0Width) {
2742  assert(match(RHSShuffle->getOperand(1), m_Undef()) &&
2743  "should have been check above");
2744  eltMask = -1;
2745  }
2746  } else
2747  eltMask = Mask[i]-LHSWidth;
2748 
2749  // If LHS's width is changed, shift the mask value accordingly.
2750  // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2751  // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2752  // If newRHS == newLHS, we want to remap any references from newRHS to
2753  // newLHS so that we can properly identify splats that may occur due to
2754  // obfuscation across the two vectors.
2755  if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
2756  eltMask += newLHSWidth;
2757  }
2758 
2759  // Check if this could still be a splat.
2760  if (eltMask >= 0) {
2761  if (SplatElt >= 0 && SplatElt != eltMask)
2762  isSplat = false;
2763  SplatElt = eltMask;
2764  }
2765 
2766  newMask.push_back(eltMask);
2767  }
2768 
2769  // If the result mask is equal to one of the original shuffle masks,
2770  // or is a splat, do the replacement.
2771  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2772  if (!newRHS)
2773  newRHS = UndefValue::get(newLHS->getType());
2774  return new ShuffleVectorInst(newLHS, newRHS, newMask);
2775  }
2776 
2777  return MadeChange ? &SVI : nullptr;
2778 }
i
i
Definition: README.txt:29
llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition: Type.h:75
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
BinopElts::Opcode
BinaryOperator::BinaryOps Opcode
Definition: InstCombineVectorOps.cpp:1858
llvm::predecessors
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:127
llvm::GetElementPtrInst::setIsInBounds
void setIsInBounds(bool b=true)
Set or clear the inbounds flag on this GEP instruction.
Definition: Instructions.cpp:1798
B1
llvm::ShuffleVectorInst::isIdentityWithExtract
bool isIdentityWithExtract() const
Return true if this shuffle extracts the first N elements of exactly one source vector.
Definition: Instructions.cpp:2361
llvm::SimplifyExtractElementInst
Value * SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q)
Given operands for an ExtractElementInst, fold the result or return null.
Definition: InstructionSimplify.cpp:4599
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::MaskedValueIsZero
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if 'V & Mask' is known to be zero.
Definition: ValueTracking.cpp:361
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1524
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:435
InstCombiner.h
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
llvm::SimplifyQuery
Definition: InstructionSimplify.h:94
llvm::ShuffleVectorInst::getShuffleMask
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
Definition: Instructions.cpp:2059
llvm::ElementCount
Definition: TypeSize.h:386
foldInsSequenceIntoSplat
static Instruction * foldInsSequenceIntoSplat(InsertElementInst &InsElt)
Turn a chain of inserts that splats a value into an insert + shuffle: insertelt(insertelt(insertelt(i...
Definition: InstCombineVectorOps.cpp:1122
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1873
llvm::BinaryOperator::CreateWithCopiedFlags
static BinaryOperator * CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:250
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5192
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:319
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1494
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::enumerate
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are are pair (A,B) such that A is the 0-based ...
Definition: STLExtras.h:1977
llvm::PatternMatch::m_Load
OneOps_match< OpTy, Instruction::Load > m_Load(const OpTy &Op)
Matches LoadInst.
Definition: PatternMatch.h:1573
ErrorHandling.h
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3038
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::Instruction::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Definition: Instruction.cpp:136
narrowInsElt
static Instruction * narrowInsElt(InsertElementInst &InsElt, InstCombiner::BuilderTy &Builder)
If both the base vector and the inserted element are extended from the same type, do the insert eleme...
Definition: InstCombineVectorOps.cpp:1414
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:135
llvm::ConstantInt::getLimitedValue
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:244
APInt.h
llvm::InsertElementInst::Create
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1951
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
llvm::ExtractElementInst::Create
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1886
buildNew
static Value * buildNew(Instruction *I, ArrayRef< Value * > NewOps)
Rebuild a new instruction just like 'I' but with the new operands given.
Definition: InstCombineVectorOps.cpp:1660
llvm::narrowShuffleMaskElts
void narrowShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Replace each shuffle mask index with the scaled sequential indices for an equivalent mask of narrowed...
Definition: VectorUtils.cpp:427
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
DenseMap.h
llvm::Instruction::isShift
bool isShift() const
Definition: Instruction.h:167
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
foldSelectShuffle
static Instruction * foldSelectShuffle(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder, const DataLayout &DL)
Try to fold shuffles that are the equivalent of a vector select.
Definition: InstCombineVectorOps.cpp:1986
llvm::InstCombinerImpl::visitExtractElementInst
Instruction * visitExtractElementInst(ExtractElementInst &EI)
Definition: InstCombineVectorOps.cpp:344
llvm::InstCombinerImpl::replaceInstUsesWith
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombineInternal.h:396
llvm::Optional
Definition: APInt.h:33
isShuffleEquivalentToSelect
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:1094
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::InstCombinerImpl::visitShuffleVectorInst
Instruction * visitShuffleVectorInst(ShuffleVectorInst &SVI)
Definition: InstCombineVectorOps.cpp:2393
Operator.h
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
STLExtras.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::UnaryOperator
Definition: InstrTypes.h:102
llvm::PatternMatch::m_BitCast
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition: PatternMatch.h:1603
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::count_if
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1637
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
llvm::NVPTX::PTXLdStInstCode::VecType
VecType
Definition: NVPTX.h:121
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
foldConstantInsEltIntoShuffle
static Instruction * foldConstantInsEltIntoShuffle(InsertElementInst &InsElt)
insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex --> shufflevector X,...
Definition: InstCombineVectorOps.cpp:1314
llvm::UndefMaskElem
constexpr int UndefMaskElem
Definition: Instructions.h:1997
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
llvm::SmallBitVector
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
Definition: SmallBitVector.h:34
Instruction.h
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:270
llvm::UnaryOperator::getOpcode
UnaryOps getOpcode() const
Definition: InstrTypes.h:172
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1547
llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition: Instructions.h:1769
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1272
llvm::ShuffleVectorInst::isSelect
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
Definition: Instructions.h:2183
InstCombineInternal.h
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1472
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2721
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::Instruction::setHasNoSignedWrap
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
Definition: Instruction.cpp:128
llvm::User
Definition: User.h:44
llvm::PatternMatch::m_UnOp
class_match< UnaryOperator > m_UnOp()
Match an arbitrary unary operation and ignore it.
Definition: PatternMatch.h:79
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
isSplat
static bool isSplat(ArrayRef< Value * > VL)
Definition: SLPVectorizer.cpp:236
InstrTypes.h
llvm::FCmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1369
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
foldInsEltIntoIdentityShuffle
static Instruction * foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt)
Try to fold an extract+insert element into an existing identity shuffle by changing the shuffle's mas...
Definition: InstCombineVectorOps.cpp:1232
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:216
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
llvm::InsertElementInst
This instruction inserts a single (scalar) element into a VectorType value.
Definition: Instructions.h:1937
llvm::Value::uses
iterator_range< use_iterator > uses()
Definition: Value.h:377
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:249
llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:627
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1639
llvm::SmallBitVector::all
bool all() const
Returns true if all bits are set.
Definition: SmallBitVector.h:215
llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:799
llvm::ShuffleVectorInst::getMaskValue
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
Definition: Instructions.h:2056
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:393
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
ShuffleOps
std::pair< Value *, Value * > ShuffleOps
We are building a shuffle to create V, which is a sequence of insertelement, extractelement pairs.
Definition: InstCombineVectorOps.cpp:694
llvm::APInt::isAllOnes
bool isAllOnes() const
Determine if all bits are set.
Definition: APInt.h:349
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::UnaryOperator::CreateWithCopiedFlags
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:157
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
llvm::InstCombinerImpl
Definition: InstCombineInternal.h:61
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
InstCombineWorklist.h
collectShuffleElements
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl< int > &Mask, Value *PermittedRHS, InstCombinerImpl &IC)
Definition: InstCombineVectorOps.cpp:696
llvm::InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse
Instruction * foldAggregateConstructionIntoAggregateReuse(InsertValueInst &OrigIVI)
Look for chain of insertvalue's that fully define an aggregate, and trace back the values inserted,...
Definition: InstCombineVectorOps.cpp:782
PatternMatch.h
llvm::BasicBlock::getFirstNonPHI
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:212
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:648
llvm::Instruction::isIntDivRem
bool isIntDivRem() const
Definition: Instruction.h:166
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2717
llvm::None
const NoneType None
Definition: None.h:23
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::ShuffleVectorInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:2047
Type.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1502
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::Type::getStructNumElements
unsigned getStructNumElements() const
Definition: DerivedTypes.h:348
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::APInt::getOneBitSet
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:224
llvm::Value::user_back
User * user_back()
Definition: Value.h:408
llvm::ConstantExpr::getShuffleVector
static Constant * getShuffleVector(Constant *V1, Constant *V2, ArrayRef< int > Mask, Type *OnlyIfReducedTy=nullptr)
Definition: Constants.cpp:2572
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:639
foldShuffleWithInsert
static Instruction * foldShuffleWithInsert(ShuffleVectorInst &Shuf, InstCombinerImpl &IC)
Try to replace a shuffle with an insertelement or try to replace a shuffle operand with the operand o...
Definition: InstCombineVectorOps.cpp:2240
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::Instruction::andIRFlags
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
Definition: Instruction.cpp:313
llvm::InstCombiner::getSafeVectorConstantForBinop
static Constant * getSafeVectorConstantForBinop(BinaryOperator::BinaryOps Opcode, Constant *In, bool IsRHSConstant)
Some binary operators require special handling to avoid poison and undefined behavior.
Definition: InstCombiner.h:312
VectorUtils.h
llvm::Optional::getValueOr
constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION
Definition: Optional.h:297
BasicBlock.h
llvm::ShuffleVectorInst::increasesLength
bool increasesLength() const
Return true if this shuffle returns a vector with a greater number of elements than its source vector...
Definition: Instructions.h:2097
llvm::ShuffleVectorInst::commuteShuffleMask
static void commuteShuffleMask(MutableArrayRef< int > Mask, unsigned InVecNumElts)
Change values in a shuffle permute mask assuming the two vector operands of length InVecNumElts have ...
Definition: Instructions.h:2339
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:535
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1203
llvm::ConstantExpr::getBinOpIdentity
static Constant * getBinOpIdentity(unsigned Opcode, Type *Ty, bool AllowRHSConstant=false)
Return the identity constant for a binary opcode.
Definition: Constants.cpp:2787
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:91
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4747
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2775
replaceExtractElements
static void replaceExtractElements(InsertElementInst *InsElt, ExtractElementInst *ExtElt, InstCombinerImpl &IC)
If we have insertion into a vector that is wider than the vector that we are extracting from,...
Definition: InstCombineVectorOps.cpp:613
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::UndefValue
'undef' values are things that do not have specified contents.
Definition: Constants.h:1348
llvm::SimplifyShuffleVectorInst
Value * SimplifyShuffleVectorInst(Value *Op0, Value *Op1, ArrayRef< int > Mask, Type *RetTy, const SimplifyQuery &Q)
Given operands for a ShuffleVectorInst, fold the result or return null.
Definition: InstructionSimplify.cpp:4848
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
llvm::ConstantExpr::getShl
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2741
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1612
ArrayRef.h
canonicalizeInsertSplat
static Instruction * canonicalizeInsertSplat(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
If we have an insert of a scalar to a non-zero element of an undefined vector and then shuffle that v...
Definition: InstCombineVectorOps.cpp:1953
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Type::getArrayNumElements
uint64_t getArrayNumElements() const
Definition: DerivedTypes.h:384
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
BinopElts
These are the ingredients in an alternate form binary operator as described below.
Definition: InstCombineVectorOps.cpp:1857
llvm::ExtractElementInst::getVectorOperandType
VectorType * getVectorOperandType() const
Definition: Instructions.h:1907
llvm::PatternMatch::m_ZeroMask
Definition: PatternMatch.h:1533
llvm::SimplifyQuery::getWithInstruction
SimplifyQuery getWithInstruction(Instruction *I) const
Definition: InstructionSimplify.h:121
collectSingleShuffleElements
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, SmallVectorImpl< int > &Mask)
If V is a shuffle of values that ONLY returns elements from either LHS or RHS, return the shuffle mas...
Definition: InstCombineVectorOps.cpp:540
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:142
llvm::LinearPolySize::getKnownMinValue
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:297
foldIdentityExtractShuffle
static Instruction * foldIdentityExtractShuffle(ShuffleVectorInst &Shuf)
Try to fold an extract subvector operation.
Definition: InstCombineVectorOps.cpp:2190
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:650
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:954
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1633
foldBitcastExtElt
static Instruction * foldBitcastExtElt(ExtractElementInst &Ext, InstCombiner::BuilderTy &Builder, bool IsBigEndian)
Definition: InstCombineVectorOps.cpp:181
llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:885
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::BinaryOperator
Definition: InstrTypes.h:189
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
evaluateInDifferentElementOrder
static Value * evaluateInDifferentElementOrder(Value *V, ArrayRef< int > Mask)
Definition: InstCombineVectorOps.cpp:1736
llvm::SimplifyInsertElementInst
Value * SimplifyInsertElementInst(Value *Vec, Value *Elt, Value *Idx, const SimplifyQuery &Q)
Given operands for an InsertElement, fold the result or return null.
Definition: InstructionSimplify.cpp:4495
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::ExtractElementInst::getVectorOperand
Value * getVectorOperand()
Definition: Instructions.h:1902
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:421
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1561
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:520
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:978
llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:367
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::InstCombinerImpl::visitInsertValueInst
Instruction * visitInsertValueInst(InsertValueInst &IV)
Try to find redundant insertvalue instructions, like the following ones: %0 = insertvalue { i8,...
Definition: InstCombineVectorOps.cpp:1062
llvm::Instruction::isExact
bool isExact() const
Determine whether the exact flag is set.
Definition: Instruction.cpp:186
llvm::pred_empty
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:119
llvm::ConstantVector::get
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1369
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1037
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:297
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
findDemandedEltsBySingleUser
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr)
Find elements of V demanded by UserInstr.
Definition: InstCombineVectorOps.cpp:281
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::InstCombinerImpl::visitInsertElementInst
Instruction * visitInsertElementInst(InsertElementInst &IE)
Definition: InstCombineVectorOps.cpp:1445
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:162
llvm::PatternMatch::m_FPExt
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:1697
llvm::widenShuffleMaskElts
bool widenShuffleMaskElts(int Scale, ArrayRef< int > Mask, SmallVectorImpl< int > &ScaledMask)
Try to transform a shuffle mask by replacing elements with the scaled index for an equivalent mask of...
Definition: VectorUtils.cpp:448
Constant.h
llvm::PPC::getPredicate
Predicate getPredicate(unsigned Condition, unsigned Hint)
Return predicate consisting of specified condition and hint bits.
Definition: PPCPredicates.h:87
foldTruncShuffle
static Instruction * foldTruncShuffle(ShuffleVectorInst &Shuf, bool IsBigEndian)
Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
Definition: InstCombineVectorOps.cpp:2116
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:207
llvm::InstCombinerImpl::InsertNewInstWith
Instruction * InsertNewInstWith(Instruction *New, Instruction &Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombineInternal.h:385
narrowVectorSelect
static Instruction * narrowVectorSelect(ShuffleVectorInst &Shuf, InstCombiner::BuilderTy &Builder)
Match a shuffle-select-shuffle pattern where the shuffles are widening and narrowing (concatenating w...
Definition: InstCombineVectorOps.cpp:2157
isShuffleExtractingFromLHS
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, ArrayRef< int > Mask)
Definition: InstCombineVectorOps.cpp:1840
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2667
hoistInsEltConst
static Instruction * hoistInsEltConst(InsertElementInst &InsElt2, InstCombiner::BuilderTy &Builder)
If we have an insertelement instruction feeding into another insertelement and the 2nd is inserting a...
Definition: InstCombineVectorOps.cpp:1291
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:207
Casting.h
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
foldInsEltIntoSplat
static Instruction * foldInsEltIntoSplat(InsertElementInst &InsElt)
Try to fold an insert element into an existing splat shuffle by changing the shuffle's mask to includ...
Definition: InstCombineVectorOps.cpp:1195
llvm::SmallVectorImpl::clear
void clear()
Definition: SmallVector.h:585
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::findScalarElement
Value * findScalarElement(Value *V, unsigned EltNo)
Given a vector and an element number, see if the scalar value is already around as a register,...
Definition: VectorUtils.cpp:280
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::CmpInst::Create
static CmpInst * Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name="", Instruction *InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Definition: Instructions.cpp:3709
llvm::InstCombinerImpl::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombineInternal.h:417
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:785
llvm::InsertElementInst::getType
VectorType * getType() const
Overload to return most specific vector type.
Definition: Instructions.h:1970
llvm::Type::StructTyID
@ StructTyID
Structures.
Definition: Type.h:74
canEvaluateShuffled
static bool canEvaluateShuffled(Value *V, ArrayRef< int > Mask, unsigned Depth=5)
Return true if we can evaluate the specified expression tree if the vector elements were shuffled in ...
Definition: InstCombineVectorOps.cpp:1572
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2009
Instructions.h
cheapToScalarize
static bool cheapToScalarize(Value *V, Value *EI)
Return true if the value is cheaper to scalarize than it is to leave as a vector operation.
Definition: InstCombineVectorOps.cpp:59
SmallVector.h
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:802
User.h
findDemandedEltsByAllUsers
static APInt findDemandedEltsByAllUsers(Value *V)
Find union of elements of V demanded by all its users.
Definition: InstCombineVectorOps.cpp:325
llvm::Value::DoPHITranslation
const Value * DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB) const
Translate PHI node to its predecessor from the given basic block.
Definition: Value.cpp:970
llvm::Instruction::hasNoSignedWrap
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
Definition: Instruction.cpp:140
SmallBitVector.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
InstructionSimplify.h
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2741
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::PHINode
Definition: Instructions.h:2625
llvm::Constant::getSplatValue
Constant * getSplatValue(bool AllowUndefs=false) const
If all elements of the vector constant have the same value, return that value.
Definition: Constants.cpp:1686
llvm::ExtractElementInst::getIndexOperand
Value * getIndexOperand()
Definition: Instructions.h:1903
llvm::Instruction::copyIRFlags
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
Definition: Instruction.cpp:289
llvm::SmallVectorImpl< int >
foldIdentityPaddedShuffles
static Instruction * foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:2327
DerivedTypes.h
BinopElts::Op0
Value * Op0
Definition: InstCombineVectorOps.cpp:1859
getAlternateBinop
static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL)
Binops may be transformed into binops with different opcodes and operands.
Definition: InstCombineVectorOps.cpp:1871
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
foldSelectShuffleWith1Binop
static Instruction * foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf)
Definition: InstCombineVectorOps.cpp:1897
llvm::ConstantAggregateZero::get
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1650
llvm::InsertValueInst
This instruction inserts a struct field of array element value into an aggregate value.
Definition: Instructions.h:2483
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2636
Value.h
llvm::PatternMatch::m_Cmp
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:89
llvm::LegacyLegalizeActions::NotFound
@ NotFound
Sentinel value for when no action was found in the specified table.
Definition: LegacyLegalizerInfo.h:74
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
BinopElts::BinopElts
BinopElts(BinaryOperator::BinaryOps Opc=(BinaryOperator::BinaryOps) 0, Value *V0=nullptr, Value *V1=nullptr)
Definition: InstCombineVectorOps.cpp:1861
BinopElts::Op1
Value * Op1
Definition: InstCombineVectorOps.cpp:1860
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:632
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::Instruction::dropPoisonGeneratingFlags
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
Definition: Instruction.cpp:144
llvm::ShuffleVectorInst::changesLength
bool changesLength() const
Return true if this shuffle returns a vector with a different number of elements than its source vect...
Definition: Instructions.h:2086
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:128
llvm::SmallVectorImpl::emplace_back
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:908
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::PoisonValue
In order to facilitate speculative execution, many instructions do not invoke immediate undefined beh...
Definition: Constants.h:1396
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1790
llvm::ShuffleVectorInst::commute
void commute()
Swap the operands and adjust the mask to preserve the semantics of the instruction.
Definition: Instructions.cpp:1979