LLVM 16.0.0git
InstCombineVectorOps.cpp
//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // The index needs to be lower than the minimum size of the vector,
    // because for a scalable vector the actual size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpInst::Predicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

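// Illustrative example (placeholder IR, %a and %b are assumed values):
// extracting lane 1 of a single-use vector add is "cheap" because the caller
// can rewrite the whole operation on scalars:
//   %v = add <4 x i32> %a, %b
//   %e = extractelement <4 x i32> %v, i64 1
// -->
//   %a1 = extractelement <4 x i32> %a, i64 1
//   %b1 = extractelement <4 x i32> %b, i64 1
//   %e = add i32 %a1, %b1
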
// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          *B0);
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0),
          *B0);
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, *InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts)
    replaceInstUsesWith(*E, scalarPHI);

  return &EI;
}

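// Illustrative example (placeholder IR): a loop-carried vector PHI that only
// feeds its own update and a constant-lane extract can be narrowed, roughly:
//   loop:
//     %vphi = phi <4 x float> [ %init, %entry ], [ %vnext, %loop ]
//     %vnext = fadd <4 x float> %vphi, <float 1.0, float 1.0, float 1.0, float 1.0>
//     %x = extractelement <4 x float> %vphi, i64 0
// -->
//   entry:
//     %init.Elt = extractelement <4 x float> %init, i64 0
//   loop:
//     %sphi = phi float [ %init.Elt, %entry ], [ %snext, %loop ]
//     %snext = fadd float %sphi, 1.0
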
Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are bitcasting an integer to a vector and extracting a portion,
  // that is a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if (!ShiftAmountC ||
        (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
         Ext.getVectorOperand()->hasOneUse())) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from
    // 1 of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove the insertelement if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved to SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endianness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

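// Illustrative example of the insert/extract overlap case above on a
// little-endian target (placeholder IR):
//   %v = insertelement <2 x i64> %vec, i64 %s, i64 0
//   %bc = bitcast <2 x i64> %v to <8 x i16>
//   %e = extractelement <8 x i16> %bc, i64 1
// folds to a scalar shift + truncate of %s:
//   %lshr = lshr i64 %s, 16
//   %e = trunc i64 %lshr to i16
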
/// Find elements of V demanded by UserInstr.
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

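// Illustrative example: for a user such as
//   %s = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
// only elements 0 and 2 of %v are demanded, so the returned mask is 0b0101.
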
/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getType()->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

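// Illustrative example: an i32 index is canonicalized to i64 so that
// otherwise-identical extracts can CSE:
//   extractelement <4 x float> %v, i32 1
//     --> extractelement <4 x float> %v, i64 1
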
Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement will for instance try to do out of bounds accesses
  // because of the values of %c1 and/or %c2, the sequence could be optimized
  // early. This is currently not possible because constant folding will reach
  // an unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  if (IndexC) {
    // Canonicalize type of constant indices to i64 to simplify CSE
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // The index needs to be lower than the minimum size of the vector,
      // because for a scalable vector the actual size is only known at run
      // time.
      if (IID == Intrinsic::experimental_stepvector &&
          IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return the index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return undefined.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = UndefValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For a fixed-length vector, it's invalid to extract an out-of-range
    // element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO: come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpInst::Predicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // instsimplify already handled the case where the indices are constants
      // and equal by value; if both are constants here, they must not be the
      // same value, so extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out
      // where it came from and extract from the appropriate input element
      // instead. Restrict the following transformation to fixed-length
      // vectors.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int32Ty = Type::getInt32Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int32Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable types; the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt UndefElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt UndefElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

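// Illustrative example of the multi-use demanded-elements case above: if %v
// is only used by extracts of lanes 0 and 2, DemandedElts is {0, 2}, and a
// producer such as
//   %v = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 4, i32 2, i32 5>
// can have its unused lanes (1 and 3, the only lanes reading %b) relaxed to
// poison, which may enable further folds.
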
/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Undef())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted undef.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

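// Illustrative example (placeholder IR): for LHS = %a and RHS = %b, both
// <4 x i32>, the chain
//   %e = extractelement <4 x i32> %b, i64 3
//   %v = insertelement <4 x i32> %a, i32 %e, i64 0
// collects to mask <7, 1, 2, 3>: lane 0 is taken from RHS element 3
// (3 + NumLHSElts), and the remaining lanes are the identity lanes of LHS.
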
/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
static void replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    NewExt->insertAfter(OldExt);
    IC.replaceInstUsesWith(*OldExt, NewExt);
  }
}

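// Illustrative example: when inserting elements extracted from a narrower
// <2 x i8> source into a <4 x i8> destination, the source is first widened:
//   %wide = shufflevector <2 x i8> %src, <2 x i8> poison,
//                         <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
// and the old extracts are redirected to %wide, so a later visit can turn
// the whole insert/extract chain into one shufflevector.
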
/// We are building a shuffle to create V, which is a sequence of
/// insertelement/extractelement pairs. If PermittedRHS is set, then we must
/// either use it or not rely on the second vector source. Return a std::pair
/// containing the left and right vectors of the proposed shuffle (or nullptr),
/// and set the Mask parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Undef())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted-from or inserted-into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            replaceExtractElements(IEI, EI, IC);

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

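// Illustrative example (placeholder IR): the chain
//   %e0 = extractelement <2 x i32> %a, i64 0
//   %v0 = insertelement <2 x i32> poison, i32 %e0, i64 0
//   %e1 = extractelement <2 x i32> %a, i64 1
//   %v1 = insertelement <2 x i32> %v0, i32 %e1, i64 1
// is recognized as reading lanes 0 and 1 of %a, so the caller can replace
// the whole chain with a single shufflevector of %a.
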
/// Look for a chain of insertvalue's that fully define an aggregate, trace
/// back the values inserted, and check whether they were all extractvalue'd
/// from the same source aggregate at the exact same element indexes.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for a limit of 2 is to be
  // able to handle the clang C++ exception struct (which is hardcoded as
  // {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = None;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either
  // we've reconstructed the full initializer or can't visit any more
  // `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                       ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    Optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find a defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find a defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type from which the extraction was done
    /// didn't match the aggregate type into which the insertion was done,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](Optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // Given the value \p Elt that was being inserted into element \p EltIdx of
  // an aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was done.
  // If \p PredBB is provided, does PHI translation of \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
          Optional<BasicBlock *> PredBB) -> Optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB)
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was done?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find an appropriate source aggregate for each of the
  // elements, and see if it's the same aggregate for each element.
  // If so, return it.
  auto FindCommonSourceAggregate =
      [&](Optional<BasicBlock *> UseBB,
          Optional<BasicBlock *> PredBB) -> Optional<Value *> {
    Optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element,");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      Optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found a source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, passthrough whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found a source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  Optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for the aggregate, where we'll insert the final PHI that will
  // be used instead of OrigIVI. The basic block of OrigIVI is *not* the right
  // choice. We should look at in which blocks each of the AggElts is defined;
  // they should all be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const Optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find the source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate from which all the
  // elements were originally extracted?
  // Note that we want the map to have a stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI were originally extracted from the same
    // aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) != AggregateDescription::Found)
      return nullptr; // Give up.
    IV.first->second = *SourceAggregate;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB->getFirstNonPHI());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

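// Illustrative example of the non-PHI case (placeholder IR): the chain
//   %e0 = extractvalue { i8, i32 } %agg, 0
//   %e1 = extractvalue { i8, i32 } %agg, 1
//   %i0 = insertvalue { i8, i32 } undef, i8 %e0, 0
//   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
// rebuilds %agg element by element, so all uses of %i1 are simply replaced
// with %agg.
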
/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0, i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

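// Illustrative example: with <4 x i32> operands, the mask <0, 5, 6, 3> acts
// like a select (lanes 0 and 3 from the first operand, lanes 1 and 2 from
// the second), while <1, 5, 6, 3> does not, since lane 0 would cross lanes.
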
/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check that none of the intermediate steps have any additional uses,
    // except for the root insertelement instruction, which can be re-used, if
    // it inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into an undef vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a
  //       splat and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Undef()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int32Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);

  // Splat from element 0, but replace absent elements with undef in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle has a scalable type; the number of elements
  // in the shuffle mask is unknown at compile time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle has a scalable type; the number of elements
  // in the shuffle mask is unknown at compile time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == UndefMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;

  Value *X, *Y;
  Constant *ScalarC;
  ConstantInt *IdxC1, *IdxC2;
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
  }

  return nullptr;
}

1380 /// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1381 /// --> shufflevector X, CVec', Mask'
1383  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1384  // Bail out if the parent has more than one use. In that case, we'd be
1385  // replacing the insertelt with a shuffle, and that's not a clear win.
1386  if (!Inst || !Inst->hasOneUse())
1387  return nullptr;
1388  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1389  // The shuffle must have a constant vector operand. The insertelt must have
1390  // a constant scalar being inserted at a constant position in the vector.
1391  Constant *ShufConstVec, *InsEltScalar;
1392  uint64_t InsEltIndex;
1393  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1394  !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1395  !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1396  return nullptr;
1397 
1398  // Adding an element to an arbitrary shuffle could be expensive, but a
1399  // shuffle that selects elements from vectors without crossing lanes is
1400  // assumed cheap.
1401  // If we're just adding a constant into that shuffle, it will still be
1402  // cheap.
1403  if (!isShuffleEquivalentToSelect(*Shuf))
1404  return nullptr;
1405 
1406  // From the above 'select' check, we know that the mask has the same number
1407  // of elements as the vector input operands. We also know that each constant
1408  // input element is used in its lane and cannot be used more than once by
1409  // the shuffle. Therefore, replace the constant in the shuffle's constant
1410  // vector with the insertelt constant. Replace the constant in the shuffle's
1411  // mask vector with the insertelt index plus the length of the vector
1412  // (because the constant vector operand of a shuffle is always the 2nd
1413  // operand).
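  // Example (hypothetical IR; %x is a placeholder value):
  //   %s = shufflevector <4 x i32> %x, <4 x i32> <i32 10, i32 20, i32 30, i32 40>,
  //                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  //   %r = insertelement <4 x i32> %s, i32 99, i64 2
  // -->
  //   %r = shufflevector <4 x i32> %x, <4 x i32> <i32 10, i32 20, i32 99, i32 40>,
  //                      <4 x i32> <i32 0, i32 5, i32 6, i32 7>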
1414  ArrayRef<int> Mask = Shuf->getShuffleMask();
1415  unsigned NumElts = Mask.size();
1416  SmallVector<Constant *, 16> NewShufElts(NumElts);
1417  SmallVector<int, 16> NewMaskElts(NumElts);
1418  for (unsigned I = 0; I != NumElts; ++I) {
1419  if (I == InsEltIndex) {
1420  NewShufElts[I] = InsEltScalar;
1421  NewMaskElts[I] = InsEltIndex + NumElts;
1422  } else {
1423  // Copy over the existing values.
1424  NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1425  NewMaskElts[I] = Mask[I];
1426  }
1427 
1428  // Bail if we failed to find an element.
1429  if (!NewShufElts[I])
1430  return nullptr;
1431  }
1432 
1433  // Create new operands for a shuffle that includes the constant of the
1434  // original insertelt. The old shuffle will be dead now.
1435  return new ShuffleVectorInst(Shuf->getOperand(0),
1436  ConstantVector::get(NewShufElts), NewMaskElts);
1437  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1438  // Transform sequences of insertelement ops with constant data/indexes into
1439  // a single shuffle op.
1440  // Cannot handle a scalable type; the number of elements needed to create
1441  // the shuffle mask is not a compile-time constant.
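  // Example (hypothetical IR; %v is a placeholder value):
  //   %i1 = insertelement <4 x float> %v, float 1.0, i64 0
  //   %i2 = insertelement <4 x float> %i1, float 2.0, i64 1
  // -->
  //   %i2 = shufflevector <4 x float> %v,
  //           <4 x float> <float 1.0, float 2.0, float undef, float undef>,
  //           <4 x i32> <i32 4, i32 5, i32 2, i32 3>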
1442  if (isa<ScalableVectorType>(InsElt.getType()))
1443  return nullptr;
1444  unsigned NumElts =
1445  cast<FixedVectorType>(InsElt.getType())->getNumElements();
1446 
1447  uint64_t InsertIdx[2];
1448  Constant *Val[2];
1449  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1450  !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1451  !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1452  !match(IEI->getOperand(1), m_Constant(Val[1])))
1453  return nullptr;
1454  SmallVector<Constant *, 16> Values(NumElts);
1455  SmallVector<int, 16> Mask(NumElts);
1456  auto ValI = std::begin(Val);
1457  // Generate new constant vector and mask.
1458  // We have 2 values/masks from the insertelements instructions. Insert them
1459  // into new value/mask vectors.
1460  for (uint64_t I : InsertIdx) {
1461  if (!Values[I]) {
1462  Values[I] = *ValI;
1463  Mask[I] = NumElts + I;
1464  }
1465  ++ValI;
1466  }
1467  // Remaining values are filled with 'undef' values.
1468  for (unsigned I = 0; I < NumElts; ++I) {
1469  if (!Values[I]) {
1470  Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1471  Mask[I] = I;
1472  }
1473  }
1474  // Create new operands for a shuffle that includes the constant of the
1475  // original insertelt.
1476  return new ShuffleVectorInst(IEI->getOperand(0),
1477  ConstantVector::get(Values), Mask);
1478  }
1479  return nullptr;
1480 }
1481 
1482 /// If both the base vector and the inserted element are extended from the same
1483 /// type, do the insert element in the narrow source type followed by extend.
1484 /// TODO: This can be extended to include other cast opcodes, but particularly
1485 /// if we create a wider insertelement, make sure codegen is not harmed.
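/// Example (hypothetical IR; %x and %y are placeholder values):
///   %v = fpext <4 x float> %x to <4 x double>   ; single use
///   %s = fpext float %y to double
///   %r = insertelement <4 x double> %v, double %s, i64 1
/// -->
///   %n = insertelement <4 x float> %x, float %y, i64 1
///   %r = fpext <4 x float> %n to <4 x double>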
1486 static Instruction *narrowInsElt(InsertElementInst &InsElt,
1487  InstCombiner::BuilderTy &Builder) {
1488  // We are creating a vector extend. If the original vector extend has another
1489  // use, that would mean we end up with 2 vector extends, so avoid that.
1490  // TODO: We could ease the use-clause to "if at least one op has one use"
1491  // (assuming that the source types match - see next TODO comment).
1492  Value *Vec = InsElt.getOperand(0);
1493  if (!Vec->hasOneUse())
1494  return nullptr;
1495 
1496  Value *Scalar = InsElt.getOperand(1);
1497  Value *X, *Y;
1498  CastInst::CastOps CastOpcode;
1499  if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1500  CastOpcode = Instruction::FPExt;
1501  else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1502  CastOpcode = Instruction::SExt;
1503  else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1504  CastOpcode = Instruction::ZExt;
1505  else
1506  return nullptr;
1507 
1508  // TODO: We can allow mismatched types by creating an intermediate cast.
1509  if (X->getType()->getScalarType() != Y->getType())
1510  return nullptr;
1511 
1512  // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1513  Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1514  return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1515 }
1516 
1517 Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1518  Value *VecOp = IE.getOperand(0);
1519  Value *ScalarOp = IE.getOperand(1);
1520  Value *IdxOp = IE.getOperand(2);
1521 
1522  if (auto *V = simplifyInsertElementInst(
1523  VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1524  return replaceInstUsesWith(IE, V);
1525 
1526  // Canonicalize type of constant indices to i64 to simplify CSE
1527  if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp))
1528  if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1529  return replaceOperand(IE, 2, NewIdx);
1530 
1531  // If the scalar is bitcast and inserted into undef, do the insert in the
1532  // source type followed by bitcast.
1533  // TODO: Generalize for insert into any constant, not just undef?
1534  Value *ScalarSrc;
1535  if (match(VecOp, m_Undef()) &&
1536  match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1537  (ScalarSrc->getType()->isIntegerTy() ||
1538  ScalarSrc->getType()->isFloatingPointTy())) {
1539  // inselt undef, (bitcast ScalarSrc), IdxOp -->
1540  // bitcast (inselt undef, ScalarSrc, IdxOp)
1541  Type *ScalarTy = ScalarSrc->getType();
1542  Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1543  UndefValue *NewUndef = UndefValue::get(VecTy);
1544  Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1545  return new BitCastInst(NewInsElt, IE.getType());
1546  }
1547 
1548  // If the vector and scalar are both bitcast from the same element type, do
1549  // the insert in that source type followed by bitcast.
1550  Value *VecSrc;
1551  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1552  match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1553  (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1554  VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1555  cast<VectorType>(VecSrc->getType())->getElementType() ==
1556  ScalarSrc->getType()) {
1557  // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1558  // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1559  Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1560  return new BitCastInst(NewInsElt, IE.getType());
1561  }
1562 
1563  // If the inserted element was extracted from some other fixed-length vector
1564  // and both indexes are valid constants, try to turn this into a shuffle.
1565  // Cannot handle a scalable vector type; the number of elements needed to
1566  // create the shuffle mask is not a compile-time constant.
1567  uint64_t InsertedIdx, ExtractedIdx;
1568  Value *ExtVecOp;
1569  if (isa<FixedVectorType>(IE.getType()) &&
1570  match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1571  match(ScalarOp,
1572  m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1573  isa<FixedVectorType>(ExtVecOp->getType()) &&
1574  ExtractedIdx <
1575  cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1576  // TODO: Looking at the user(s) to determine if this insert is a
1577  // fold-to-shuffle opportunity does not match the usual instcombine
1578  // constraints. We should decide if the transform is worthy based only
1579  // on this instruction and its operands, but that may not work currently.
1580  //
1581  // Here, we are trying to avoid creating shuffles before reaching
1582  // the end of a chain of extract-insert pairs. This is complicated because
1583  // we do not generally form arbitrary shuffle masks in instcombine
1584  // (because those may codegen poorly), but collectShuffleElements() does
1585  // exactly that.
1586  //
1587  // The rules for determining what is an acceptable target-independent
1588  // shuffle mask are fuzzy because they evolve based on the backend's
1589  // capabilities and real-world impact.
1590  auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1591  if (!Insert.hasOneUse())
1592  return true;
1593  auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1594  if (!InsertUser)
1595  return true;
1596  return false;
1597  };
1598 
1599  // Try to form a shuffle from a chain of extract-insert ops.
1600  if (isShuffleRootCandidate(IE)) {
1601  SmallVector<int, 16> Mask;
1602  ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1603 
1604  // The proposed shuffle may be trivial, in which case we shouldn't
1605  // perform the combine.
1606  if (LR.first != &IE && LR.second != &IE) {
1607  // We now have a shuffle of LHS, RHS, Mask.
1608  if (LR.second == nullptr)
1609  LR.second = UndefValue::get(LR.first->getType());
1610  return new ShuffleVectorInst(LR.first, LR.second, Mask);
1611  }
1612  }
1613  }
1614 
1615  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1616  unsigned VWidth = VecTy->getNumElements();
1617  APInt UndefElts(VWidth, 0);
1618  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1619  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1620  if (V != &IE)
1621  return replaceInstUsesWith(IE, V);
1622  return &IE;
1623  }
1624  }
1625 
1626  if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1627  return Shuf;
1628 
1629  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1630  return NewInsElt;
1631 
1632  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1633  return Broadcast;
1634 
1635  if (Instruction *Splat = foldInsEltIntoSplat(IE))
1636  return Splat;
1637 
1638  if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1639  return IdentityShuf;
1640 
1641  if (Instruction *Ext = narrowInsElt(IE, Builder))
1642  return Ext;
1643 
1644  return nullptr;
1645 }
1646 
1647 /// Return true if we can evaluate the specified expression tree if the vector
1648 /// elements were shuffled in a different order.
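/// Example (hypothetical IR; %s is a placeholder scalar):
///   %v = insertelement <4 x i32> undef, i32 %s, i32 0
///   %a = add <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>
///   %r = shufflevector <4 x i32> %a, <4 x i32> undef,
///                      <4 x i32> <i32 3, i32 2, i32 1, i32 0>
/// can be rewritten without the shuffle as
///   %v = insertelement <4 x i32> undef, i32 %s, i32 3
///   %a = add <4 x i32> %v, <i32 4, i32 3, i32 2, i32 1>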
1649 static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1650  unsigned Depth = 5) {
1651  // We can always reorder the elements of a constant.
1652  if (isa<Constant>(V))
1653  return true;
1654 
1655  // We won't reorder vector arguments. No IPO here.
1656  Instruction *I = dyn_cast<Instruction>(V);
1657  if (!I) return false;
1658 
1659  // Two users may expect different orders of the elements. Don't try it.
1660  if (!I->hasOneUse())
1661  return false;
1662 
1663  if (Depth == 0) return false;
1664 
1665  switch (I->getOpcode()) {
1666  case Instruction::UDiv:
1667  case Instruction::SDiv:
1668  case Instruction::URem:
1669  case Instruction::SRem:
1670  // Propagating an undefined shuffle mask element to integer div/rem is not
1671  // allowed because those opcodes can create immediate undefined behavior
1672  // from an undefined element in an operand.
1673  if (llvm::is_contained(Mask, -1))
1674  return false;
1675  [[fallthrough]];
1676  case Instruction::Add:
1677  case Instruction::FAdd:
1678  case Instruction::Sub:
1679  case Instruction::FSub:
1680  case Instruction::Mul:
1681  case Instruction::FMul:
1682  case Instruction::FDiv:
1683  case Instruction::FRem:
1684  case Instruction::Shl:
1685  case Instruction::LShr:
1686  case Instruction::AShr:
1687  case Instruction::And:
1688  case Instruction::Or:
1689  case Instruction::Xor:
1690  case Instruction::ICmp:
1691  case Instruction::FCmp:
1692  case Instruction::Trunc:
1693  case Instruction::ZExt:
1694  case Instruction::SExt:
1695  case Instruction::FPToUI:
1696  case Instruction::FPToSI:
1697  case Instruction::UIToFP:
1698  case Instruction::SIToFP:
1699  case Instruction::FPTrunc:
1700  case Instruction::FPExt:
1701  case Instruction::GetElementPtr: {
1702  // Bail out if we would create longer vector ops. We could allow creating
1703  // longer vector ops, but that may result in more expensive codegen.
1704  Type *ITy = I->getType();
1705  if (ITy->isVectorTy() &&
1706  Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1707  return false;
1708  for (Value *Operand : I->operands()) {
1709  if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1710  return false;
1711  }
1712  return true;
1713  }
1714  case Instruction::InsertElement: {
1715  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1716  if (!CI) return false;
1717  int ElementNumber = CI->getLimitedValue();
1718 
1719  // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1720  // can't put an element into multiple indices.
1721  bool SeenOnce = false;
1722  for (int I : Mask) {
1723  if (I == ElementNumber) {
1724  if (SeenOnce)
1725  return false;
1726  SeenOnce = true;
1727  }
1728  }
1729  return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1730  }
1731  }
1732  return false;
1733 }
1734 
1735 /// Rebuild a new instruction just like 'I' but with the new operands given.
1736 /// In the event of a type mismatch, the types of the new operands are taken
1736 /// to be correct and the rebuilt instruction follows them.
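/// For example, if a zext's operand was rebuilt with a different number of
/// vector elements, the destination type is recomputed with the new operand's
/// element count (see the cast handling below).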
1737 static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1738  // We don't want to use the IRBuilder here because we want the replacement
1739  // instructions to appear next to 'I', not the builder's insertion point.
1740  switch (I->getOpcode()) {
1741  case Instruction::Add:
1742  case Instruction::FAdd:
1743  case Instruction::Sub:
1744  case Instruction::FSub:
1745  case Instruction::Mul:
1746  case Instruction::FMul:
1747  case Instruction::UDiv:
1748  case Instruction::SDiv:
1749  case Instruction::FDiv:
1750  case Instruction::URem:
1751  case Instruction::SRem:
1752  case Instruction::FRem:
1753  case Instruction::Shl:
1754  case Instruction::LShr:
1755  case Instruction::AShr:
1756  case Instruction::And:
1757  case Instruction::Or:
1758  case Instruction::Xor: {
1759  BinaryOperator *BO = cast<BinaryOperator>(I);
1760  assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1761  BinaryOperator *New =
1762  BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1763  NewOps[0], NewOps[1], "", BO);
1764  if (isa<OverflowingBinaryOperator>(BO)) {
1765  New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1766  New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1767  }
1768  if (isa<PossiblyExactOperator>(BO)) {
1769  New->setIsExact(BO->isExact());
1770  }
1771  if (isa<FPMathOperator>(BO))
1772  New->copyFastMathFlags(I);
1773  return New;
1774  }
1775  case Instruction::ICmp:
1776  assert(NewOps.size() == 2 && "icmp with #ops != 2");
1777  return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1778  NewOps[0], NewOps[1]);
1779  case Instruction::FCmp:
1780  assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1781  return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1782  NewOps[0], NewOps[1]);
1783  case Instruction::Trunc:
1784  case Instruction::ZExt:
1785  case Instruction::SExt:
1786  case Instruction::FPToUI:
1787  case Instruction::FPToSI:
1788  case Instruction::UIToFP:
1789  case Instruction::SIToFP:
1790  case Instruction::FPTrunc:
1791  case Instruction::FPExt: {
1792  // It's possible that the mask has a different number of elements from
1793  // the original cast. We recompute the destination type to match the mask.
1794  Type *DestTy = VectorType::get(
1795  I->getType()->getScalarType(),
1796  cast<VectorType>(NewOps[0]->getType())->getElementCount());
1797  assert(NewOps.size() == 1 && "cast with #ops != 1");
1798  return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1799  "", I);
1800  }
1801  case Instruction::GetElementPtr: {
1802  Value *Ptr = NewOps[0];
1803  ArrayRef<Value*> Idx = NewOps.slice(1);
1804  GetElementPtrInst *GEP = GetElementPtrInst::Create(
1805  cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1806  GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1807  return GEP;
1808  }
1809  }
1810  llvm_unreachable("failed to rebuild vector instructions");
1811 }
1812 
1813 static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1814  // Mask.size() does not need to be equal to the number of vector elements.
1815 
1816  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1817  Type *EltTy = V->getType()->getScalarType();
1818  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1819  if (match(V, m_Undef()))
1820  return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1821 
1822  if (isa<ConstantAggregateZero>(V))
1823  return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1824 
1825  if (Constant *C = dyn_cast<Constant>(V))
1826  return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1827  Mask);
1828 
1829  Instruction *I = cast<Instruction>(V);
1830  switch (I->getOpcode()) {
1831  case Instruction::Add:
1832  case Instruction::FAdd:
1833  case Instruction::Sub:
1834  case Instruction::FSub:
1835  case Instruction::Mul:
1836  case Instruction::FMul:
1837  case Instruction::UDiv:
1838  case Instruction::SDiv:
1839  case Instruction::FDiv:
1840  case Instruction::URem:
1841  case Instruction::SRem:
1842  case Instruction::FRem:
1843  case Instruction::Shl:
1844  case Instruction::LShr:
1845  case Instruction::AShr:
1846  case Instruction::And:
1847  case Instruction::Or:
1848  case Instruction::Xor:
1849  case Instruction::ICmp:
1850  case Instruction::FCmp:
1851  case Instruction::Trunc:
1852  case Instruction::ZExt:
1853  case Instruction::SExt:
1854  case Instruction::FPToUI:
1855  case Instruction::FPToSI:
1856  case Instruction::UIToFP:
1857  case Instruction::SIToFP:
1858  case Instruction::FPTrunc:
1859  case Instruction::FPExt:
1860  case Instruction::Select:
1861  case Instruction::GetElementPtr: {
1862  SmallVector<Value*, 8> NewOps;
1863  bool NeedsRebuild =
1864  (Mask.size() !=
1865  cast<FixedVectorType>(I->getType())->getNumElements());
1866  for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1867  Value *V;
1868  // Recursively call evaluateInDifferentElementOrder on vector arguments
1869  // as well. E.g. GetElementPtr may have scalar operands even if the
1870  // return value is a vector, so we need to examine the operand type.
1871  if (I->getOperand(i)->getType()->isVectorTy())
1872  V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1873  else
1874  V = I->getOperand(i);
1875  NewOps.push_back(V);
1876  NeedsRebuild |= (V != I->getOperand(i));
1877  }
1878  if (NeedsRebuild) {
1879  return buildNew(I, NewOps);
1880  }
1881  return I;
1882  }
1883  case Instruction::InsertElement: {
1884  int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1885 
1886  // The insertelement was inserting at Element. Figure out which element
1887  // that becomes after shuffling. The answer is guaranteed to be unique
1888  // by canEvaluateShuffled.
1889  bool Found = false;
1890  int Index = 0;
1891  for (int e = Mask.size(); Index != e; ++Index) {
1892  if (Mask[Index] == Element) {
1893  Found = true;
1894  break;
1895  }
1896  }
1897 
1898  // If element is not in Mask, no need to handle the operand 1 (element to
1899  // be inserted). Just evaluate values in operand 0 according to Mask.
1900  if (!Found)
1901  return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1902 
1903  Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1904  return InsertElementInst::Create(V, I->getOperand(1),
1905  ConstantInt::get(I32Ty, Index), "", I);
1906  }
1907  }
1908  llvm_unreachable("failed to reorder elements of vector instruction!");
1909 }
1910 
1911 // Returns true if the shuffle is extracting a contiguous range of values from
1912 // LHS, for example:
1913 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1914 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1915 // Shuffles to: |EE|FF|GG|HH|
1916 // +--+--+--+--+
1917 static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1918  ArrayRef<int> Mask) {
1919  unsigned LHSElems =
1920  cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
1921  unsigned MaskElems = Mask.size();
1922  unsigned BegIdx = Mask.front();
1923  unsigned EndIdx = Mask.back();
1924  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1925  return false;
1926  for (unsigned I = 0; I != MaskElems; ++I)
1927  if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1928  return false;
1929  return true;
1930 }
1931 
1932 /// These are the ingredients in an alternate form binary operator as described
1933 /// below.
1934 struct BinopElts {
1935  BinaryOperator::BinaryOps Opcode;
1936  Value *Op0;
1937  Value *Op1;
1938  BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1939  Value *V0 = nullptr, Value *V1 = nullptr) :
1940  Opcode(Opc), Op0(V0), Op1(V1) {}
1941  operator bool() const { return Opcode != 0; }
1942 };
1943 
1944 /// Binops may be transformed into binops with different opcodes and operands.
1945 /// Reverse the usual canonicalization to enable folds with the non-canonical
1946 /// form of the binop. If a transform is possible, return the elements of the
1947 /// new binop. If not, return invalid elements.
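/// For example (hypothetical values), shl %x, <3,3,3,3> can be viewed as
/// mul %x, <8,8,8,8>, which may allow folding lanes together with a mul in
/// the other shuffle operand.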
1948 static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1949  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1950  Type *Ty = BO->getType();
1951  switch (BO->getOpcode()) {
1952  case Instruction::Shl: {
1953  // shl X, C --> mul X, (1 << C)
1954  Constant *C;
1955  if (match(BO1, m_Constant(C))) {
1956  Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1957  return {Instruction::Mul, BO0, ShlOne};
1958  }
1959  break;
1960  }
1961  case Instruction::Or: {
1962  // or X, C --> add X, C (when X and C have no common bits set)
1963  const APInt *C;
1964  if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1965  return {Instruction::Add, BO0, BO1};
1966  break;
1967  }
1968  case Instruction::Sub:
1969  // sub 0, X --> mul X, -1
1970  if (match(BO0, m_ZeroInt()))
1971  return {Instruction::Mul, BO1, Constant::getAllOnesValue(Ty)};
1972  break;
1973  default:
1974  break;
1975  }
1976  return {};
1977 }
1978 
1979 /// A select shuffle of a select shuffle with a shared operand can be reduced
1980 /// to a single select shuffle. This is an obvious improvement in IR, and the
1981 /// backend is expected to lower select shuffles efficiently.
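/// Example (hypothetical values):
///   shuf X, (shuf X, Y, <0,5,6,3>), <0,5,2,7> --> shuf X, Y, <0,5,2,3>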
1982 static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
1983  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1984 
1985  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1986  SmallVector<int, 16> Mask;
1987  Shuf.getShuffleMask(Mask);
1988  unsigned NumElts = Mask.size();
1989 
1990  // Canonicalize a select shuffle with common operand as Op1.
1991  auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
1992  if (ShufOp && ShufOp->isSelect() &&
1993  (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
1994  std::swap(Op0, Op1);
1995  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
1996  }
1997 
1998  ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
1999  if (!ShufOp || !ShufOp->isSelect() ||
2000  (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2001  return nullptr;
2002 
2003  Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2004  SmallVector<int, 16> Mask1;
2005  ShufOp->getShuffleMask(Mask1);
2006  assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2007 
2008  // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2009  if (Y == Op0) {
2010  std::swap(X, Y);
2011  ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2012  }
2013 
2014  // If the mask chooses from X (operand 0), it stays the same.
2015  // If the mask chooses from the earlier shuffle, the other mask value is
2016  // transferred to the combined select shuffle:
2017  // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2018  SmallVector<int, 16> NewMask(NumElts);
2019  for (unsigned i = 0; i != NumElts; ++i)
2020  NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2021 
2022  // A select mask with undef elements might look like an identity mask.
2023  assert((ShuffleVectorInst::isSelectMask(NewMask) ||
2024  ShuffleVectorInst::isIdentityMask(NewMask)) &&
2025  "Unexpected shuffle mask");
2026  return new ShuffleVectorInst(X, Y, NewMask);
2027 }
2028 
2029 static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
2030  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2031 
2032  // Are we shuffling together some value and that same value after it has been
2033  // modified by a binop with a constant?
2034  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2035  Constant *C;
2036  bool Op0IsBinop;
2037  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2038  Op0IsBinop = true;
2039  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2040  Op0IsBinop = false;
2041  else
2042  return nullptr;
2043 
2044  // The identity constant for a binop leaves a variable operand unchanged. For
2045  // a vector, this is a splat of something like 0, -1, or 1.
2046  // If there's no identity constant for this binop, we're done.
2047  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2048  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2049  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2050  if (!IdC)
2051  return nullptr;
2052 
2053  // Shuffle identity constants into the lanes that return the original value.
2054  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2055  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2056  // The existing binop constant vector remains in the same operand position.
2057  ArrayRef<int> Mask = Shuf.getShuffleMask();
2058  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2059  ConstantExpr::getShuffleVector(IdC, C, Mask);
2060 
2061  bool MightCreatePoisonOrUB =
2062  is_contained(Mask, UndefMaskElem) &&
2063  (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2064  if (MightCreatePoisonOrUB)
2065  NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2066 
2067  // shuf (bop X, C), X, M --> bop X, C'
2068  // shuf X, (bop X, C), M --> bop X, C'
2069  Value *X = Op0IsBinop ? Op1 : Op0;
2070  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2071  NewBO->copyIRFlags(BO);
2072 
2073  // An undef shuffle mask element may propagate as an undef constant element in
2074  // the new binop. That would produce poison where the original code might not.
2075  // If we already made a safe constant, then there's no danger.
2076  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2077  NewBO->dropPoisonGeneratingFlags();
2078  return NewBO;
2079 }
2080 
2081 /// If we have an insert of a scalar to a non-zero element of an undefined
2082 /// vector and then shuffle that value, that's the same as inserting to the zero
2083 /// element and shuffling. Splatting from the zero element is recognized as the
2084 /// canonical form of splat.
2085 static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2086  InstCombiner::BuilderTy &Builder) {
2087  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2088  ArrayRef<int> Mask = Shuf.getShuffleMask();
2089  Value *X;
2090  uint64_t IndexC;
2091 
2092  // Match a shuffle that is a splat to a non-zero element.
2093  if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
2094  m_ConstantInt(IndexC)))) ||
2095  !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2096  return nullptr;
2097 
2098  // Insert into element 0 of an undef vector.
2099  UndefValue *UndefVec = UndefValue::get(Shuf.getType());
2100  Constant *Zero = Builder.getInt32(0);
2101  Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
2102 
2103  // Splat from element 0. Any mask element that is undefined remains undefined.
2104  // For example:
2105  // shuf (inselt undef, X, 2), _, <2,2,undef>
2106  // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
2107  unsigned NumMaskElts =
2108  cast<FixedVectorType>(Shuf.getType())->getNumElements();
2109  SmallVector<int, 16> NewMask(NumMaskElts, 0);
2110  for (unsigned i = 0; i != NumMaskElts; ++i)
2111  if (Mask[i] == UndefMaskElem)
2112  NewMask[i] = Mask[i];
2113 
2114  return new ShuffleVectorInst(NewIns, NewMask);
2115 }
2116 
2117 /// Try to fold shuffles that are the equivalent of a vector select.
2118 Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2119  if (!Shuf.isSelect())
2120  return nullptr;
2121 
2122  // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2123  // Commuting undef to operand 0 conflicts with another canonicalization.
2124  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2125  if (!match(Shuf.getOperand(1), m_Undef()) &&
2126  Shuf.getMaskValue(0) >= (int)NumElts) {
2127  // TODO: Can we assert that both operands of a shuffle-select are not undef
2128  // (otherwise, it would have been folded by instsimplify)?
2129  Shuf.commute();
2130  return &Shuf;
2131  }
2132 
2133  if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2134  return I;
2135 
2136  if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
2137  return I;
2138 
2139  BinaryOperator *B0, *B1;
2140  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2141  !match(Shuf.getOperand(1), m_BinOp(B1)))
2142  return nullptr;
2143 
2144  // If one operand is "0 - X", allow that to be viewed as "X * -1"
2145  // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2146  // with a multiply, we will exit because C0/C1 will not be set.
2147  Value *X, *Y;
2148  Constant *C0 = nullptr, *C1 = nullptr;
2149  bool ConstantsAreOp1;
2150  if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2151  match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2152  ConstantsAreOp1 = false;
2153  else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2154  m_Neg(m_Value(X)))) &&
2155  match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2156  m_Neg(m_Value(Y)))))
2157  ConstantsAreOp1 = true;
2158  else
2159  return nullptr;
2160 
2161  // We need matching binops to fold the lanes together.
2162  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2163  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2164  bool DropNSW = false;
2165  if (ConstantsAreOp1 && Opc0 != Opc1) {
2166  // TODO: We drop "nsw" if shift is converted into multiply because it may
2167  // not be correct when the shift amount is BitWidth - 1. We could examine
2168  // each vector element to determine if it is safe to keep that flag.
2169  if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2170  DropNSW = true;
2171  if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2172  assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2173  Opc0 = AltB0.Opcode;
2174  C0 = cast<Constant>(AltB0.Op1);
2175  } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2176  assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2177  Opc1 = AltB1.Opcode;
2178  C1 = cast<Constant>(AltB1.Op1);
2179  }
2180  }
2181 
2182  if (Opc0 != Opc1 || !C0 || !C1)
2183  return nullptr;
2184 
2185  // The opcodes must be the same. Use a new name to make that clear.
2186  BinaryOperator::BinaryOps BOpc = Opc0;
2187 
2188  // Select the constant elements needed for the single binop.
2189  ArrayRef<int> Mask = Shuf.getShuffleMask();
2190  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2191 
2192  // We are moving a binop after a shuffle. When a shuffle has an undefined
2193  // mask element, the result is undefined, but it is not poison or undefined
2194  // behavior. That is not necessarily true for div/rem/shift.
2195  bool MightCreatePoisonOrUB =
2196  is_contained(Mask, UndefMaskElem) &&
2197  (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2198  if (MightCreatePoisonOrUB)
2199  NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2200  ConstantsAreOp1);
2201 
2202  Value *V;
2203  if (X == Y) {
2204  // Remove a binop and the shuffle by rearranging the constant:
2205  // shuffle (op V, C0), (op V, C1), M --> op V, C'
2206  // shuffle (op C0, V), (op C1, V), M --> op C', V
2207  V = X;
2208  } else {
2209  // If there are 2 different variable operands, we must create a new shuffle
2210  // (select) first, so check uses to ensure that we don't end up with more
2211  // instructions than we started with.
2212  if (!B0->hasOneUse() && !B1->hasOneUse())
2213  return nullptr;
2214 
2215  // If we use the original shuffle mask and op1 is *variable*, we would be
2216  // putting an undef into operand 1 of div/rem/shift. This is either UB or
2217  // poison. We do not have to guard against UB when *constants* are op1
2218  // because safe constants guarantee that we do not overflow sdiv/srem (and
2219  // there's no danger for other opcodes).
2220  // TODO: To allow this case, create a new shuffle mask with no undefs.
2221  if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2222  return nullptr;
2223 
2224  // Note: In general, we do not create new shuffles in InstCombine because we
2225  // do not know if a target can lower an arbitrary shuffle optimally. In this
2226  // case, the shuffle uses the existing mask, so there is no additional risk.
2227 
2228  // Select the variable vectors first, then perform the binop:
2229  // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2230  // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
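  // Example (hypothetical IR; %x and %y are placeholder values, and at least
  // one of the adds is assumed to have a single use):
  //   shuf (add %x, <1,2,3,4>), (add %y, <5,6,7,8>), <0,5,2,7>
  //   --> add (shuf %x, %y, <0,5,2,7>), <1,6,3,8>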
2231  V = Builder.CreateShuffleVector(X, Y, Mask);
2232  }
2233 
2234  Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2235  Builder.CreateBinOp(BOpc, NewC, V);
2236 
2237  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2238  // 1. If we changed an opcode, poison conditions might have changed.
2239  // 2. If the shuffle had undef mask elements, the new binop might have undefs
2240  // where the original code did not. But if we already made a safe constant,
2241  // then there's no danger.
2242  if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2243  NewI->copyIRFlags(B0);
2244  NewI->andIRFlags(B1);
2245  if (DropNSW)
2246  NewI->setHasNoSignedWrap(false);
2247  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2248  NewI->dropPoisonGeneratingFlags();
2249  }
2250  return replaceInstUsesWith(Shuf, NewBO);
2251 }
2252 
2253 /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2254 /// Example (little endian):
2255 /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
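/// On a big-endian target, the low bits of each narrow element sit at the
/// opposite end of the wide element, so the equivalent mask is <1, 3, 5, 7>.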
2256 static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2257  bool IsBigEndian) {
2258  // This must be a bitcasted shuffle of 1 vector integer operand.
2259  Type *DestType = Shuf.getType();
2260  Value *X;
2261  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2262  !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2263  return nullptr;
2264 
2265  // The source type must have the same number of elements as the shuffle,
2266  // and the source element type must be larger than the shuffle element type.
2267  Type *SrcType = X->getType();
2268  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2269  cast<FixedVectorType>(SrcType)->getNumElements() !=
2270  cast<FixedVectorType>(DestType)->getNumElements() ||
2271  SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2272  return nullptr;
2273 
2274  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2275  "Expected a shuffle that decreases length");
2276 
2277  // Last, check that the mask chooses the correct low bits for each narrow
2278  // element in the result.
2279  uint64_t TruncRatio =
2280  SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2281  ArrayRef<int> Mask = Shuf.getShuffleMask();
2282  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2283  if (Mask[i] == UndefMaskElem)
2284  continue;
2285  uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2286  assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2287  if (Mask[i] != (int)LSBIndex)
2288  return nullptr;
2289  }
2290 
2291  return new TruncInst(X, DestType);
2292 }
2293 
2294 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2295 /// narrowing (concatenating with undef and extracting back to the original
2296 /// length). This allows replacing the wide select with a narrow select.
2297 static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2298  InstCombiner::BuilderTy &Builder) {
2299  // This must be a narrowing identity shuffle. It extracts the 1st N elements
2300  // of the 1st vector operand of a shuffle.
2301  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2302  return nullptr;
2303 
2304  // The vector being shuffled must be a vector select that we can eliminate.
2305  // TODO: The one-use requirement could be eased if X and/or Y are constants.
2306  Value *Cond, *X, *Y;
2307  if (!match(Shuf.getOperand(0),
2308  m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2309  return nullptr;
2310 
2311  // We need a narrow condition value. It must be extended with undef elements
2312  // and have the same number of elements as this shuffle.
2313  unsigned NarrowNumElts =
2314  cast<FixedVectorType>(Shuf.getType())->getNumElements();
2315  Value *NarrowCond;
2316  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2317  cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2318  NarrowNumElts ||
2319  !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2320  return nullptr;
2321 
2322  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2323  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2324  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2325  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2326  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2327 }
2328 
2329 /// Canonicalize FP negate after shuffle.
2330 static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
2331  InstCombiner::BuilderTy &Builder) {
2332  Instruction *FNeg0;
2333  Value *X;
2334  if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
2335  m_FNeg(m_Value(X)))))
2336  return nullptr;
2337 
2338  // shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
2339  if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
2340  Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2341  return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0);
2342  }
2343 
2344  Instruction *FNeg1;
2345  Value *Y;
2346  if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
2347  m_FNeg(m_Value(Y)))))
2348  return nullptr;
2349 
2350  // shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
2351  if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
2352  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2353  Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
2354  NewFNeg->copyIRFlags(FNeg0);
2355  NewFNeg->andIRFlags(FNeg1);
2356  return NewFNeg;
2357  }
2358 
2359  return nullptr;
2360 }
2361 
2362 /// Canonicalize casts after shuffle.
2363 static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2364  InstCombiner::BuilderTy &Builder) {
2365  // Do we have 2 matching cast operands?
2366  auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2367  auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2368  if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2369  Cast0->getSrcTy() != Cast1->getSrcTy())
2370  return nullptr;
2371 
2372  // TODO: Allow other opcodes? That would require easing the type restrictions
2373  // below here.
2374  CastInst::CastOps CastOpcode = Cast0->getOpcode();
2375  switch (CastOpcode) {
2376  case Instruction::FPToSI:
2377  case Instruction::FPToUI:
2378  case Instruction::SIToFP:
2379  case Instruction::UIToFP:
2380  break;
2381  default:
2382  return nullptr;
2383  }
2384 
2385  VectorType *ShufTy = Shuf.getType();
2386  VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2387  VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2388 
2389  // TODO: Allow length-increasing shuffles?
2390  if (ShufTy->getElementCount().getKnownMinValue() >
2391  ShufOpTy->getElementCount().getKnownMinValue())
2392  return nullptr;
2393 
2394  // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2395  assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2396  "Expected fixed vector operands for casts and binary shuffle");
2397  if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2398  return nullptr;
2399 
2400  // At least one of the operands must have only one use (the shuffle).
2401  if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2402  return nullptr;
2403 
2404  // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2405  Value *X = Cast0->getOperand(0);
2406  Value *Y = Cast1->getOperand(0);
2407  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2408  return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2409 }
2410 
2411 /// Try to fold an extract subvector operation.
2412 static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2413  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2414  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2415  return nullptr;
2416 
2417  // Check if we are extracting all bits of an inserted scalar:
2418  // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2419  Value *X;
2420  if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2421  X->getType()->getPrimitiveSizeInBits() ==
2422  Shuf.getType()->getPrimitiveSizeInBits())
2423  return new BitCastInst(X, Shuf.getType());
2424 
2425  // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2426  Value *Y;
2427  ArrayRef<int> Mask;
2428  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2429  return nullptr;
2430 
2431  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2432  // then combining may result in worse codegen.
2433  if (!Op0->hasOneUse())
2434  return nullptr;
2435 
2436  // We are extracting a subvector from a shuffle. Remove excess elements from
2437  // the 1st shuffle mask to eliminate the extract.
2438  //
2439  // This transform is conservatively limited to identity extracts because we do
2440  // not allow arbitrary shuffle mask creation as a target-independent transform
2441  // (because we can't guarantee that will lower efficiently).
2442  //
2443  // If the extracting shuffle has an undef mask element, it transfers to the
2444  // new shuffle mask. Otherwise, copy the original mask element. Example:
2445  // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2446  // shuf X, Y, <C0, undef, C2, undef>
2447  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2448  SmallVector<int, 16> NewMask(NumElts);
2449  assert(NumElts < Mask.size() &&
2450  "Identity with extract must have fewer elements than its inputs");
2451 
2452  for (unsigned i = 0; i != NumElts; ++i) {
2453  int ExtractMaskElt = Shuf.getMaskValue(i);
2454  int MaskElt = Mask[i];
2455  NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
2456  }
2457  return new ShuffleVectorInst(X, Y, NewMask);
2458 }
2459 
2460 /// Try to replace a shuffle with an insertelement or try to replace a shuffle
2461 /// operand with the operand of an insertelement.
2462 static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2463  InstCombinerImpl &IC) {
2464  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2465  SmallVector<int, 16> Mask;
2466  Shuf.getShuffleMask(Mask);
2467 
2468  int NumElts = Mask.size();
2469  int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2470 
2471  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2472  // not be able to handle it there if the insertelement has >1 use.
2473  // If the shuffle has an insertelement operand but does not choose the
2474  // inserted scalar element from that value, then we can replace that shuffle
2475  // operand with the source vector of the insertelement.
2476  Value *X;
2477  uint64_t IdxC;
2478  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2479  // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2480  if (!is_contained(Mask, (int)IdxC))
2481  return IC.replaceOperand(Shuf, 0, X);
2482  }
2483  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2484  // Offset the index constant by the vector width because we are checking for
2485  // accesses to the 2nd vector input of the shuffle.
2486  IdxC += InpNumElts;
2487  // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2488  if (!is_contained(Mask, (int)IdxC))
2489  return IC.replaceOperand(Shuf, 1, X);
2490  }
2491  // For the rest of the transform, the shuffle must not change vector sizes.
2492  // TODO: This restriction could be removed if the insert has only one use
2493  // (because the transform would require a new length-changing shuffle).
2494  if (NumElts != InpNumElts)
2495  return nullptr;
2496 
2497  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2498  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2499  // We need an insertelement with a constant index.
2500  if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2501  m_ConstantInt(IndexC))))
2502  return false;
2503 
2504  // Test the shuffle mask to see if it splices the inserted scalar into the
2505  // operand 1 vector of the shuffle.
2506  int NewInsIndex = -1;
2507  for (int i = 0; i != NumElts; ++i) {
2508  // Ignore undef mask elements.
2509  if (Mask[i] == -1)
2510  continue;
2511 
2512  // The shuffle takes elements of operand 1 without lane changes.
2513  if (Mask[i] == NumElts + i)
2514  continue;
2515 
2516  // The shuffle must choose the inserted scalar exactly once.
2517  if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2518  return false;
2519 
2520  // The shuffle is placing the inserted scalar into element i.
2521  NewInsIndex = i;
2522  }
2523 
2524  assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2525 
2526  // Index is updated to the potentially translated insertion lane.
2527  IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
2528  return true;
2529  };
2530 
2531  // If the shuffle is unnecessary, insert the scalar operand directly into
2532  // operand 1 of the shuffle. Example:
2533  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2534  Value *Scalar;
2535  ConstantInt *IndexC;
2536  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2537  return InsertElementInst::Create(V1, Scalar, IndexC);
2538 
2539  // Try again after commuting shuffle. Example:
2540  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2541  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2542  std::swap(V0, V1);
2543  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2544  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2545  return InsertElementInst::Create(V1, Scalar, IndexC);
2546 
2547  return nullptr;
2548 }
2549 
2550 static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2551  // Match the operands as identity with padding (also known as concatenation
2552  // with undef) shuffles of the same source type. The backend is expected to
2553  // recreate these concatenations from a shuffle of narrow operands.
2554  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2555  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2556  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2557  !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2558  return nullptr;
2559 
2560  // We limit this transform to power-of-2 types because we expect that the
2561  // backend can convert the simplified IR patterns to identical nodes as the
2562  // original IR.
2563  // TODO: If we can verify the same behavior for arbitrary types, the
2564  // power-of-2 checks can be removed.
2565  Value *X = Shuffle0->getOperand(0);
2566  Value *Y = Shuffle1->getOperand(0);
2567  if (X->getType() != Y->getType() ||
2568  !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2569  !isPowerOf2_32(
2570  cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2571  !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2572  match(X, m_Undef()) || match(Y, m_Undef()))
2573  return nullptr;
2574  assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2575  match(Shuffle1->getOperand(1), m_Undef()) &&
2576  "Unexpected operand for identity shuffle");
2577 
2578  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2579  // operands directly by adjusting the shuffle mask to account for the narrower
2580  // types:
2581  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
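  // Example (hypothetical values), with X and Y of type <2 x i32> widened to
  // <4 x i32>:
  //   shuf (shuf X, undef, <0,1,u,u>), (shuf Y, undef, <0,1,u,u>), <0,1,4,5>
  //   --> shuf X, Y, <0,1,2,3>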
2582  int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2583  int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2584  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2585 
2586  ArrayRef<int> Mask = Shuf.getShuffleMask();
2587  SmallVector<int, 16> NewMask(Mask.size(), -1);
2588  for (int i = 0, e = Mask.size(); i != e; ++i) {
2589  if (Mask[i] == -1)
2590  continue;
2591 
2592  // If this shuffle is choosing an undef element from 1 of the sources, that
2593  // element is undef.
2594  if (Mask[i] < WideElts) {
2595  if (Shuffle0->getMaskValue(Mask[i]) == -1)
2596  continue;
2597  } else {
2598  if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2599  continue;
2600  }
2601 
2602  // If this shuffle is choosing from the 1st narrow op, the mask element is
2603  // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2604  // element is offset down to adjust for the narrow vector widths.
2605  if (Mask[i] < WideElts) {
2606  assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2607  NewMask[i] = Mask[i];
2608  } else {
2609  assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2610  NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2611  }
2612  }
2613  return new ShuffleVectorInst(X, Y, NewMask);
2614 }
2615 
2616 // Splatting the first element of the result of a BinOp, where any of the
2617 // BinOp's operands are the result of a first-element splat, can be simplified
2618 // to splatting the first element of the BinOp's result.
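// Example (hypothetical IR; %x and %y are placeholder values):
//   %s = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> zeroinitializer
//   %b = add <4 x i32> %s, %y
//   %r = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer
// -->
//   %b = add <4 x i32> %x, %y
//   %r = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> zeroinitializer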
2619 Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2620  if (!match(SVI.getOperand(1), m_Undef()) ||
2621  !match(SVI.getShuffleMask(), m_ZeroMask()))
2622  return nullptr;
2623 
2624  Value *Op0 = SVI.getOperand(0);
2625  Value *X, *Y;
2626  if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2627  m_Value(Y))) &&
2628  !match(Op0, m_BinOp(m_Value(X),
2629  m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2630  return nullptr;
2631  if (X->getType() != Y->getType())
2632  return nullptr;
2633 
2634  auto *BinOp = cast<BinaryOperator>(Op0);
2635  if (!isSafeToSpeculativelyExecute(BinOp))
2636  return nullptr;
2637 
2638  Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2639  if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2640  NewBOI->copyIRFlags(BinOp);
2641 
2642  return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2643 }
2644 
2645 Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2646  Value *LHS = SVI.getOperand(0);
2647  Value *RHS = SVI.getOperand(1);
2648  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2649  if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2650  SVI.getType(), ShufQuery))
2651  return replaceInstUsesWith(SVI, V);
2652 
2653  if (Instruction *I = simplifyBinOpSplats(SVI))
2654  return I;
2655 
2656  if (isa<ScalableVectorType>(LHS->getType()))
2657  return nullptr;
2658 
2659  unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2660  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2661 
2662  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2663  //
2664  // if X and Y are of the same (vector) type, and the element size is not
2665  // changed by the bitcasts, we can distribute the bitcasts through the
2666  // shuffle, hopefully reducing the number of instructions. We make sure that
2667  // at least one bitcast only has one use, so we don't *increase* the number of
2668  // instructions here.
2669  Value *X, *Y;
2670  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2671  X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2672  X->getType()->getScalarSizeInBits() ==
2673  SVI.getType()->getScalarSizeInBits() &&
2674  (LHS->hasOneUse() || RHS->hasOneUse())) {
2675  Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2676  SVI.getName() + ".uncasted");
2677  return new BitCastInst(V, SVI.getType());
2678  }
2679 
2680  ArrayRef<int> Mask = SVI.getShuffleMask();
2681  Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
2682 
2683  // Peek through a bitcasted shuffle operand by scaling the mask. If the
2684  // simulated shuffle can simplify, then this shuffle is unnecessary:
2685  // shuf (bitcast X), undef, Mask --> bitcast X'
2686  // TODO: This could be extended to allow length-changing shuffles.
2687  // The transform might also be obsoleted if we allowed canonicalization
2688  // of bitcasted shuffles.
2689  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2690  X->getType()->isVectorTy() && VWidth == LHSWidth) {
2691  // Try to create a scaled mask constant.
2692  auto *XType = cast<FixedVectorType>(X->getType());
2693  unsigned XNumElts = XType->getNumElements();
2694  SmallVector<int, 16> ScaledMask;
2695  if (XNumElts >= VWidth) {
2696  assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2697  narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2698  } else {
2699  assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2700  if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2701  ScaledMask.clear();
2702  }
2703  if (!ScaledMask.empty()) {
2704  // If the shuffled source vector simplifies, cast that value to this
2705  // shuffle's type.
2706  if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2707  ScaledMask, XType, ShufQuery))
2708  return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2709  }
2710  }
2711 
2712  // shuffle x, x, mask --> shuffle x, undef, mask'
2713  if (LHS == RHS) {
2714  assert(!match(RHS, m_Undef()) &&
2715  "Shuffle with 2 undef ops not simplified?");
2716  return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2717  }
2718 
2719  // shuffle undef, x, mask --> shuffle x, undef, mask'
2720  if (match(LHS, m_Undef())) {
2721  SVI.commute();
2722  return &SVI;
2723  }
2724 
2725  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2726  return I;
2727 
2728  if (Instruction *I = foldSelectShuffle(SVI))
2729  return I;
2730 
2731  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2732  return I;
2733 
2734  if (Instruction *I = narrowVectorSelect(SVI, Builder))
2735  return I;
2736 
2737  if (Instruction *I = foldFNegShuffle(SVI, Builder))
2738  return I;
2739 
2740  if (Instruction *I = foldCastShuffle(SVI, Builder))
2741  return I;
2742 
2743  APInt UndefElts(VWidth, 0);
2744  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2745  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2746  if (V != &SVI)
2747  return replaceInstUsesWith(SVI, V);
2748  return &SVI;
2749  }
2750 
2751  if (Instruction *I = foldIdentityExtractShuffle(SVI))
2752  return I;
2753 
2754  // These transforms have the potential to lose undef knowledge, so they are
2755  // intentionally placed after SimplifyDemandedVectorElts().
2756  if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2757  return I;
2758  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2759  return I;
2760 
2761  if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2762  Value *V = evaluateInDifferentElementOrder(LHS, Mask);
2763  return replaceInstUsesWith(SVI, V);
2764  }
2765 
2766  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2767  // a non-vector type. We can instead bitcast the original vector followed by
2768  // an extract of the desired element:
2769  //
2770  // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2771  // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2772  // %1 = bitcast <4 x i8> %sroa to i32
2773  // Becomes:
2774  // %bc = bitcast <16 x i8> %in to <4 x i32>
2775  // %ext = extractelement <4 x i32> %bc, i32 0
2776  //
2777  // If the shuffle is extracting a contiguous range of values from the input
2778  // vector then each use which is a bitcast of the extracted size can be
2779  // replaced. This will work if the vector types are compatible, and the begin
2780  // index is aligned to a value in the casted vector type. If the begin index
2781  // isn't aligned then we can shuffle the original vector (keeping the same
2782  // vector type) before extracting.
2783  //
2784  // This code will bail out if the target type is fundamentally incompatible
2785  // with vectors of the source type.
2786  //
2787  // Example of <16 x i8>, target type i32:
2788  // Index range [4,8): v-----------v Will work.
2789  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2790  // <16 x i8>: | | | | | | | | | | | | | | | | |
2791  // <4 x i32>: | | | | |
2792  // +-----------+-----------+-----------+-----------+
2793  // Index range [6,10): ^-----------^ Needs an extra shuffle.
2794  // Target type i40: ^--------------^ Won't work, bail.
2795  bool MadeChange = false;
2796  if (isShuffleExtractingFromLHS(SVI, Mask)) {
2797  Value *V = LHS;
2798  unsigned MaskElems = Mask.size();
2799  auto *SrcTy = cast<FixedVectorType>(V->getType());
2800  unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize();
2801  unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2802  assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2803  unsigned SrcNumElems = SrcTy->getNumElements();
2804  SmallVector<BitCastInst *, 8> BCs;
2805  DenseMap<Type *, Value *> NewBCs;
2806  for (User *U : SVI.users())
2807  if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2808  if (!BC->use_empty())
2809  // Only visit bitcasts that weren't previously handled.
2810  BCs.push_back(BC);
2811  for (BitCastInst *BC : BCs) {
2812  unsigned BegIdx = Mask.front();
2813  Type *TgtTy = BC->getDestTy();
2814  unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2815  if (!TgtElemBitWidth)
2816  continue;
2817  unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2818  bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2819  bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2820  if (!VecBitWidthsEqual)
2821  continue;
2822  if (!VectorType::isValidElementType(TgtTy))
2823  continue;
2824  auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2825  if (!BegIsAligned) {
2826  // Shuffle the input so [0,MaskElems) contains the output, and
2827  // [MaskElems,SrcNumElems) is undef.
2828  SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2829  for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2830  ShuffleMask[I] = Idx;
2831  V = Builder.CreateShuffleVector(V, ShuffleMask,
2832  SVI.getName() + ".extract");
2833  BegIdx = 0;
2834  }
2835  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2836  assert(SrcElemsPerTgtElem);
2837  BegIdx /= SrcElemsPerTgtElem;
2838  bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2839  auto *NewBC =
2840  BCAlreadyExists
2841  ? NewBCs[CastSrcTy]
2842  : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2843  if (!BCAlreadyExists)
2844  NewBCs[CastSrcTy] = NewBC;
2845  auto *Ext = Builder.CreateExtractElement(
2846  NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2847  // The shufflevector isn't being replaced: the bitcast that used it
2848  // is. InstCombine will visit the newly-created instructions.
2849  replaceInstUsesWith(*BC, Ext);
2850  MadeChange = true;
2851  }
2852  }
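 // Worked example of the unaligned case (illustrative, not from the source):
 // extracting range [6,10) of <16 x i8> for an i32 use has BegIdx = 6, which
 // is not 32-bit aligned, so the input is first shuffled down to the front:
 //   %e = shufflevector <16 x i8> %in, <16 x i8> undef,
 //        <16 x i32> <i32 6, i32 7, i32 8, i32 9, ...>  ; remaining lanes undef
 // after which BegIdx becomes 0 and the bitcast+extract form applies:
 //   %bc = bitcast <16 x i8> %e to <4 x i32>
 //   %ext = extractelement <4 x i32> %bc, i32 0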
2853 
2854  // If the LHS is a shufflevector itself, see if we can combine it with this
2855  // one without producing an unusual shuffle.
2856  // Cases that might be simplified:
2857  // 1.
2858  // x1=shuffle(v1,v2,mask1)
2859  // x=shuffle(x1,undef,mask)
2860  // ==>
2861  // x=shuffle(v1,undef,newMask)
2862  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2863  // 2.
2864  // x1=shuffle(v1,undef,mask1)
2865  // x=shuffle(x1,x2,mask)
2866  // where v1.size() == mask1.size()
2867  // ==>
2868  // x=shuffle(v1,x2,newMask)
2869  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2870  // 3.
2871  // x2=shuffle(v2,undef,mask2)
2872  // x=shuffle(x1,x2,mask)
2873  // where v2.size() == mask2.size()
2874  // ==>
2875  // x=shuffle(x1,v2,newMask)
2876  // newMask[i] = (mask[i] < x1.size())
2877  // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2878  // 4.
2879  // x1=shuffle(v1,undef,mask1)
2880  // x2=shuffle(v2,undef,mask2)
2881  // x=shuffle(x1,x2,mask)
2882  // where v1.size() == v2.size()
2883  // ==>
2884  // x=shuffle(v1,v2,newMask)
2885  // newMask[i] = (mask[i] < x1.size())
2886  // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2887  //
2888  // Here we are deliberately conservative: we avoid producing a shuffle
2889  // mask that did not appear in the input program, because codegen may not
2890  // be smart enough to split a merged shuffle back into two cheap shuffles
2891  // and may produce worse code. As such, we only merge two shuffles if the
2892  // result is either a splat or one of the input shuffle masks. In that
2893  // case, merging the shuffles just removes one instruction, which we know
2894  // is safe. This is good for things like
2895  // turning: (splat(splat)) -> splat, or
2896  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
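 // Worked example of case 1 (illustrative, not from the source):
 //   %x1 = shufflevector <4 x float> %v, <4 x float> undef,
 //                       <4 x i32> <i32 0, i32 0, i32 0, i32 0>
 //   %x  = shufflevector <4 x float> %x1, <4 x float> undef,
 //                       <4 x i32> <i32 2, i32 3, i32 2, i32 3>
 // newMask[i] = mask1[mask[i]] = 0 for every lane, which is a splat, so the
 // pair merges into a single splat shuffle of %v.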
2897  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2898  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2899  if (LHSShuffle)
2900  if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
2901  LHSShuffle = nullptr;
2902  if (RHSShuffle)
2903  if (!match(RHSShuffle->getOperand(1), m_Undef()))
2904  RHSShuffle = nullptr;
2905  if (!LHSShuffle && !RHSShuffle)
2906  return MadeChange ? &SVI : nullptr;
2907 
2908  Value* LHSOp0 = nullptr;
2909  Value* LHSOp1 = nullptr;
2910  Value* RHSOp0 = nullptr;
2911  unsigned LHSOp0Width = 0;
2912  unsigned RHSOp0Width = 0;
2913  if (LHSShuffle) {
2914  LHSOp0 = LHSShuffle->getOperand(0);
2915  LHSOp1 = LHSShuffle->getOperand(1);
2916  LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
2917  }
2918  if (RHSShuffle) {
2919  RHSOp0 = RHSShuffle->getOperand(0);
2920  RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
2921  }
2922  Value* newLHS = LHS;
2923  Value* newRHS = RHS;
2924  if (LHSShuffle) {
2925  // case 1
2926  if (match(RHS, m_Undef())) {
2927  newLHS = LHSOp0;
2928  newRHS = LHSOp1;
2929  }
2930  // case 2 or 4
2931  else if (LHSOp0Width == LHSWidth) {
2932  newLHS = LHSOp0;
2933  }
2934  }
2935  // case 3 or 4
2936  if (RHSShuffle && RHSOp0Width == LHSWidth) {
2937  newRHS = RHSOp0;
2938  }
2939  // case 4
2940  if (LHSOp0 == RHSOp0) {
2941  newLHS = LHSOp0;
2942  newRHS = nullptr;
2943  }
2944 
2945  if (newLHS == LHS && newRHS == RHS)
2946  return MadeChange ? &SVI : nullptr;
2947 
2948  ArrayRef<int> LHSMask;
2949  ArrayRef<int> RHSMask;
2950  if (newLHS != LHS)
2951  LHSMask = LHSShuffle->getShuffleMask();
2952  if (RHSShuffle && newRHS != RHS)
2953  RHSMask = RHSShuffle->getShuffleMask();
2954 
2955  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2956  SmallVector<int, 16> newMask;
2957  bool isSplat = true;
2958  int SplatElt = -1;
2959  // Create a new mask for the new ShuffleVectorInst so that the new
2960  // ShuffleVectorInst is equivalent to the original one.
2961  for (unsigned i = 0; i < VWidth; ++i) {
2962  int eltMask;
2963  if (Mask[i] < 0) {
2964  // This element is an undef value.
2965  eltMask = -1;
2966  } else if (Mask[i] < (int)LHSWidth) {
2967  // This element is from left hand side vector operand.
2968  //
2969  // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2970  // new mask value for the element.
2971  if (newLHS != LHS) {
2972  eltMask = LHSMask[Mask[i]];
2973  // If the value selected is an undef value, explicitly specify it
2974  // with a -1 mask value.
2975  if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2976  eltMask = -1;
2977  } else
2978  eltMask = Mask[i];
2979  } else {
2980  // This element is from right hand side vector operand
2981  //
2982  // If the value selected is an undef value, explicitly specify it
2983  // with a -1 mask value. (case 1)
2984  if (match(RHS, m_Undef()))
2985  eltMask = -1;
2986  // If RHS is going to be replaced (case 3 or 4), calculate the
2987  // new mask value for the element.
2988  else if (newRHS != RHS) {
2989  eltMask = RHSMask[Mask[i]-LHSWidth];
2990  // If the value selected is an undef value, explicitly specify it
2991  // with a -1 mask value.
2992  if (eltMask >= (int)RHSOp0Width) {
2993  assert(match(RHSShuffle->getOperand(1), m_Undef()) &&
2994  "should have been checked above");
2995  eltMask = -1;
2996  }
2997  } else
2998  eltMask = Mask[i]-LHSWidth;
2999 
3000  // If LHS's width is changed, shift the mask value accordingly.
3001  // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3002  // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3003  // If newRHS == newLHS, we want to remap any references from newRHS to
3004  // newLHS so that we can properly identify splats that may occur due to
3005  // obfuscation across the two vectors.
3006  if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3007  eltMask += newLHSWidth;
3008  }
3009 
3010  // Check if this could still be a splat.
3011  if (eltMask >= 0) {
3012  if (SplatElt >= 0 && SplatElt != eltMask)
3013  isSplat = false;
3014  SplatElt = eltMask;
3015  }
3016 
3017  newMask.push_back(eltMask);
3018  }
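 // Illustrative note: SplatElt records the single non-undef index seen so
 // far, and any second distinct index clears isSplat; undef (-1) lanes are
 // skipped, so e.g. newMask <0, -1, 0, 0> still qualifies as a splat.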
3019 
3020  // If the result mask is equal to one of the original shuffle masks,
3021  // or is a splat, do the replacement.
3022  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3023  if (!newRHS)
3024  newRHS = UndefValue::get(newLHS->getType());
3025  return new ShuffleVectorInst(newLHS, newRHS, newMask);
3026  }
3027 
3028  return MadeChange ? &SVI : nullptr;
3029 }