//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
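///
/// For example (an illustrative case, not taken from the regression tests):
/// extracting a constant lane from a one-use binop with a constant operand is
/// cheap, because the extract can be folded into both operands:
///   %b = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
///   %e = extractelement <4 x i32> %b, i64 1
///   -->
///   %x1 = extractelement <4 x i32> %x, i64 1
///   %e  = add i32 %x1, 2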
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // Index needs to be lower than the minimum size of the vector, because
    // for a scalable vector, the vector size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpInst::Predicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
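//
// For example (an illustrative sketch):
//   %vec  = phi <4 x float> [ %init, %entry ], [ %next, %loop ]
//   %next = fadd <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0>
//   %e    = extractelement <4 x float> %vec, i64 0
// becomes a scalar PHI plus a scalar fadd, with the extracts rewritten to use
// the scalar PHI.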
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          *B0);
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0), *B0);
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, *InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto E : Extracts)
    replaceInstUsesWith(*E, scalarPHI);

  return &EI;
}

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  // TODO: Allow FP dest type by casting the trunc to FP?
  if (X->getType()->isIntegerTy() && DestTy->isIntegerTy() &&
      isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestTy->getPrimitiveSizeInBits();
    if (!ShiftAmountC || Ext.getVectorOperand()->hasOneUse()) {
      Value *Lshr = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      return new TruncInst(Lshr, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from
    // 1 of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
    if (ExtIndexC / NarrowingRatio != InsIndexC)
      return nullptr;

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find elements of V demanded by UserInstr.
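/// For example (an illustrative case), 'extractelement <4 x i32> %v, i64 2'
/// demands only element 2 of %v, while a shufflevector demands exactly the
/// lanes its mask selects from %v.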
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
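///
/// For example: extractelement <4 x float> %x, i16 0
///          --> extractelement <4 x float> %x, i64 0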
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getType()->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  if (IndexC) {
    // Canonicalize type of constant indices to i64 to simplify CSE.
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // Index needs to be lower than the minimum size of the vector, because
      // for a scalable vector, the vector size is only known at run time.
      if (IID == Intrinsic::experimental_stepvector &&
          IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return undefined.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = UndefValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For fixed-length vectors, it's invalid to extract an out-of-range
    // element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO: Come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpInst::Predicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal by value. If both are constants, they must not be the same
      // value, so extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out
      // where it came from and extract from the appropriate input element
      // instead. Restrict the following transformation to fixed-length
      // vectors.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int32Ty = Type::getInt32Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int32Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one that doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable types; the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt UndefElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt UndefElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
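///
/// For example (an illustrative case) with <4 x i8> operands:
///   %e = extractelement <4 x i8> %RHS, i64 1
///   %v = insertelement <4 x i8> %LHS, i8 %e, i64 3
/// yields the mask <0, 1, 2, 5>, where elements 4-7 index into RHS.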
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Undef())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted undef.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
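///
/// For example (an illustrative sketch), extracts from a <2 x i8> %src that
/// feed inserts into a <4 x i8> can instead extract from a widened vector:
///   %wide = shufflevector <2 x i8> %src, <2 x i8> poison,
///                         <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>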
static void replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    NewExt->insertAfter(OldExt);
    IC.replaceInstUsesWith(*OldExt, NewExt);
  }
}

/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
/// not rely on the second vector source. Return a std::pair containing the
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
/// parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
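///
/// For example (an illustrative case):
///   %e = extractelement <4 x float> %A, i64 0
///   %v = insertelement <4 x float> %B, float %e, i64 2
/// can produce the pair (%B, %A) with mask <0, 1, 4, 3>.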
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Undef())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted-from or inserted-into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            replaceExtractElements(IEI, EI, IC);

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted; see if they were all extractvalue'd from
/// the same source aggregate from the exact same element indexes.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
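///
/// For example (an illustrative case):
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } undef, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// Here %i1 reconstructs %agg element-by-element, so it can be replaced by
/// %agg itself.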
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = None;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<Optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return all_of(AggElts,
                  [](Optional<Instruction *> Elt) { return Elt != NotFound; });
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either we've
  // reconstructed the full initializer or can't visit any more `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                       ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    Optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type from which the extraction was didn't
    /// match the aggregate type into which the insertion was,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](Optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // Given the value \p Elt that was being inserted into element \p EltIdx of an
  // aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, Optional<BasicBlock *> UseBB,
          Optional<BasicBlock *> PredBB) -> Optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB)
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find appropriate source aggregate for each of the elements,
  // and see if it's the same aggregate for each element. If so, return it.
  auto FindCommonSourceAggregate =
      [&](Optional<BasicBlock *> UseBB,
          Optional<BasicBlock *> PredBB) -> Optional<Value *> {
    Optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element.");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      Optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, passthrough whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  Optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/None, /*PredBB=*/None);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for aggregate, where we'll insert the final PHI that will be
  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
  // We should look in which blocks each of the AggElts is being defined,
  // they all should be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const Optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate,
  // from which all the elements were originally extracted from?
  // Note that we want for the map to have stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) != AggregateDescription::Found)
      return nullptr; // Give up.
    IV.first->second = *SourceAggregate;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB->getFirstNonPHI());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0, i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed into:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto *UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

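/// Return true if the shuffle acts like a vector select: every mask element
/// either is undef or selects lane i from one of the two source operands
/// (i.e. mask element i is -1, i, or i + VecSize), so no lanes are crossed.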
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check that none of the intermediate steps have any additional uses;
    // the root insertelement instruction can be re-used if it inserts at
    // position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into an undef vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Undef()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int32Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);

  // Splat from element 0, but replace absent elements with undef in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle is of a scalable type. The number of
  // elements in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle is of a scalable type. The number of
  // elements in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == UndefMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;

  Value *X, *Y;
  Constant *ScalarC;
  ConstantInt *IdxC1, *IdxC2;
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
  }

  return nullptr;
}

/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
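///
/// For example (an illustrative case):
///   %shuf = shufflevector <2 x i8> %x, <2 x i8> <i8 poison, i8 5>,
///                         <2 x i32> <i32 0, i32 3>
///   %ins  = insertelement <2 x i8> %shuf, i8 7, i64 0
///   --> shufflevector <2 x i8> %x, <2 x i8> <i8 7, i8 5>,
///                     <2 x i32> <i32 2, i32 3>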
1352  auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1353  // Bail out if the parent has more than one use. In that case, we'd be
1354  // replacing the insertelt with a shuffle, and that's not a clear win.
1355  if (!Inst || !Inst->hasOneUse())
1356  return nullptr;
1357  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1358  // The shuffle must have a constant vector operand. The insertelt must have
1359  // a constant scalar being inserted at a constant position in the vector.
1360  Constant *ShufConstVec, *InsEltScalar;
1361  uint64_t InsEltIndex;
1362  if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1363  !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1364  !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1365  return nullptr;
1366 
1367  // Adding an element to an arbitrary shuffle could be expensive, but a
1368  // shuffle that selects elements from vectors without crossing lanes is
1369  // assumed cheap.
1370  // If we're just adding a constant into that shuffle, it will still be
1371  // cheap.
1372  if (!isShuffleEquivalentToSelect(*Shuf))
1373  return nullptr;
1374 
1375  // From the above 'select' check, we know that the mask has the same number
1376  // of elements as the vector input operands. We also know that each constant
1377  // input element is used in its lane and can not be used more than once by
1378  // the shuffle. Therefore, replace the constant in the shuffle's constant
1379  // vector with the insertelt constant. Replace the constant in the shuffle's
1380  // mask vector with the insertelt index plus the length of the vector
1381  // (because the constant vector operand of a shuffle is always the 2nd
1382  // operand).
1383  ArrayRef<int> Mask = Shuf->getShuffleMask();
1384  unsigned NumElts = Mask.size();
1385  SmallVector<Constant *, 16> NewShufElts(NumElts);
1386  SmallVector<int, 16> NewMaskElts(NumElts);
1387  for (unsigned I = 0; I != NumElts; ++I) {
1388  if (I == InsEltIndex) {
1389  NewShufElts[I] = InsEltScalar;
1390  NewMaskElts[I] = InsEltIndex + NumElts;
1391  } else {
1392  // Copy over the existing values.
1393  NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1394  NewMaskElts[I] = Mask[I];
1395  }
1396 
1397  // Bail if we failed to find an element.
1398  if (!NewShufElts[I])
1399  return nullptr;
1400  }
1401 
1402  // Create new operands for a shuffle that includes the constant of the
1403  // original insertelt. The old shuffle will be dead now.
1404  return new ShuffleVectorInst(Shuf->getOperand(0),
1405  ConstantVector::get(NewShufElts), NewMaskElts);
1406  } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1407  // Transform sequences of insertelements ops with constant data/indexes into
1408  // a single shuffle op.
1409  // Cannot handle scalable vector types; the number of elements needed to
1410  // create the shuffle mask is not a compile-time constant.
1411  if (isa<ScalableVectorType>(InsElt.getType()))
1412  return nullptr;
1413  unsigned NumElts =
1414  cast<FixedVectorType>(InsElt.getType())->getNumElements();
1415 
1416  uint64_t InsertIdx[2];
1417  Constant *Val[2];
1418  if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1419  !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1420  !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1421  !match(IEI->getOperand(1), m_Constant(Val[1])))
1422  return nullptr;
1423  SmallVector<Constant *, 16> Values(NumElts);
1424  SmallVector<int, 16> Mask(NumElts);
1425  auto ValI = std::begin(Val);
1426  // Generate new constant vector and mask.
1427  // We have 2 values/masks from the insertelements instructions. Insert them
1428  // into new value/mask vectors.
1429  for (uint64_t I : InsertIdx) {
1430  if (!Values[I]) {
1431  Values[I] = *ValI;
1432  Mask[I] = NumElts + I;
1433  }
1434  ++ValI;
1435  }
1436  // Remaining values are filled with 'undef' values.
1437  for (unsigned I = 0; I < NumElts; ++I) {
1438  if (!Values[I]) {
1439  Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1440  Mask[I] = I;
1441  }
1442  }
1443  // Create new operands for a shuffle that includes the constant of the
1444  // original insertelt.
1445  return new ShuffleVectorInst(IEI->getOperand(0),
1446  ConstantVector::get(Values), Mask);
1447  }
1448  return nullptr;
1449 }
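// Illustrative instance of the insertelement-chain case above (hypothetical
// IR): a one-use insert of a constant feeding another constant insert
// collapses into a single shuffle with a constant second operand.
//   %i1 = insertelement <4 x i32> %v, i32 7, i64 0
//   %i2 = insertelement <4 x i32> %i1, i32 8, i64 1
// -->
//   %i2 = shufflevector <4 x i32> %v,
//                       <4 x i32> <i32 7, i32 8, i32 undef, i32 undef>,
//                       <4 x i32> <i32 4, i32 5, i32 2, i32 3>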
1450 
1451 /// If both the base vector and the inserted element are extended from the same
1452 /// type, do the insert element in the narrow source type followed by extend.
1453 /// TODO: This can be extended to include other cast opcodes, but particularly
1454 /// if we create a wider insertelement, make sure codegen is not harmed.
1455 static Instruction *narrowInsElt(InsertElementInst &InsElt,
1456  InstCombiner::BuilderTy &Builder) {
1457  // We are creating a vector extend. If the original vector extend has another
1458  // use, that would mean we end up with 2 vector extends, so avoid that.
1459  // TODO: We could ease the use-clause to "if at least one op has one use"
1460  // (assuming that the source types match - see next TODO comment).
1461  Value *Vec = InsElt.getOperand(0);
1462  if (!Vec->hasOneUse())
1463  return nullptr;
1464 
1465  Value *Scalar = InsElt.getOperand(1);
1466  Value *X, *Y;
1467  CastInst::CastOps CastOpcode;
1468  if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1469  CastOpcode = Instruction::FPExt;
1470  else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1471  CastOpcode = Instruction::SExt;
1472  else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1473  CastOpcode = Instruction::ZExt;
1474  else
1475  return nullptr;
1476 
1477  // TODO: We can allow mismatched types by creating an intermediate cast.
1478  if (X->getType()->getScalarType() != Y->getType())
1479  return nullptr;
1480 
1481  // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1482  Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1483  return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1484 }
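// Illustrative instance (hypothetical IR, assuming the source types match):
//   %v = fpext <2 x half> %x to <2 x float>
//   %s = fpext half %y to float
//   %r = insertelement <2 x float> %v, float %s, i64 1
// -->
//   %n = insertelement <2 x half> %x, half %y, i64 1
//   %r = fpext <2 x half> %n to <2 x float>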
1485 
1486 Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1487  Value *VecOp = IE.getOperand(0);
1488  Value *ScalarOp = IE.getOperand(1);
1489  Value *IdxOp = IE.getOperand(2);
1490 
1491  if (auto *V = simplifyInsertElementInst(
1492  VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1493  return replaceInstUsesWith(IE, V);
1494 
1495  // Canonicalize type of constant indices to i64 to simplify CSE
1496  if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp))
1497  if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1498  return replaceOperand(IE, 2, NewIdx);
1499 
1500  // If the scalar is bitcast and inserted into undef, do the insert in the
1501  // source type followed by bitcast.
1502  // TODO: Generalize for insert into any constant, not just undef?
1503  Value *ScalarSrc;
1504  if (match(VecOp, m_Undef()) &&
1505  match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1506  (ScalarSrc->getType()->isIntegerTy() ||
1507  ScalarSrc->getType()->isFloatingPointTy())) {
1508  // inselt undef, (bitcast ScalarSrc), IdxOp -->
1509  // bitcast (inselt undef, ScalarSrc, IdxOp)
1510  Type *ScalarTy = ScalarSrc->getType();
1511  Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1512  UndefValue *NewUndef = UndefValue::get(VecTy);
1513  Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1514  return new BitCastInst(NewInsElt, IE.getType());
1515  }
1516 
1517  // If the vector and scalar are both bitcast from the same element type, do
1518  // the insert in that source type followed by bitcast.
1519  Value *VecSrc;
1520  if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1521  match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1522  (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1523  VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1524  cast<VectorType>(VecSrc->getType())->getElementType() ==
1525  ScalarSrc->getType()) {
1526  // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1527  // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1528  Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1529  return new BitCastInst(NewInsElt, IE.getType());
1530  }
1531 
1532  // If the inserted element was extracted from some other fixed-length vector
1533  // and both indexes are valid constants, try to turn this into a shuffle.
1534  // Cannot handle scalable vector types; the number of elements needed to
1535  // create the shuffle mask is not a compile-time constant.
1536  uint64_t InsertedIdx, ExtractedIdx;
1537  Value *ExtVecOp;
1538  if (isa<FixedVectorType>(IE.getType()) &&
1539  match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1540  match(ScalarOp,
1541  m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1542  isa<FixedVectorType>(ExtVecOp->getType()) &&
1543  ExtractedIdx <
1544  cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1545  // TODO: Looking at the user(s) to determine if this insert is a
1546  // fold-to-shuffle opportunity does not match the usual instcombine
1547  // constraints. We should decide if the transform is worthy based only
1548  // on this instruction and its operands, but that may not work currently.
1549  //
1550  // Here, we are trying to avoid creating shuffles before reaching
1551  // the end of a chain of extract-insert pairs. This is complicated because
1552  // we do not generally form arbitrary shuffle masks in instcombine
1553  // (because those may codegen poorly), but collectShuffleElements() does
1554  // exactly that.
1555  //
1556  // The rules for determining what is an acceptable target-independent
1557  // shuffle mask are fuzzy because they evolve based on the backend's
1558  // capabilities and real-world impact.
1559  auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1560  if (!Insert.hasOneUse())
1561  return true;
1562  auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1563  if (!InsertUser)
1564  return true;
1565  return false;
1566  };
1567 
1568  // Try to form a shuffle from a chain of extract-insert ops.
1569  if (isShuffleRootCandidate(IE)) {
1570  SmallVector<int, 16> Mask;
1571  ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1572 
1573  // The proposed shuffle may be trivial, in which case we shouldn't
1574  // perform the combine.
1575  if (LR.first != &IE && LR.second != &IE) {
1576  // We now have a shuffle of LHS, RHS, Mask.
1577  if (LR.second == nullptr)
1578  LR.second = UndefValue::get(LR.first->getType());
1579  return new ShuffleVectorInst(LR.first, LR.second, Mask);
1580  }
1581  }
1582  }
1583 
1584  if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1585  unsigned VWidth = VecTy->getNumElements();
1586  APInt UndefElts(VWidth, 0);
1587  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1588  if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1589  if (V != &IE)
1590  return replaceInstUsesWith(IE, V);
1591  return &IE;
1592  }
1593  }
1594 
1596  return Shuf;
1597 
1598  if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1599  return NewInsElt;
1600 
1601  if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1602  return Broadcast;
1603 
1604  if (Instruction *Splat = foldInsEltIntoSplat(IE))
1605  return Splat;
1606 
1607  if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1608  return IdentityShuf;
1609 
1610  if (Instruction *Ext = narrowInsElt(IE, Builder))
1611  return Ext;
1612 
1613  return nullptr;
1614 }
1615 
1616 /// Return true if we can evaluate the specified expression tree if the vector
1617 /// elements were shuffled in a different order.
1618 static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1619  unsigned Depth = 5) {
1620  // We can always reorder the elements of a constant.
1621  if (isa<Constant>(V))
1622  return true;
1623 
1624  // We won't reorder vector arguments. No IPO here.
1625  Instruction *I = dyn_cast<Instruction>(V);
1626  if (!I) return false;
1627 
1628  // Two users may expect different orders of the elements. Don't try it.
1629  if (!I->hasOneUse())
1630  return false;
1631 
1632  if (Depth == 0) return false;
1633 
1634  switch (I->getOpcode()) {
1635  case Instruction::UDiv:
1636  case Instruction::SDiv:
1637  case Instruction::URem:
1638  case Instruction::SRem:
1639  // Propagating an undefined shuffle mask element to integer div/rem is not
1640  // allowed because those opcodes can create immediate undefined behavior
1641  // from an undefined element in an operand.
1642  if (llvm::is_contained(Mask, -1))
1643  return false;
1644  LLVM_FALLTHROUGH;
1645  case Instruction::Add:
1646  case Instruction::FAdd:
1647  case Instruction::Sub:
1648  case Instruction::FSub:
1649  case Instruction::Mul:
1650  case Instruction::FMul:
1651  case Instruction::FDiv:
1652  case Instruction::FRem:
1653  case Instruction::Shl:
1654  case Instruction::LShr:
1655  case Instruction::AShr:
1656  case Instruction::And:
1657  case Instruction::Or:
1658  case Instruction::Xor:
1659  case Instruction::ICmp:
1660  case Instruction::FCmp:
1661  case Instruction::Trunc:
1662  case Instruction::ZExt:
1663  case Instruction::SExt:
1664  case Instruction::FPToUI:
1665  case Instruction::FPToSI:
1666  case Instruction::UIToFP:
1667  case Instruction::SIToFP:
1668  case Instruction::FPTrunc:
1669  case Instruction::FPExt:
1670  case Instruction::GetElementPtr: {
1671  // Bail out if we would create longer vector ops. We could allow creating
1672  // longer vector ops, but that may result in more expensive codegen.
1673  Type *ITy = I->getType();
1674  if (ITy->isVectorTy() &&
1675  Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1676  return false;
1677  for (Value *Operand : I->operands()) {
1678  if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1679  return false;
1680  }
1681  return true;
1682  }
1683  case Instruction::InsertElement: {
1684  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1685  if (!CI) return false;
1686  int ElementNumber = CI->getLimitedValue();
1687 
1688  // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1689  // can't put an element into multiple indices.
1690  bool SeenOnce = false;
1691  for (int i = 0, e = Mask.size(); i != e; ++i) {
1692  if (Mask[i] == ElementNumber) {
1693  if (SeenOnce)
1694  return false;
1695  SeenOnce = true;
1696  }
1697  }
1698  return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1699  }
1700  }
1701  return false;
1702 }
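// Sketch of the div/rem hazard handled above (hypothetical IR): given
//   %d = udiv <2 x i32> %x, <i32 2, i32 4>
//   %s = shufflevector <2 x i32> %d, <2 x i32> undef, <2 x i32> <i32 0, i32 undef>
// evaluating the udiv in the shuffled order would shuffle the divisor to
// <i32 2, i32 undef>, and dividing by an undef lane is immediate UB, whereas
// an undef lane feeding an add would only produce an undef result lane.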
1703 
1704 /// Rebuild a new instruction just like 'I' but with the new operands given.
1705 /// In the event of a type mismatch, the new operands' types take precedence.
1706 static Instruction *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1707  // We don't want to use the IRBuilder here because we want the replacement
1708  // instructions to appear next to 'I', not the builder's insertion point.
1709  switch (I->getOpcode()) {
1710  case Instruction::Add:
1711  case Instruction::FAdd:
1712  case Instruction::Sub:
1713  case Instruction::FSub:
1714  case Instruction::Mul:
1715  case Instruction::FMul:
1716  case Instruction::UDiv:
1717  case Instruction::SDiv:
1718  case Instruction::FDiv:
1719  case Instruction::URem:
1720  case Instruction::SRem:
1721  case Instruction::FRem:
1722  case Instruction::Shl:
1723  case Instruction::LShr:
1724  case Instruction::AShr:
1725  case Instruction::And:
1726  case Instruction::Or:
1727  case Instruction::Xor: {
1728  BinaryOperator *BO = cast<BinaryOperator>(I);
1729  assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1730  BinaryOperator *New =
1731  BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1732  NewOps[0], NewOps[1], "", BO);
1733  if (isa<OverflowingBinaryOperator>(BO)) {
1734  New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1735  New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1736  }
1737  if (isa<PossiblyExactOperator>(BO)) {
1738  New->setIsExact(BO->isExact());
1739  }
1740  if (isa<FPMathOperator>(BO))
1741  New->copyFastMathFlags(I);
1742  return New;
1743  }
1744  case Instruction::ICmp:
1745  assert(NewOps.size() == 2 && "icmp with #ops != 2");
1746  return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1747  NewOps[0], NewOps[1]);
1748  case Instruction::FCmp:
1749  assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1750  return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1751  NewOps[0], NewOps[1]);
1752  case Instruction::Trunc:
1753  case Instruction::ZExt:
1754  case Instruction::SExt:
1755  case Instruction::FPToUI:
1756  case Instruction::FPToSI:
1757  case Instruction::UIToFP:
1758  case Instruction::SIToFP:
1759  case Instruction::FPTrunc:
1760  case Instruction::FPExt: {
1761  // It's possible that the mask has a different number of elements from
1762  // the original cast. We recompute the destination type to match the mask.
1763  Type *DestTy = VectorType::get(
1764  I->getType()->getScalarType(),
1765  cast<VectorType>(NewOps[0]->getType())->getElementCount());
1766  assert(NewOps.size() == 1 && "cast with #ops != 1");
1767  return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1768  "", I);
1769  }
1770  case Instruction::GetElementPtr: {
1771  Value *Ptr = NewOps[0];
1772  ArrayRef<Value*> Idx = NewOps.slice(1);
1773  GetElementPtrInst *GEP = GetElementPtrInst::Create(
1774  cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1775  GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1776  return GEP;
1777  }
1778  }
1779  llvm_unreachable("failed to rebuild vector instructions");
1780 }
1781 
1782 static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1783  // Mask.size() does not need to be equal to the number of vector elements.
1784 
1785  assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1786  Type *EltTy = V->getType()->getScalarType();
1787  Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1788  if (match(V, m_Undef()))
1789  return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1790 
1791  if (isa<ConstantAggregateZero>(V))
1792  return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1793 
1794  if (Constant *C = dyn_cast<Constant>(V))
1795  return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1796  Mask);
1797 
1798  Instruction *I = cast<Instruction>(V);
1799  switch (I->getOpcode()) {
1800  case Instruction::Add:
1801  case Instruction::FAdd:
1802  case Instruction::Sub:
1803  case Instruction::FSub:
1804  case Instruction::Mul:
1805  case Instruction::FMul:
1806  case Instruction::UDiv:
1807  case Instruction::SDiv:
1808  case Instruction::FDiv:
1809  case Instruction::URem:
1810  case Instruction::SRem:
1811  case Instruction::FRem:
1812  case Instruction::Shl:
1813  case Instruction::LShr:
1814  case Instruction::AShr:
1815  case Instruction::And:
1816  case Instruction::Or:
1817  case Instruction::Xor:
1818  case Instruction::ICmp:
1819  case Instruction::FCmp:
1820  case Instruction::Trunc:
1821  case Instruction::ZExt:
1822  case Instruction::SExt:
1823  case Instruction::FPToUI:
1824  case Instruction::FPToSI:
1825  case Instruction::UIToFP:
1826  case Instruction::SIToFP:
1827  case Instruction::FPTrunc:
1828  case Instruction::FPExt:
1829  case Instruction::Select:
1830  case Instruction::GetElementPtr: {
1831  SmallVector<Value*, 8> NewOps;
1832  bool NeedsRebuild =
1833  (Mask.size() !=
1834  cast<FixedVectorType>(I->getType())->getNumElements());
1835  for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1836  Value *V;
1837  // Recursively call evaluateInDifferentElementOrder on vector arguments
1838  // as well. E.g. GetElementPtr may have scalar operands even if the
1839  // return value is a vector, so we need to examine the operand type.
1840  if (I->getOperand(i)->getType()->isVectorTy())
1841  V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1842  else
1843  V = I->getOperand(i);
1844  NewOps.push_back(V);
1845  NeedsRebuild |= (V != I->getOperand(i));
1846  }
1847  if (NeedsRebuild) {
1848  return buildNew(I, NewOps);
1849  }
1850  return I;
1851  }
1852  case Instruction::InsertElement: {
1853  int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1854 
1855  // The insertelement was inserting at Element. Figure out which element
1856  // that becomes after shuffling. The answer is guaranteed to be unique
1857  // by canEvaluateShuffled.
1858  bool Found = false;
1859  int Index = 0;
1860  for (int e = Mask.size(); Index != e; ++Index) {
1861  if (Mask[Index] == Element) {
1862  Found = true;
1863  break;
1864  }
1865  }
1866 
1867  // If element is not in Mask, no need to handle the operand 1 (element to
1868  // be inserted). Just evaluate values in operand 0 according to Mask.
1869  if (!Found)
1870  return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1871 
1872  Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1873  return InsertElementInst::Create(V, I->getOperand(1),
1874  ConstantInt::get(I32Ty, Index), "", I);
1875  }
1876  }
1877  llvm_unreachable("failed to reorder elements of vector instruction!");
1878 }
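// Illustrative instance of the insertelement case above (hypothetical IR):
//   shufflevector (insertelement <4 x i32> undef, i32 %x, i32 0),
//                 <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 undef>
// is rebuilt, roughly, as
//   insertelement <4 x i32> undef, i32 %x, i32 2
// because the mask moves element 0 into lane 2, so the insert is redone at
// the translated index instead of emitting the shuffle.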
1879 
1880 // Returns true if the shuffle is extracting a contiguous range of values from
1881 // LHS, for example:
1882 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
1883 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
1884 // Shuffles to: |EE|FF|GG|HH|
1885 // +--+--+--+--+
1886 static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
1887  ArrayRef<int> Mask) {
1888  unsigned LHSElems =
1889  cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
1890  unsigned MaskElems = Mask.size();
1891  unsigned BegIdx = Mask.front();
1892  unsigned EndIdx = Mask.back();
1893  if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
1894  return false;
1895  for (unsigned I = 0; I != MaskElems; ++I)
1896  if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
1897  return false;
1898  return true;
1899 }
1900 
1901 /// These are the ingredients in an alternate form binary operator as described
1902 /// below.
1903 struct BinopElts {
1904  BinaryOperator::BinaryOps Opcode;
1905  Value *Op0;
1906  Value *Op1;
1907  BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
1908  Value *V0 = nullptr, Value *V1 = nullptr) :
1909  Opcode(Opc), Op0(V0), Op1(V1) {}
1910  operator bool() const { return Opcode != 0; }
1911 };
1912 
1913 /// Binops may be transformed into binops with different opcodes and operands.
1914 /// Reverse the usual canonicalization to enable folds with the non-canonical
1915 /// form of the binop. If a transform is possible, return the elements of the
1916 /// new binop. If not, return invalid elements.
1917 static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
1918  Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
1919  Type *Ty = BO->getType();
1920  switch (BO->getOpcode()) {
1921  case Instruction::Shl: {
1922  // shl X, C --> mul X, (1 << C)
1923  Constant *C;
1924  if (match(BO1, m_Constant(C))) {
1925  Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
1926  return {Instruction::Mul, BO0, ShlOne};
1927  }
1928  break;
1929  }
1930  case Instruction::Or: {
1931  // or X, C --> add X, C (when X and C have no common bits set)
1932  const APInt *C;
1933  if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
1934  return {Instruction::Add, BO0, BO1};
1935  break;
1936  }
1937  case Instruction::Sub:
1938  // sub 0, X --> mul X, -1
1939  if (match(BO0, m_ZeroInt()))
1940  return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
1941  break;
1942  default:
1943  break;
1944  }
1945  return {};
1946 }
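// Concrete instances of the alternate forms above (illustrative):
//   shl %x, <i32 3, i32 3>               --> mul %x, <i32 8, i32 8>
//   or  %x, 4   (bit 2 of %x known zero) --> add %x, 4
//   sub 0, %x                            --> mul %x, -1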
1947 
1948 static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
1949  assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
1950 
1951  // Are we shuffling together some value and that same value after it has been
1952  // modified by a binop with a constant?
1953  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
1954  Constant *C;
1955  bool Op0IsBinop;
1956  if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
1957  Op0IsBinop = true;
1958  else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
1959  Op0IsBinop = false;
1960  else
1961  return nullptr;
1962 
1963  // The identity constant for a binop leaves a variable operand unchanged. For
1964  // a vector, this is a splat of something like 0, -1, or 1.
1965  // If there's no identity constant for this binop, we're done.
1966  auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
1967  BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
1968  Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
1969  if (!IdC)
1970  return nullptr;
1971 
1972  // Shuffle identity constants into the lanes that return the original value.
1973  // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
1974  // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
1975  // The existing binop constant vector remains in the same operand position.
1976  ArrayRef<int> Mask = Shuf.getShuffleMask();
1977  Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
1978  ConstantExpr::getShuffleVector(IdC, C, Mask);
1979 
1980  bool MightCreatePoisonOrUB =
1981  is_contained(Mask, UndefMaskElem) &&
1982  (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
1983  if (MightCreatePoisonOrUB)
1984  NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
1985 
1986  // shuf (bop X, C), X, M --> bop X, C'
1987  // shuf X, (bop X, C), M --> bop X, C'
1988  Value *X = Op0IsBinop ? Op1 : Op0;
1989  Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
1990  NewBO->copyIRFlags(BO);
1991 
1992  // An undef shuffle mask element may propagate as an undef constant element in
1993  // the new binop. That would produce poison where the original code might not.
1994  // If we already made a safe constant, then there's no danger.
1995  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
1996  NewBO->dropPoisonGeneratingFlags();
1997  return NewBO;
1998 }
1999 
2000 /// If we have an insert of a scalar to a non-zero element of an undefined
2001 /// vector and then shuffle that value, that's the same as inserting to the zero
2002 /// element and shuffling. Splatting from the zero element is recognized as the
2003 /// canonical form of splat.
2004 static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2005  InstCombiner::BuilderTy &Builder) {
2006  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2007  ArrayRef<int> Mask = Shuf.getShuffleMask();
2008  Value *X;
2009  uint64_t IndexC;
2010 
2011  // Match a shuffle that is a splat to a non-zero element.
2012  if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
2013  m_ConstantInt(IndexC)))) ||
2014  !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2015  return nullptr;
2016 
2017  // Insert into element 0 of an undef vector.
2018  UndefValue *UndefVec = UndefValue::get(Shuf.getType());
2019  Constant *Zero = Builder.getInt32(0);
2020  Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
2021 
2022  // Splat from element 0. Any mask element that is undefined remains undefined.
2023  // For example:
2024  // shuf (inselt undef, X, 2), _, <2,2,undef>
2025  // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
2026  unsigned NumMaskElts =
2027  cast<FixedVectorType>(Shuf.getType())->getNumElements();
2028  SmallVector<int, 16> NewMask(NumMaskElts, 0);
2029  for (unsigned i = 0; i != NumMaskElts; ++i)
2030  if (Mask[i] == UndefMaskElem)
2031  NewMask[i] = Mask[i];
2032 
2033  return new ShuffleVectorInst(NewIns, NewMask);
2034 }
2035 
2036 /// Try to fold shuffles that are the equivalent of a vector select.
2037 Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2038  if (!Shuf.isSelect())
2039  return nullptr;
2040 
2041  // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2042  // Commuting undef to operand 0 conflicts with another canonicalization.
2043  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2044  if (!match(Shuf.getOperand(1), m_Undef()) &&
2045  Shuf.getMaskValue(0) >= (int)NumElts) {
2046  // TODO: Can we assert that both operands of a shuffle-select are not undef
2047  // (otherwise, it would have been folded by instsimplify)?
2048  Shuf.commute();
2049  return &Shuf;
2050  }
2051 
2052  if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
2053  return I;
2054 
2055  BinaryOperator *B0, *B1;
2056  if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2057  !match(Shuf.getOperand(1), m_BinOp(B1)))
2058  return nullptr;
2059 
2060  // If one operand is "0 - X", allow that to be viewed as "X * -1"
2061  // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2062  // with a multiply, we will exit because C0/C1 will not be set.
2063  Value *X, *Y;
2064  Constant *C0 = nullptr, *C1 = nullptr;
2065  bool ConstantsAreOp1;
2066  if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2067  match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2068  ConstantsAreOp1 = false;
2069  else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2070  m_Neg(m_Value(X)))) &&
2071  match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2072  m_Neg(m_Value(Y)))))
2073  ConstantsAreOp1 = true;
2074  else
2075  return nullptr;
2076 
2077  // We need matching binops to fold the lanes together.
2078  BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2079  BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2080  bool DropNSW = false;
2081  if (ConstantsAreOp1 && Opc0 != Opc1) {
2082  // TODO: We drop "nsw" if shift is converted into multiply because it may
2083  // not be correct when the shift amount is BitWidth - 1. We could examine
2084  // each vector element to determine if it is safe to keep that flag.
2085  if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2086  DropNSW = true;
2087  if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2088  assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2089  Opc0 = AltB0.Opcode;
2090  C0 = cast<Constant>(AltB0.Op1);
2091  } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2092  assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2093  Opc1 = AltB1.Opcode;
2094  C1 = cast<Constant>(AltB1.Op1);
2095  }
2096  }
2097 
2098  if (Opc0 != Opc1 || !C0 || !C1)
2099  return nullptr;
2100 
2101  // The opcodes must be the same. Use a new name to make that clear.
2102  BinaryOperator::BinaryOps BOpc = Opc0;
2103 
2104  // Select the constant elements needed for the single binop.
2105  ArrayRef<int> Mask = Shuf.getShuffleMask();
2106  Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2107 
2108  // We are moving a binop after a shuffle. When a shuffle has an undefined
2109  // mask element, the result is undefined, but it is not poison or undefined
2110  // behavior. That is not necessarily true for div/rem/shift.
2111  bool MightCreatePoisonOrUB =
2112  is_contained(Mask, UndefMaskElem) &&
2113  (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2114  if (MightCreatePoisonOrUB)
2115  NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2116  ConstantsAreOp1);
2117 
2118  Value *V;
2119  if (X == Y) {
2120  // Remove a binop and the shuffle by rearranging the constant:
2121  // shuffle (op V, C0), (op V, C1), M --> op V, C'
2122  // shuffle (op C0, V), (op C1, V), M --> op C', V
2123  V = X;
2124  } else {
2125  // If there are 2 different variable operands, we must create a new shuffle
2126  // (select) first, so check uses to ensure that we don't end up with more
2127  // instructions than we started with.
2128  if (!B0->hasOneUse() && !B1->hasOneUse())
2129  return nullptr;
2130 
2131  // If we use the original shuffle mask and op1 is *variable*, we would be
2132  // putting an undef into operand 1 of div/rem/shift. This is either UB or
2133  // poison. We do not have to guard against UB when *constants* are op1
2134  // because safe constants guarantee that we do not overflow sdiv/srem (and
2135  // there's no danger for other opcodes).
2136  // TODO: To allow this case, create a new shuffle mask with no undefs.
2137  if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2138  return nullptr;
2139 
2140  // Note: In general, we do not create new shuffles in InstCombine because we
2141  // do not know if a target can lower an arbitrary shuffle optimally. In this
2142  // case, the shuffle uses the existing mask, so there is no additional risk.
2143 
2144  // Select the variable vectors first, then perform the binop:
2145  // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2146  // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2147  V = Builder.CreateShuffleVector(X, Y, Mask);
2148  }
2149 
2150  Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2151  Builder.CreateBinOp(BOpc, NewC, V);
2152 
2153  // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2154  // 1. If we changed an opcode, poison conditions might have changed.
2155  // 2. If the shuffle had undef mask elements, the new binop might have undefs
2156  // where the original code did not. But if we already made a safe constant,
2157  // then there's no danger.
2158  if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2159  NewI->copyIRFlags(B0);
2160  NewI->andIRFlags(B1);
2161  if (DropNSW)
2162  NewI->setHasNoSignedWrap(false);
2163  if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2164  NewI->dropPoisonGeneratingFlags();
2165  }
2166  return replaceInstUsesWith(Shuf, NewBO);
2167 }
2168 
2169 /// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2170 /// Example (little endian):
2171 /// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2172 static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2173  bool IsBigEndian) {
2174  // This must be a bitcasted shuffle of 1 vector integer operand.
2175  Type *DestType = Shuf.getType();
2176  Value *X;
2177  if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2178  !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2179  return nullptr;
2180 
2181  // The source type must have the same number of elements as the shuffle,
2182  // and the source element type must be larger than the shuffle element type.
2183  Type *SrcType = X->getType();
2184  if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2185  cast<FixedVectorType>(SrcType)->getNumElements() !=
2186  cast<FixedVectorType>(DestType)->getNumElements() ||
2187  SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2188  return nullptr;
2189 
2190  assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2191  "Expected a shuffle that decreases length");
2192 
2193  // Last, check that the mask chooses the correct low bits for each narrow
2194  // element in the result.
2195  uint64_t TruncRatio =
2196  SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2197  ArrayRef<int> Mask = Shuf.getShuffleMask();
2198  for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2199  if (Mask[i] == UndefMaskElem)
2200  continue;
2201  uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2202  assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2203  if (Mask[i] != (int)LSBIndex)
2204  return nullptr;
2205  }
2206 
2207  return new TruncInst(X, DestType);
2208 }
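// On a big-endian target, the low bits of each wide element live at the
// higher narrow-lane index, so the equivalent of the example above would be
// (illustrative):
// shuf (bitcast <4 x i16> X to <8 x i8>), <1, 3, 5, 7> --> trunc X to <4 x i8>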
2209 
2210 /// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2211 /// narrowing (concatenating with undef and extracting back to the original
2212 /// length). This allows replacing the wide select with a narrow select.
2213 static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2214  InstCombiner::BuilderTy &Builder) {
2215  // This must be a narrowing identity shuffle. It extracts the 1st N elements
2216  // of the 1st vector operand of a shuffle.
2217  if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2218  return nullptr;
2219 
2220  // The vector being shuffled must be a vector select that we can eliminate.
2221  // TODO: The one-use requirement could be eased if X and/or Y are constants.
2222  Value *Cond, *X, *Y;
2223  if (!match(Shuf.getOperand(0),
2224  m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2225  return nullptr;
2226 
2227  // We need a narrow condition value. It must be extended with undef elements
2228  // and have the same number of elements as this shuffle.
2229  unsigned NarrowNumElts =
2230  cast<FixedVectorType>(Shuf.getType())->getNumElements();
2231  Value *NarrowCond;
2232  if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2233  cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2234  NarrowNumElts ||
2235  !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2236  return nullptr;
2237 
2238  // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2239  // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2240  Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2241  Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2242  return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2243 }
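// Illustrative sketch of the pattern (hypothetical IR, loose mask syntax):
//   %wc = shuf <2 x i1> %cond, undef, <0, 1, undef, undef> ; widen cond
//   %ws = select <4 x i1> %wc, <4 x i32> %x, <4 x i32> %y  ; wide select
//   %r  = shuf <4 x i32> %ws, undef, <0, 1>                ; narrow back
// -->
//   %nx = shuf <4 x i32> %x, undef, <0, 1>
//   %ny = shuf <4 x i32> %y, undef, <0, 1>
//   %r  = select <2 x i1> %cond, <2 x i32> %nx, <2 x i32> %ny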
2244 
2245 /// Canonicalize FP negate after shuffle.
2246 static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
2247  InstCombiner::BuilderTy &Builder) {
2248  Instruction *FNeg0;
2249  Value *X;
2250  if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
2251  m_FNeg(m_Value(X)))))
2252  return nullptr;
2253 
2254  // shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
2255  if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
2256  Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2257  return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0);
2258  }
2259 
2260  Instruction *FNeg1;
2261  Value *Y;
2262  if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
2263  m_FNeg(m_Value(Y)))))
2264  return nullptr;
2265 
2266  // shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
2267  if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
2268  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2269  Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
2270  NewFNeg->copyIRFlags(FNeg0);
2271  NewFNeg->andIRFlags(FNeg1);
2272  return NewFNeg;
2273  }
2274 
2275  return nullptr;
2276 }
2277 
2278 /// Canonicalize casts after shuffle.
2279 static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2280  InstCombiner::BuilderTy &Builder) {
2281  // Do we have 2 matching cast operands?
2282  auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2283  auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2284  if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2285  Cast0->getSrcTy() != Cast1->getSrcTy())
2286  return nullptr;
2287 
2288  // TODO: Allow other opcodes? That would require easing the type restrictions
2289  // below here.
2290  CastInst::CastOps CastOpcode = Cast0->getOpcode();
2291  switch (CastOpcode) {
2292  case Instruction::FPToSI:
2293  case Instruction::FPToUI:
2294  case Instruction::SIToFP:
2295  case Instruction::UIToFP:
2296  break;
2297  default:
2298  return nullptr;
2299  }
2300 
2301  VectorType *ShufTy = Shuf.getType();
2302  VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2303  VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2304 
2305  // TODO: Allow length-increasing shuffles?
2306  if (ShufTy->getElementCount().getKnownMinValue() >
2307  ShufOpTy->getElementCount().getKnownMinValue())
2308  return nullptr;
2309 
2310  // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2311  assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2312  "Expected fixed vector operands for casts and binary shuffle");
2313  if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2314  return nullptr;
2315 
2316  // At least one of the operands must have only one use (the shuffle).
2317  if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2318  return nullptr;
2319 
2320  // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2321  Value *X = Cast0->getOperand(0);
2322  Value *Y = Cast1->getOperand(0);
2323  Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2324  return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2325 }
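// Illustrative instance (hypothetical IR; at least one cast must have a
// single use, per the checks above):
//   %a = sitofp <4 x i32> %x to <4 x float>
//   %b = sitofp <4 x i32> %y to <4 x float>
//   %r = shufflevector <4 x float> %a, <4 x float> %b, <2 x i32> <i32 0, i32 4>
// -->
//   %s = shufflevector <4 x i32> %x, <4 x i32> %y, <2 x i32> <i32 0, i32 4>
//   %r = sitofp <2 x i32> %s to <2 x float>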
2326 
2327 /// Try to fold an extract subvector operation.
2328 static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2329  Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2330  if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2331  return nullptr;
2332 
2333  // Check if we are extracting all bits of an inserted scalar:
2334  // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2335  Value *X;
2336  if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2337  X->getType()->getPrimitiveSizeInBits() ==
2338  Shuf.getType()->getPrimitiveSizeInBits())
2339  return new BitCastInst(X, Shuf.getType());
2340 
2341  // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2342  Value *Y;
2343  ArrayRef<int> Mask;
2344  if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2345  return nullptr;
2346 
2347  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2348  // then combining may result in worse codegen.
2349  if (!Op0->hasOneUse())
2350  return nullptr;
2351 
2352  // We are extracting a subvector from a shuffle. Remove excess elements from
2353  // the 1st shuffle mask to eliminate the extract.
2354  //
2355  // This transform is conservatively limited to identity extracts because we do
2356  // not allow arbitrary shuffle mask creation as a target-independent transform
2357  // (because we can't guarantee that will lower efficiently).
2358  //
2359  // If the extracting shuffle has an undef mask element, it transfers to the
2360  // new shuffle mask. Otherwise, copy the original mask element. Example:
2361  // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2362  // shuf X, Y, <C0, undef, C2, undef>
2363  unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2364  SmallVector<int, 16> NewMask(NumElts);
2365  assert(NumElts < Mask.size() &&
2366  "Identity with extract must have less elements than its inputs");
2367 
2368  for (unsigned i = 0; i != NumElts; ++i) {
2369  int ExtractMaskElt = Shuf.getMaskValue(i);
2370  int MaskElt = Mask[i];
2371  NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
2372  }
2373  return new ShuffleVectorInst(X, Y, NewMask);
2374 }
2375 
2376 /// Try to replace a shuffle with an insertelement or try to replace a shuffle
2377 /// operand with the operand of an insertelement.
2378 static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2379  InstCombinerImpl &IC) {
2380  Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2381  SmallVector<int, 16> Mask;
2382  Shuf.getShuffleMask(Mask);
2383 
2384  int NumElts = Mask.size();
2385  int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2386 
2387  // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2388  // not be able to handle it there if the insertelement has >1 use.
2389  // If the shuffle has an insertelement operand but does not choose the
2390  // inserted scalar element from that value, then we can replace that shuffle
2391  // operand with the source vector of the insertelement.
2392  Value *X;
2393  uint64_t IdxC;
2394  if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2395  // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2396  if (!is_contained(Mask, (int)IdxC))
2397  return IC.replaceOperand(Shuf, 0, X);
2398  }
2399  if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2400  // Offset the index constant by the vector width because we are checking for
2401  // accesses to the 2nd vector input of the shuffle.
2402  IdxC += InpNumElts;
2403  // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2404  if (!is_contained(Mask, (int)IdxC))
2405  return IC.replaceOperand(Shuf, 1, X);
2406  }
2407  // For the rest of the transform, the shuffle must not change vector sizes.
2408  // TODO: This restriction could be removed if the insert has only one use
2409  // (because the transform would require a new length-changing shuffle).
2410  if (NumElts != InpNumElts)
2411  return nullptr;
2412 
2413  // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2414  auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2415  // We need an insertelement with a constant index.
2416  if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2417  m_ConstantInt(IndexC))))
2418  return false;
2419 
2420  // Test the shuffle mask to see if it splices the inserted scalar into the
2421  // operand 1 vector of the shuffle.
2422  int NewInsIndex = -1;
2423  for (int i = 0; i != NumElts; ++i) {
2424  // Ignore undef mask elements.
2425  if (Mask[i] == -1)
2426  continue;
2427 
2428  // The shuffle takes elements of operand 1 without lane changes.
2429  if (Mask[i] == NumElts + i)
2430  continue;
2431 
2432  // The shuffle must choose the inserted scalar exactly once.
2433  if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2434  return false;
2435 
2436  // The shuffle is placing the inserted scalar into element i.
2437  NewInsIndex = i;
2438  }
2439 
2440  assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2441 
2442  // Index is updated to the potentially translated insertion lane.
2443  IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
2444  return true;
2445  };
2446 
2447  // If the shuffle is unnecessary, insert the scalar operand directly into
2448  // operand 1 of the shuffle. Example:
2449  // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2450  Value *Scalar;
2451  ConstantInt *IndexC;
2452  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2453  return InsertElementInst::Create(V1, Scalar, IndexC);
2454 
2455  // Try again after commuting shuffle. Example:
2456  // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2457  // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2458  std::swap(V0, V1);
2459  ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2460  if (isShufflingScalarIntoOp1(Scalar, IndexC))
2461  return InsertElementInst::Create(V1, Scalar, IndexC);
2462 
2463  return nullptr;
2464 }
2465 
2466 static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2467  // Match the operands as identity with padding (also known as concatenation
2468  // with undef) shuffles of the same source type. The backend is expected to
2469  // recreate these concatenations from a shuffle of narrow operands.
2470  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2471  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2472  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2473  !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2474  return nullptr;
2475 
2476  // We limit this transform to power-of-2 types because we expect that the
2477  // backend can convert the simplified IR patterns to identical nodes as the
2478  // original IR.
2479  // TODO: If we can verify the same behavior for arbitrary types, the
2480  // power-of-2 checks can be removed.
2481  Value *X = Shuffle0->getOperand(0);
2482  Value *Y = Shuffle1->getOperand(0);
2483  if (X->getType() != Y->getType() ||
2484  !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2485  !isPowerOf2_32(
2486  cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2487  !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2488  match(X, m_Undef()) || match(Y, m_Undef()))
2489  return nullptr;
2490  assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2491  match(Shuffle1->getOperand(1), m_Undef()) &&
2492  "Unexpected operand for identity shuffle");
2493 
2494  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2495  // operands directly by adjusting the shuffle mask to account for the narrower
2496  // types:
2497  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2498  int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2499  int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2500  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2501 
2503  SmallVector<int, 16> NewMask(Mask.size(), -1);
2504  for (int i = 0, e = Mask.size(); i != e; ++i) {
2505  if (Mask[i] == -1)
2506  continue;
2507 
2508  // If this shuffle is choosing an undef element from 1 of the sources, that
2509  // element is undef.
2510  if (Mask[i] < WideElts) {
2511  if (Shuffle0->getMaskValue(Mask[i]) == -1)
2512  continue;
2513  } else {
2514  if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2515  continue;
2516  }
2517 
2518  // If this shuffle is choosing from the 1st narrow op, the mask element is
2519  // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2520  // element is offset down to adjust for the narrow vector widths.
2521  if (Mask[i] < WideElts) {
2522  assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2523  NewMask[i] = Mask[i];
2524  } else {
2525  assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2526  NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2527  }
2528  }
2529  return new ShuffleVectorInst(X, Y, NewMask);
2530 }
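// For example (illustrative), with X and Y of type <2 x i32> padded out to
// <4 x i32> by identity-with-padding shuffles:
//   shuf (widen X), (widen Y), <0, 1, 4, 5> --> shuf X, Y, <0, 1, 2, 3>
// Mask elements that chose from the 2nd wide operand are shifted down by
// WideElts - NarrowElts (here 4 - 2 = 2).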
2531 
2532 Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2533  Value *LHS = SVI.getOperand(0);
2534  Value *RHS = SVI.getOperand(1);
2535  SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2536  if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2537  SVI.getType(), ShufQuery))
2538  return replaceInstUsesWith(SVI, V);
2539 
2540  // Bail out for scalable vectors
2541  if (isa<ScalableVectorType>(LHS->getType()))
2542  return nullptr;
2543 
2544  unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2545  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2546 
2547  // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2548  //
2549  // if X and Y are of the same (vector) type, and the element size is not
2550  // changed by the bitcasts, we can distribute the bitcasts through the
2551  // shuffle, hopefully reducing the number of instructions. We make sure that
2552  // at least one bitcast only has one use, so we don't *increase* the number of
2553  // instructions here.
2554  Value *X, *Y;
2555  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2556  X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2557  X->getType()->getScalarSizeInBits() ==
2558  SVI.getType()->getScalarSizeInBits() &&
2559  (LHS->hasOneUse() || RHS->hasOneUse())) {
2560  Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2561  SVI.getName() + ".uncasted");
2562  return new BitCastInst(V, SVI.getType());
2563  }
2564 
2565  ArrayRef<int> Mask = SVI.getShuffleMask();
2566  Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
2567 
2568  // Peek through a bitcasted shuffle operand by scaling the mask. If the
2569  // simulated shuffle can simplify, then this shuffle is unnecessary:
2570  // shuf (bitcast X), undef, Mask --> bitcast X'
2571  // TODO: This could be extended to allow length-changing shuffles.
2572  // The transform might also be obsoleted if we allowed canonicalization
2573  // of bitcasted shuffles.
2574  if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2575  X->getType()->isVectorTy() && VWidth == LHSWidth) {
2576  // Try to create a scaled mask constant.
2577  auto *XType = cast<FixedVectorType>(X->getType());
2578  unsigned XNumElts = XType->getNumElements();
2579  SmallVector<int, 16> ScaledMask;
2580  if (XNumElts >= VWidth) {
2581  assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2582  narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2583  } else {
2584  assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2585  if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2586  ScaledMask.clear();
2587  }
2588  if (!ScaledMask.empty()) {
2589  // If the shuffled source vector simplifies, cast that value to this
2590  // shuffle's type.
2591  if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2592  ScaledMask, XType, ShufQuery))
2593  return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2594  }
2595  }
2596 
2597  // shuffle x, x, mask --> shuffle x, undef, mask'
2598  if (LHS == RHS) {
2599  assert(!match(RHS, m_Undef()) &&
2600  "Shuffle with 2 undef ops not simplified?");
2601  return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2602  }
2603 
2604  // shuffle undef, x, mask --> shuffle x, undef, mask'
2605  if (match(LHS, m_Undef())) {
2606  SVI.commute();
2607  return &SVI;
2608  }
2609 
2610  if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2611  return I;
2612 
2613  if (Instruction *I = foldSelectShuffle(SVI))
2614  return I;
2615 
2616  if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2617  return I;
2618 
2619  if (Instruction *I = narrowVectorSelect(SVI, Builder))
2620  return I;
2621 
2622  if (Instruction *I = foldFNegShuffle(SVI, Builder))
2623  return I;
2624 
2625  if (Instruction *I = foldCastShuffle(SVI, Builder))
2626  return I;
2627 
2628  APInt UndefElts(VWidth, 0);
2629  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2630  if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2631  if (V != &SVI)
2632  return replaceInstUsesWith(SVI, V);
2633  return &SVI;
2634  }
2635 
2636  if (Instruction *I = foldIdentityExtractShuffle(SVI))
2637  return I;
2638 
2639  // These transforms have the potential to lose undef knowledge, so they are
2640  // intentionally placed after SimplifyDemandedVectorElts().
2641  if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2642  return I;
2643  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2644  return I;
2645 
2646  if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2647  Value *V = evaluateInDifferentElementOrder(LHS, Mask);
2648  return replaceInstUsesWith(SVI, V);
2649  }
2650 
2651  // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2652  // a non-vector type. We can instead bitcast the original vector followed by
2653  // an extract of the desired element:
2654  //
2655  // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2656  // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2657  // %1 = bitcast <4 x i8> %sroa to i32
2658  // Becomes:
2659  // %bc = bitcast <16 x i8> %in to <4 x i32>
2660  // %ext = extractelement <4 x i32> %bc, i32 0
2661  //
2662  // If the shuffle is extracting a contiguous range of values from the input
2663  // vector then each use which is a bitcast of the extracted size can be
2664  // replaced. This will work if the vector types are compatible, and the begin
2665  // index is aligned to a value in the casted vector type. If the begin index
2666  // isn't aligned then we can shuffle the original vector (keeping the same
2667  // vector type) before extracting.
2668  //
2669  // This code will bail out if the target type is fundamentally incompatible
2670  // with vectors of the source type.
2671  //
2672  // Example of <16 x i8>, target type i32:
2673  // Index range [4,8): v-----------v Will work.
2674  // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2675  // <16 x i8>: | | | | | | | | | | | | | | | | |
2676  // <4 x i32>: | | | | |
2677  // +-----------+-----------+-----------+-----------+
2678  // Index range [6,10): ^-----------^ Needs an extra shuffle.
2679  // Target type i40: ^--------------^ Won't work, bail.
2680  bool MadeChange = false;
2681  if (isShuffleExtractingFromLHS(SVI, Mask)) {
2682  Value *V = LHS;
2683  unsigned MaskElems = Mask.size();
2684  auto *SrcTy = cast<FixedVectorType>(V->getType());
2685  unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedSize();
2686  unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2687  assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2688  unsigned SrcNumElems = SrcTy->getNumElements();
2689  SmallVector<BitCastInst *, 8> BCs;
2690  DenseMap<Type *, Value *> NewBCs;
2691  for (User *U : SVI.users())
2692  if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2693  if (!BC->use_empty())
2694  // Only visit bitcasts that weren't previously handled.
2695  BCs.push_back(BC);
2696  for (BitCastInst *BC : BCs) {
2697  unsigned BegIdx = Mask.front();
2698  Type *TgtTy = BC->getDestTy();
2699  unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2700  if (!TgtElemBitWidth)
2701  continue;
2702  unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2703  bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2704  bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2705  if (!VecBitWidthsEqual)
2706  continue;
2707  if (!VectorType::isValidElementType(TgtTy))
2708  continue;
2709  auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2710  if (!BegIsAligned) {
2711  // Shuffle the input so [0,NumElements) contains the output, and
2712  // [NumElems,SrcNumElems) is undef.
2713  SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2714  for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2715  ShuffleMask[I] = Idx;
2716  V = Builder.CreateShuffleVector(V, ShuffleMask,
2717  SVI.getName() + ".extract");
2718  BegIdx = 0;
2719  }
2720  unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2721  assert(SrcElemsPerTgtElem);
2722  BegIdx /= SrcElemsPerTgtElem;
2723  bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
2724  auto *NewBC =
2725  BCAlreadyExists
2726  ? NewBCs[CastSrcTy]
2727  : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2728  if (!BCAlreadyExists)
2729  NewBCs[CastSrcTy] = NewBC;
2730  auto *Ext = Builder.CreateExtractElement(
2731  NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2732  // The shufflevector isn't being replaced: the bitcast that used it
2733  // is. InstCombine will visit the newly-created instructions.
2734  replaceInstUsesWith(*BC, Ext);
2735  MadeChange = true;
2736  }
2737  }
2738 
2739  // If the LHS is a shufflevector itself, see if we can combine it with this
2740  // one without producing an unusual shuffle.
2741  // Cases that might be simplified:
2742  // 1.
2743  // x1=shuffle(v1,v2,mask1)
2744  // x=shuffle(x1,undef,mask)
2745  // ==>
2746  // x=shuffle(v1,undef,newMask)
2747  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2748  // 2.
2749  // x1=shuffle(v1,undef,mask1)
2750  // x=shuffle(x1,x2,mask)
2751  // where v1.size() == mask1.size()
2752  // ==>
2753  // x=shuffle(v1,x2,newMask)
2754  // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2755  // 3.
2756  // x2=shuffle(v2,undef,mask2)
2757  // x=shuffle(x1,x2,mask)
2758  // where v2.size() == mask2.size()
2759  // ==>
2760  // x=shuffle(x1,v2,newMask)
2761  // newMask[i] = (mask[i] < x1.size())
2762  // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2763  // 4.
2764  // x1=shuffle(v1,undef,mask1)
2765  // x2=shuffle(v2,undef,mask2)
2766  // x=shuffle(x1,x2,mask)
2767  // where v1.size() == v2.size()
2768  // ==>
2769  // x=shuffle(v1,v2,newMask)
2770  // newMask[i] = (mask[i] < x1.size())
2771  // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2772  //
2773  // Here we are really conservative:
2774  // we are absolutely afraid of producing a shuffle mask not in the input
2775  // program, because codegen may not be smart enough to turn a merged
2776  // shuffle into two specific shuffles: it may produce worse code. As such,
2777  // we only merge two shuffles if the result is either a splat or one of the
2778  // input shuffle masks. In this case, merging the shuffles just removes
2779  // one instruction, which we know is safe. This is good for things like
2780  // turning: (splat(splat)) -> splat, or
2781  // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
2782  ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2783  ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
2784  if (LHSShuffle)
2785  if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
2786  LHSShuffle = nullptr;
2787  if (RHSShuffle)
2788  if (!match(RHSShuffle->getOperand(1), m_Undef()))
2789  RHSShuffle = nullptr;
2790  if (!LHSShuffle && !RHSShuffle)
2791  return MadeChange ? &SVI : nullptr;
2792 
2793  Value* LHSOp0 = nullptr;
2794  Value* LHSOp1 = nullptr;
2795  Value* RHSOp0 = nullptr;
2796  unsigned LHSOp0Width = 0;
2797  unsigned RHSOp0Width = 0;
2798  if (LHSShuffle) {
2799  LHSOp0 = LHSShuffle->getOperand(0);
2800  LHSOp1 = LHSShuffle->getOperand(1);
2801  LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
2802  }
2803  if (RHSShuffle) {
2804  RHSOp0 = RHSShuffle->getOperand(0);
2805  RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
2806  }
2807  Value *newLHS = LHS;
2808  Value *newRHS = RHS;
2809  if (LHSShuffle) {
2810  // case 1
2811  if (match(RHS, m_Undef())) {
2812  newLHS = LHSOp0;
2813  newRHS = LHSOp1;
2814  }
2815  // case 2 or 4
2816  else if (LHSOp0Width == LHSWidth) {
2817  newLHS = LHSOp0;
2818  }
2819  }
2820  // case 3 or 4
2821  if (RHSShuffle && RHSOp0Width == LHSWidth) {
2822  newRHS = RHSOp0;
2823  }
2824  // case 4
2825  if (LHSOp0 == RHSOp0) {
2826  newLHS = LHSOp0;
2827  newRHS = nullptr;
2828  }
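  // A sketch of case 4 with hypothetical values:
  //   x1 = shuffle(v, undef, <0,1,2,3>)
  //   x2 = shuffle(v, undef, <4,5,6,7>)
  //   x  = shuffle(x1, x2, <0,1,2,3,4,5,6,7>)
  // Here LHSOp0 == RHSOp0 == v, so newLHS = v and newRHS = nullptr. The loop
  // below then remaps both halves through mask1/mask2 without the width
  // shift, yielding newMask == Mask and finally shuffle(v, undef, <0..7>).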
2829 
2830  if (newLHS == LHS && newRHS == RHS)
2831  return MadeChange ? &SVI : nullptr;
2832 
2833  ArrayRef<int> LHSMask;
2834  ArrayRef<int> RHSMask;
2835  if (newLHS != LHS)
2836  LHSMask = LHSShuffle->getShuffleMask();
2837  if (RHSShuffle && newRHS != RHS)
2838  RHSMask = RHSShuffle->getShuffleMask();
2839 
2840  unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
2841  SmallVector<int, 16> newMask;
2842  bool isSplat = true;
2843  int SplatElt = -1;
2844  // Create a new mask for the new ShuffleVectorInst so that the new
2845  // ShuffleVectorInst is equivalent to the original one.
2846  for (unsigned i = 0; i < VWidth; ++i) {
2847  int eltMask;
2848  if (Mask[i] < 0) {
2849  // This element is an undef value.
2850  eltMask = -1;
2851  } else if (Mask[i] < (int)LHSWidth) {
2852  // This element is from the left-hand side vector operand.
2853  //
2854  // If LHS is going to be replaced (case 1, 2, or 4), calculate the
2855  // new mask value for the element.
2856  if (newLHS != LHS) {
2857  eltMask = LHSMask[Mask[i]];
2858  // If the value selected is an undef value, explicitly specify it
2859  // with a -1 mask value.
2860  if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
2861  eltMask = -1;
2862  } else
2863  eltMask = Mask[i];
2864  } else {
2865  // This element is from the right-hand side vector operand.
2866  //
2867  // If the value selected is an undef value, explicitly specify it
2868  // with a -1 mask value. (case 1)
2869  if (match(RHS, m_Undef()))
2870  eltMask = -1;
2871  // If RHS is going to be replaced (case 3 or 4), calculate the
2872  // new mask value for the element.
2873  else if (newRHS != RHS) {
2874  eltMask = RHSMask[Mask[i]-LHSWidth];
2875  // If the value selected is an undef value, explicitly specify it
2876  // with a -1 mask value.
2877  if (eltMask >= (int)RHSOp0Width) {
2878  assert(match(RHSShuffle->getOperand(1), m_Undef()) &&
2879  "should have been check above");
2880  eltMask = -1;
2881  }
2882  } else
2883  eltMask = Mask[i]-LHSWidth;
2884 
2885  // If LHS's width is changed, shift the mask value accordingly.
2886  // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
2887  // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
2888  // If newRHS == newLHS, we want to remap any references from newRHS to
2889  // newLHS so that we can properly identify splats that may occur due to
2890  // obfuscation across the two vectors.
2891  if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
2892  eltMask += newLHSWidth;
2893  }
2894 
2895  // Check if this could still be a splat.
2896  if (eltMask >= 0) {
2897  if (SplatElt >= 0 && SplatElt != eltMask)
2898  isSplat = false;
2899  SplatElt = eltMask;
2900  }
2901 
2902  newMask.push_back(eltMask);
2903  }
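  // E.g. (hypothetical) newMask = <5,5,-1,5> still counts as a splat: undef
  // (-1) elements skip the check above, while two different defined values
  // would clear isSplat.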
2904 
2905  // If the result mask is equal to one of the original shuffle masks,
2906  // or is a splat, do the replacement.
2907  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
2908  if (!newRHS)
2909  newRHS = UndefValue::get(newLHS->getType());
2910  return new ShuffleVectorInst(newLHS, newRHS, newMask);
2911  }
2912 
2913  return MadeChange ? &SVI : nullptr;
2914 }