1//===- InstCombineVectorOps.cpp -------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements instcombine for ExtractElement, InsertElement and
10// ShuffleVector.
11//
12//===----------------------------------------------------------------------===//
13
#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
43
44#define DEBUG_TYPE "instcombine"
45
46using namespace llvm;
47using namespace PatternMatch;
48
49STATISTIC(NumAggregateReconstructionsSimplified,
50 "Number of aggregate reconstructions turned into reuse of the "
51 "original aggregate");
52
53/// Return true if the value is cheaper to scalarize than it is to leave as a
54/// vector operation. If the extract index \p EI is a constant integer then
55/// some operations may be cheap to scalarize.
56///
57/// FIXME: It's possible to create more instructions than previously existed.
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);
60
61 // If we can pick a scalar constant value out of a vector, that is free.
62 if (auto *C = dyn_cast<Constant>(V))
63 return CEI || C->getSplatValue();
64
  if (CEI && match(V, m_Intrinsic<Intrinsic::stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // The index needs to be lower than the minimum size of the vector, because
    // for a scalable vector the actual size is only known at run time.
69 return CEI->getValue().ult(EC.getKnownMinValue());
70 }
71
72 // An insertelement to the same constant index as our extract will simplify
73 // to the scalar inserted element. An insertelement to a different constant
74 // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;
77
78 if (match(V, m_OneUse(m_Load(m_Value()))))
79 return true;
80
81 if (match(V, m_OneUse(m_UnOp())))
82 return true;
83
84 Value *V0, *V1;
85 if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
86 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
87 return true;
88
89 CmpPredicate UnusedPred;
90 if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
91 if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
92 return true;
93
94 return false;
95}
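// Illustrative example (added for clarity, not part of the original source):
// with %b = add <4 x i32> %x, splat (i32 7) used only by
//   %e = extractelement <4 x i32> %b, i64 1,
// cheapToScalarize(%b, 1) returns true because the splat operand folds to a
// free scalar constant.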
96
97// If we have a PHI node with a vector type that is only used to feed
98// itself and be an operand of extractelement at a constant location,
99// try to replace the PHI of the vector type with a PHI of a scalar type.
100Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
101 PHINode *PN) {
102 SmallVector<Instruction *, 2> Extracts;
103 // The users we want the PHI to have are:
104 // 1) The EI ExtractElement (we already know this)
105 // 2) Possibly more ExtractElements with the same index.
106 // 3) Another operand, which will feed back into the PHI.
107 Instruction *PHIUser = nullptr;
108 for (auto *U : PN->users()) {
109 if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
110 if (EI.getIndexOperand() == EU->getIndexOperand())
111 Extracts.push_back(EU);
112 else
113 return nullptr;
114 } else if (!PHIUser) {
115 PHIUser = cast<Instruction>(U);
116 } else {
117 return nullptr;
118 }
119 }
120
121 if (!PHIUser)
122 return nullptr;
123
124 // Verify that this PHI user has one use, which is the PHI itself,
125 // and that it is a binary operation which is cheap to scalarize.
126 // otherwise return nullptr.
127 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
128 !(isa<BinaryOperator>(PHIUser)) ||
129 !cheapToScalarize(PHIUser, EI.getIndexOperand()))
130 return nullptr;
131
132 // Create a scalar PHI node that will replace the vector PHI node
133 // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
137 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
138 Value *PHIInVal = PN->getIncomingValue(i);
139 BasicBlock *inBB = PN->getIncomingBlock(i);
140 Value *Elt = EI.getIndexOperand();
141 // If the operand is the PHI induction variable:
142 if (PHIInVal == PHIUser) {
143 // Scalarize the binary operation. One operand is the
144 // scalar PHI, and the other is extracted from the other
145 // vector operand.
146 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
152 // Preserve operand order for binary operation to preserve semantics of
153 // non-commutative operations.
154 Value *FirstOp = (B0->getOperand(0) == PN) ? scalarPHI : Op;
155 Value *SecondOp = (B0->getOperand(0) == PN) ? Op : scalarPHI;
      Value *newPHIUser =
          InsertNewInstWith(BinaryOperator::CreateWithCopiedFlags(
                                B0->getOpcode(), FirstOp, SecondOp, B0),
                            B0->getIterator());
160 scalarPHI->addIncoming(newPHIUser, inBB);
161 } else {
162 // Scalarize PHI input:
163 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
164 // Insert the new instruction into the predecessor basic block.
165 Instruction *pos = dyn_cast<Instruction>(PHIInVal);
166 BasicBlock::iterator InsertPos;
167 if (pos && !isa<PHINode>(pos)) {
168 InsertPos = ++pos->getIterator();
169 } else {
170 InsertPos = inBB->getFirstInsertionPt();
171 }
172
173 InsertNewInstWith(newEI, InsertPos);
174
175 scalarPHI->addIncoming(newEI, inBB);
176 }
177 }
178
179 for (auto *E : Extracts) {
180 replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }
184
185 return &EI;
186}
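// Illustrative sketch (not from the original source): a vector loop PHI such as
//   %vec  = phi <4 x i32> [ %init, %entry ], [ %next, %loop ]
//   %next = add <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
//   %e    = extractelement <4 x i32> %vec, i64 0
// is rewritten (when the add is cheap to scalarize) into a scalar PHI plus a
// scalar add of the extracted lane, and %e is replaced by the scalar PHI.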
187
188Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
189 Value *X;
190 uint64_t ExtIndexC;
191 if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
192 !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
193 return nullptr;
194
195 ElementCount NumElts =
196 cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
197 Type *DestTy = Ext.getType();
198 unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
199 bool IsBigEndian = DL.isBigEndian();
200
201 // If we are casting an integer to vector and extracting a portion, that is
202 // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");
206
207 // Big endian requires adjusting the extract index since MSB is at index 0.
208 // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
209 // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
210 if (IsBigEndian)
211 ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
212 unsigned ShiftAmountC = ExtIndexC * DestWidth;
213 if ((!ShiftAmountC ||
214 isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
215 Ext.getVectorOperand()->hasOneUse()) {
216 if (ShiftAmountC)
217 X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
218 if (DestTy->isFloatingPointTy()) {
219 Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
220 Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
221 return new BitCastInst(Trunc, DestTy);
222 }
223 return new TruncInst(X, DestTy);
224 }
225 }
226
227 if (!X->getType()->isVectorTy())
228 return nullptr;
229
230 // If this extractelement is using a bitcast from a vector of the same number
231 // of elements, see if we can find the source element from the source vector:
232 // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
233 auto *SrcTy = cast<VectorType>(X->getType());
234 ElementCount NumSrcElts = SrcTy->getElementCount();
235 if (NumSrcElts == NumElts)
236 if (Value *Elt = findScalarElement(X, ExtIndexC))
237 return new BitCastInst(Elt, DestTy);
238
239 assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
240 "Src and Dst must be the same sort of vector type");
241
242 // If the source elements are wider than the destination, try to shift and
243 // truncate a subset of scalar bits of an insert op.
244 if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
245 Value *Scalar;
246 Value *Vec;
247 uint64_t InsIndexC;
248 if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
249 m_ConstantInt(InsIndexC))))
250 return nullptr;
251
252 // The extract must be from the subset of vector elements that we inserted
253 // into. Example: if we inserted element 1 of a <2 x i64> and we are
254 // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
255 // of elements 4-7 of the bitcasted vector.
256 unsigned NarrowingRatio =
257 NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();
258
259 if (ExtIndexC / NarrowingRatio != InsIndexC) {
260 // Remove insertelement, if we don't use the inserted element.
261 // extractelement (bitcast (insertelement (Vec, b)), a) ->
262 // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved to SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
265 if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
266 Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
267 return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
268 }
269 return nullptr;
270 }
271
272 // We are extracting part of the original scalar. How that scalar is
273 // inserted into the vector depends on the endian-ness. Example:
274 // Vector Byte Elt Index: 0 1 2 3 4 5 6 7
275 // +--+--+--+--+--+--+--+--+
276 // inselt <2 x i32> V, <i32> S, 1: |V0|V1|V2|V3|S0|S1|S2|S3|
277 // extelt <4 x i16> V', 3: | |S2|S3|
278 // +--+--+--+--+--+--+--+--+
279 // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
280 // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
281 // In this example, we must right-shift little-endian. Big-endian is just a
282 // truncate.
283 unsigned Chunk = ExtIndexC % NarrowingRatio;
284 if (IsBigEndian)
285 Chunk = NarrowingRatio - 1 - Chunk;
286
287 // Bail out if this is an FP vector to FP vector sequence. That would take
288 // more instructions than we started with unless there is no shift, and it
289 // may not be handled as well in the backend.
290 bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
291 bool NeedDestBitcast = DestTy->isFloatingPointTy();
292 if (NeedSrcBitcast && NeedDestBitcast)
293 return nullptr;
294
295 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
296 unsigned ShAmt = Chunk * DestWidth;
297
298 // TODO: This limitation is more strict than necessary. We could sum the
299 // number of new instructions and subtract the number eliminated to know if
300 // we can proceed.
301 if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
302 if (NeedSrcBitcast || NeedDestBitcast)
303 return nullptr;
304
305 if (NeedSrcBitcast) {
306 Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
307 Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
308 }
309
310 if (ShAmt) {
311 // Bail out if we could end with more instructions than we started with.
312 if (!Ext.getVectorOperand()->hasOneUse())
313 return nullptr;
314 Scalar = Builder.CreateLShr(Scalar, ShAmt);
315 }
316
317 if (NeedDestBitcast) {
318 Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
319 return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
320 }
321 return new TruncInst(Scalar, DestTy);
322 }
323
324 return nullptr;
325}
326
/// Find elements of V demanded by UserInstr. If this returns false, we were
/// not able to determine all of the elements.
static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
                                         APInt &UnionUsedElts) {
331 unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
332
333 switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
339 UnionUsedElts.setBit(EEIIndexC->getZExtValue());
340 return true;
341 }
342 break;
343 }
344 case Instruction::ShuffleVector: {
345 ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
346 unsigned MaskNumElts =
347 cast<FixedVectorType>(UserInstr->getType())->getNumElements();
348
349 for (auto I : llvm::seq(MaskNumElts)) {
350 unsigned MaskVal = Shuffle->getMaskValue(I);
351 if (MaskVal == -1u || MaskVal >= 2 * VWidth)
352 continue;
353 if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
354 UnionUsedElts.setBit(MaskVal);
355 if (Shuffle->getOperand(1) == V &&
356 ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
357 UnionUsedElts.setBit(MaskVal - VWidth);
358 }
359 return true;
360 }
361 default:
362 break;
363 }
364
365 return false;
366}
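// Illustrative example (not from the original source): for a user such as
//   %s = shufflevector <4 x i32> %v, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
// with V == %v, the bits for elements 0 and 2 are set in UnionUsedElts and the
// function returns true.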
367
368/// Find union of elements of V demanded by all its users.
369/// If it is known by querying findDemandedEltsBySingleUser that
370/// no user demands an element of V, then the corresponding bit
371/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();
374
375 APInt UnionUsedElts(VWidth, 0);
376 for (const Use &U : V->uses()) {
377 if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
378 if (!findDemandedEltsBySingleUser(V, I, UnionUsedElts))
379 return APInt::getAllOnes(VWidth);
380 } else {
381 UnionUsedElts = APInt::getAllOnes(VWidth);
382 break;
383 }
384
385 if (UnionUsedElts.isAllOnes())
386 break;
387 }
388
389 return UnionUsedElts;
390}
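// Illustrative example (not from the original source): if the only users of a
// <4 x i32> value are extractelement instructions at indices 0 and 3, the
// returned demanded-elements mask is 0b1001.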
391
392/// Given a constant index for a extractelement or insertelement instruction,
393/// return it with the canonical type if it isn't already canonical. We
394/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
395/// matter, we just want a consistent type to simplify CSE.
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
398 if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
399 return nullptr;
400 return ConstantInt::get(IndexC->getContext(),
401 IndexC->getValue().zextOrTrunc(64));
402}
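// Illustrative example (not from the original source): an extract/insert index
// of 'i32 3' is canonicalized to 'i64 3'; an index that is already i64 (or
// does not fit in 64 bits) is left unchanged and nullptr is returned.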
403
Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
406 Value *Index = EI.getIndexOperand();
407 if (Value *V = simplifyExtractElementInst(SrcVec, Index,
408 SQ.getWithInstruction(&EI)))
409 return replaceInstUsesWith(EI, V);
410
411 // extractelt (select %x, %vec1, %vec2), %const ->
412 // select %x, %vec1[%const], %vec2[%const]
413 // TODO: Support constant folding of multiple select operands:
414 // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
415 // If the extractelement will for instance try to do out of bounds accesses
416 // because of the values of %c1 and/or %c2, the sequence could be optimized
417 // early. This is currently not possible because constant folding will reach
418 // an unreachable assertion if it doesn't find a constant operand.
  if (auto *SI = dyn_cast<SelectInst>(SrcVec))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<ConstantInt>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;
424
425 // If extracting a specified index from the vector, see if we can recursively
426 // find a previously computed scalar that was inserted into the vector.
427 auto *IndexC = dyn_cast<ConstantInt>(Index);
428 bool HasKnownValidIndex = false;
429 if (IndexC) {
430 // Canonicalize type of constant indices to i64 to simplify CSE
431 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
432 return replaceOperand(EI, 1, NewIdx);
433
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    HasKnownValidIndex = IndexC->getValue().ult(NumElts);

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // The index needs to be lower than the minimum size of the vector,
      // because for a scalable vector the actual size is only known at run time.
442 if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
443 Type *Ty = EI.getType();
444 unsigned BitWidth = Ty->getIntegerBitWidth();
445 Value *Idx;
446 // Return index when its value does not exceed the allowed limit
447 // for the element type of the vector.
448 // TODO: Truncate out-of-range values.
449 if (IndexC->getValue().getActiveBits() <= BitWidth)
450 Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
451 else
452 return nullptr;
453 return replaceInstUsesWith(EI, Idx);
454 }
455 }
456
457 // InstSimplify should handle cases where the index is invalid.
458 // For fixed-length vector, it's invalid to extract out-of-range element.
459 if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
460 return nullptr;
461
462 if (Instruction *I = foldBitcastExtElt(EI))
463 return I;
464
465 // If there's a vector PHI feeding a scalar use through this extractelement
466 // instruction, try to scalarize the PHI.
467 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
468 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
469 return ScalarPHI;
470 }
471
472 // If SrcVec is a subvector starting at index 0, extract from the
473 // wider source vector
474 Value *V;
  if (match(SrcVec,
            m_Intrinsic<Intrinsic::vector_extract>(m_Value(V), m_Zero())))
    return ExtractElementInst::Create(V, Index);
478
479 // TODO come up with a n-ary matcher that subsumes both unary and
480 // binary matchers.
481 UnaryOperator *UO;
482 if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
483 // extelt (unop X), Index --> unop (extelt X, Index)
484 Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }
488
489 // If the binop is not speculatable, we cannot hoist the extractelement if
490 // it may make the operand poison.
491 BinaryOperator *BO;
492 if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
      (HasKnownValidIndex ||
       isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
496 Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
497 Value *E0 = Builder.CreateExtractElement(X, Index);
498 Value *E1 = Builder.CreateExtractElement(Y, Index);
499 return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
500 }
501
502 Value *X, *Y;
503 CmpPredicate Pred;
504 if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
505 cheapToScalarize(SrcVec, Index)) {
506 // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
507 Value *E0 = Builder.CreateExtractElement(X, Index);
508 Value *E1 = Builder.CreateExtractElement(Y, Index);
509 CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
510 return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
511 SrcCmpInst);
512 }
513
514 if (auto *I = dyn_cast<Instruction>(SrcVec)) {
515 if (auto *IE = dyn_cast<InsertElementInst>(I)) {
516 // instsimplify already handled the case where the indices are constants
517 // and equal by value, if both are constants, they must not be the same
518 // value, extract from the pre-inserted value instead.
519 if (isa<Constant>(IE->getOperand(2)) && IndexC)
520 return replaceOperand(EI, 0, IE->getOperand(0));
521 } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
522 auto *VecType = cast<VectorType>(GEP->getType());
523 ElementCount EC = VecType->getElementCount();
524 uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
525 if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
526 // Find out why we have a vector result - these are a few examples:
527 // 1. We have a scalar pointer and a vector of indices, or
528 // 2. We have a vector of pointers and a scalar index, or
529 // 3. We have a vector of pointers and a vector of indices, etc.
530 // Here we only consider combining when there is exactly one vector
531 // operand, since the optimization is less obviously a win due to
532 // needing more than one extractelements.
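        // Illustrative example (not from the original source): for
        //   %gep = getelementptr i32, ptr %base, <2 x i64> %offs
        //   %e   = extractelement <2 x ptr> %gep, i64 0
        // only %offs is a vector operand, so the fold produces a scalar GEP of
        // %base and (extractelement <2 x i64> %offs, i64 0).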
533
534 unsigned VectorOps =
535 llvm::count_if(GEP->operands(), [](const Value *V) {
536 return isa<VectorType>(V->getType());
537 });
538 if (VectorOps == 1) {
539 Value *NewPtr = GEP->getPointerOperand();
540 if (isa<VectorType>(NewPtr->getType()))
541 NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
542
          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
545 Value *Op = GEP->getOperand(I);
546 if (isa<VectorType>(Op->getType()))
547 NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
548 else
549 NewOps.push_back(Op);
550 }
551
          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
554 NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
555 return NewGEP;
556 }
557 }
558 } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
559 int SplatIndex = getSplatIndex(SVI->getShuffleMask());
560 // We know the all-0 splat must be reading from the first operand, even
561 // in the case of scalable vectors (vscale is always > 0).
562 if (SplatIndex == 0)
563 return ExtractElementInst::Create(SVI->getOperand(0),
564 Builder.getInt64(0));
565
566 if (isa<FixedVectorType>(SVI->getType())) {
567 std::optional<int> SrcIdx;
568 // getSplatIndex returns -1 to mean not-found.
569 if (SplatIndex != -1)
570 SrcIdx = SplatIndex;
571 else if (ConstantInt *CI = dyn_cast<ConstantInt>(Index))
572 SrcIdx = SVI->getMaskValue(CI->getZExtValue());
573
574 if (SrcIdx) {
575 Value *Src;
576 unsigned LHSWidth =
577 cast<FixedVectorType>(SVI->getOperand(0)->getType())
578 ->getNumElements();
579
          if (*SrcIdx < 0)
            return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
          if (*SrcIdx < (int)LHSWidth)
583 Src = SVI->getOperand(0);
584 else {
585 *SrcIdx -= LHSWidth;
586 Src = SVI->getOperand(1);
587 }
          Type *Int64Ty = Type::getInt64Ty(EI.getContext());
          return ExtractElementInst::Create(
              Src, ConstantInt::get(Int64Ty, *SrcIdx, false));
591 }
592 }
593 } else if (auto *CI = dyn_cast<CastInst>(I)) {
594 // Canonicalize extractelement(cast) -> cast(extractelement).
595 // Bitcasts can change the number of vector elements, and they cost
596 // nothing.
597 // If the CI has only one use, but that use is inside a loop, this
598 // canonicalization is not profitable because it would turn a vector
599 // operation into scalar operations inside the loop. Apply the transform
600 // when:
601 // - the index is constant and CI has one use, or
602 // - the CI and EI are in the same basic block, so the cast won't be sunk
603 // into a loop.
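      // Illustrative example (not from the original source):
      //   %w = zext <4 x i16> %v to <4 x i32>
      //   %e = extractelement <4 x i32> %w, i64 1
      // becomes
      //   %s = extractelement <4 x i16> %v, i64 1
      //   %e = zext i16 %s to i32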
604 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast) &&
605 (EI.getParent() == CI->getParent() || isa<ConstantInt>(Index))) {
606 Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
607 return CastInst::Create(CI->getOpcode(), EE, EI.getType());
608 }
609 }
610 }
611
612 // Run demanded elements after other transforms as this can drop flags on
613 // binops. If there's two paths to the same final result, we prefer the
614 // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
618 // This instruction only demands the single element from the input vector.
619 // Skip for scalable type, the number of elements is unknown at
620 // compile-time.
621 if (!EC.isScalable() && NumElts != 1) {
622 // If the input vector has a single use, simplify it based on this use
623 // property.
624 if (SrcVec->hasOneUse()) {
625 APInt PoisonElts(NumElts, 0);
626 APInt DemandedElts(NumElts, 0);
627 DemandedElts.setBit(IndexC->getZExtValue());
628 if (Value *V =
629 SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
630 return replaceOperand(EI, 0, V);
631 } else {
632 // If the input vector has multiple uses, simplify it based on a union
633 // of all elements used.
634 APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
635 if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
640 if (V != SrcVec) {
641 Worklist.addValue(SrcVec);
642 SrcVec->replaceAllUsesWith(V);
643 return &EI;
644 }
645 }
646 }
647 }
648 }
649 }
650 return nullptr;
651}
652
653/// If V is a shuffle of values that ONLY returns elements from either LHS or
654/// RHS, return the shuffle mask and true. Otherwise, return false.
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
657 assert(LHS->getType() == RHS->getType() &&
658 "Invalid CollectSingleShuffleElements");
659 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
660
661 if (match(V, m_Poison())) {
662 Mask.assign(NumElts, -1);
663 return true;
664 }
665
666 if (V == LHS) {
667 for (unsigned i = 0; i != NumElts; ++i)
668 Mask.push_back(i);
669 return true;
670 }
671
672 if (V == RHS) {
673 for (unsigned i = 0; i != NumElts; ++i)
674 Mask.push_back(i + NumElts);
675 return true;
676 }
677
  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
680 Value *VecOp = IEI->getOperand(0);
681 Value *ScalarOp = IEI->getOperand(1);
682 Value *IdxOp = IEI->getOperand(2);
683
684 if (!isa<ConstantInt>(IdxOp))
685 return false;
686 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
687
688 if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
689 // We can handle this if the vector we are inserting into is
690 // transitively ok.
691 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
692 // If so, update the mask to reflect the inserted poison.
693 Mask[InsertedIdx] = -1;
694 return true;
695 }
696 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
697 if (isa<ConstantInt>(EI->getOperand(1))) {
698 unsigned ExtractedIdx =
699 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
700 unsigned NumLHSElts =
701 cast<FixedVectorType>(LHS->getType())->getNumElements();
702
703 // This must be extracting from either LHS or RHS.
704 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
705 // We can handle this if the vector we are inserting into is
706 // transitively ok.
707 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
708 // If so, update the mask to reflect the inserted value.
709 if (EI->getOperand(0) == LHS) {
710 Mask[InsertedIdx % NumElts] = ExtractedIdx;
711 } else {
712 assert(EI->getOperand(0) == RHS);
713 Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
714 }
715 return true;
716 }
717 }
718 }
719 }
720 }
721
722 return false;
723}
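// Illustrative example (not from the original source): with <4 x i32> LHS/RHS,
//   %i = insertelement <4 x i32> %LHS, i32 %s, i64 2
// where %s = extractelement <4 x i32> %RHS, i64 1, the collected mask is
// <0, 1, 5, 3>: element 2 of the result comes from lane 1 of RHS (4 + 1).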
724
725/// If we have insertion into a vector that is wider than the vector that we
726/// are extracting from, try to widen the source vector to allow a single
727/// shufflevector to replace one or more insert/extract pairs.
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
731 auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
732 auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
733 unsigned NumInsElts = InsVecType->getNumElements();
734 unsigned NumExtElts = ExtVecType->getNumElements();
735
736 // The inserted-to vector must be wider than the extracted-from vector.
737 if (InsVecType->getElementType() != ExtVecType->getElementType() ||
738 NumExtElts >= NumInsElts)
739 return false;
740
741 Value *ExtVecOp = ExtElt->getVectorOperand();
742 // Bail out on constant vectors.
743 if (isa<ConstantData>(ExtVecOp))
744 return false;
745
746 // Create a shuffle mask to widen the extended-from vector using poison
747 // values. The mask selects all of the values of the original vector followed
748 // by as many poison values as needed to create a vector of the same length
749 // as the inserted-to vector.
750 SmallVector<int, 16> ExtendMask;
751 for (unsigned i = 0; i < NumExtElts; ++i)
752 ExtendMask.push_back(i);
753 for (unsigned i = NumExtElts; i < NumInsElts; ++i)
754 ExtendMask.push_back(-1);
755
756 auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
757 BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
758 ? ExtVecOpInst->getParent()
759 : ExtElt->getParent();
760
761 // TODO: This restriction matches the basic block check below when creating
762 // new extractelement instructions. If that limitation is removed, this one
763 // could also be removed. But for now, we just bail out to ensure that we
764 // will replace the extractelement instruction that is feeding our
765 // insertelement instruction. This allows the insertelement to then be
766 // replaced by a shufflevector. If the insertelement is not replaced, we can
767 // induce infinite looping because there's an optimization for extractelement
768 // that will delete our widening shuffle. This would trigger another attempt
769 // here to create that shuffle, and we spin forever.
770 if (InsertionBlock != InsElt->getParent())
771 return false;
772
773 // TODO: This restriction matches the check in visitInsertElementInst() and
774 // prevents an infinite loop caused by not turning the extract/insert pair
775 // into a shuffle. We really should not need either check, but we're lacking
776 // folds for shufflevectors because we're afraid to generate shuffle masks
777 // that the backend can't handle.
778 if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
779 return false;
780
781 auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
782
783 // Insert the new shuffle after the vector operand of the extract is defined
784 // (as long as it's not a PHI) or at the start of the basic block of the
785 // extract, so any subsequent extracts in the same basic block can use it.
786 // TODO: Insert before the earliest ExtractElementInst that is replaced.
787 if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
788 WideVec->insertAfter(ExtVecOpInst->getIterator());
789 else
790 IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());
791
792 // Replace extracts from the original narrow vector with extracts from the new
793 // wide vector.
794 for (User *U : ExtVecOp->users()) {
    auto *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
797 continue;
798 auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
799 IC.InsertNewInstWith(NewExt, OldExt->getIterator());
800 IC.replaceInstUsesWith(*OldExt, NewExt);
801 // Add the old extracts to the worklist for DCE. We can't remove the
802 // extracts directly, because they may still be used by the calling code.
803 IC.addToWorklist(OldExt);
804 }
805
806 return true;
807}
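// Illustrative sketch (not from the original source): extracts from a
// <2 x i8> %small that feed inserts into a <4 x i8> vector are redirected to
//   %wide = shufflevector <2 x i8> %small, <2 x i8> poison,
//                         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
// so that a later round can turn the insert/extract chain into one shuffle.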
808
809/// We are building a shuffle to create V, which is a sequence of insertelement,
810/// extractelement pairs. If PermittedRHS is set, then we must either use it or
811/// not rely on the second vector source. Return a std::pair containing the
812/// left and right vectors of the proposed shuffle (or 0), and set the Mask
813/// parameter as required.
814///
815/// Note: we intentionally don't try to fold earlier shuffles since they have
816/// often been chosen carefully to be efficiently implementable on the target.
817using ShuffleOps = std::pair<Value *, Value *>;
818
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
822 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
823 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
824
825 if (match(V, m_Poison())) {
826 Mask.assign(NumElts, -1);
827 return std::make_pair(
828 PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
829 }
830
  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
833 return std::make_pair(V, nullptr);
834 }
835
  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
838 Value *VecOp = IEI->getOperand(0);
839 Value *ScalarOp = IEI->getOperand(1);
840 Value *IdxOp = IEI->getOperand(2);
841
    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp))
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
844 unsigned ExtractedIdx =
845 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
846 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
847
848 // Either the extracted from or inserted into vector must be RHSVec,
849 // otherwise we'd end up with a shuffle of three inputs.
850 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
851 Value *RHS = EI->getOperand(0);
852 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
853 assert(LR.second == nullptr || LR.second == RHS);
854
855 if (LR.first->getType() != RHS->getType()) {
856 // Although we are giving up for now, see if we can create extracts
857 // that match the inserts for another round of combining.
858 if (replaceExtractElements(IEI, EI, IC))
859 Rerun = true;
860
861 // We tried our best, but we can't find anything compatible with RHS
862 // further up the chain. Return a trivial shuffle.
863 for (unsigned i = 0; i < NumElts; ++i)
864 Mask[i] = i;
865 return std::make_pair(V, nullptr);
866 }
867
868 unsigned NumLHSElts =
869 cast<FixedVectorType>(RHS->getType())->getNumElements();
870 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
871 return std::make_pair(LR.first, RHS);
872 }
873
874 if (VecOp == PermittedRHS) {
875 // We've gone as far as we can: anything on the other side of the
876 // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
880 for (unsigned i = 0; i != NumElts; ++i)
881 Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
882 return std::make_pair(EI->getOperand(0), PermittedRHS);
883 }
884
885 // If this insertelement is a chain that comes from exactly these two
886 // vectors, return the vector and the effective shuffle.
887 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
888 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
889 Mask))
890 return std::make_pair(EI->getOperand(0), PermittedRHS);
891 }
892 }
893 }
894
895 // Otherwise, we can't do anything fancy. Return an identity vector.
896 for (unsigned i = 0; i != NumElts; ++i)
897 Mask.push_back(i);
898 return std::make_pair(V, nullptr);
899}
900
/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted; see if they all were extractvalue'd from
903/// the same source aggregate from the exact same element indexes.
904/// If they were, just reuse the source aggregate.
905/// This potentially deals with PHI indirections.
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
908 Type *AggTy = OrigIVI.getType();
909 unsigned NumAggElts;
910 switch (AggTy->getTypeID()) {
911 case Type::StructTyID:
912 NumAggElts = AggTy->getStructNumElements();
913 break;
914 case Type::ArrayTyID:
915 NumAggElts = AggTy->getArrayNumElements();
916 break;
917 default:
918 llvm_unreachable("Unhandled aggregate type?");
919 }
920
921 // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
922 // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
923 // FIXME: any interesting patterns to be caught with larger limit?
924 assert(NumAggElts > 0 && "Aggregate should have elements.");
925 if (NumAggElts > 2)
926 return nullptr;
927
928 static constexpr auto NotFound = std::nullopt;
929 static constexpr auto FoundMismatch = nullptr;
930
931 // Try to find a value of each element of an aggregate.
932 // FIXME: deal with more complex, not one-dimensional, aggregate types
933 SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);
934
935 // Do we know values for each element of the aggregate?
936 auto KnowAllElts = [&AggElts]() {
937 return !llvm::is_contained(AggElts, NotFound);
938 };
939
940 int Depth = 0;
941
942 // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
943 // every element being overwritten twice, which should never happen.
944 static const int DepthLimit = 2 * NumAggElts;
945
946 // Recurse up the chain of `insertvalue` aggregate operands until either we've
947 // reconstructed full initializer or can't visit any more `insertvalue`'s.
948 for (InsertValueInst *CurrIVI = &OrigIVI;
949 Depth < DepthLimit && CurrIVI && !KnowAllElts();
950 CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
951 ++Depth) {
952 auto *InsertedValue =
953 dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
954 if (!InsertedValue)
955 return nullptr; // Inserted value must be produced by an instruction.
956
957 ArrayRef<unsigned int> Indices = CurrIVI->getIndices();
958
959 // Don't bother with more than single-level aggregates.
960 if (Indices.size() != 1)
961 return nullptr; // FIXME: deal with more complex aggregates?
962
963 // Now, we may have already previously recorded the value for this element
964 // of an aggregate. If we did, that means the CurrIVI will later be
965 // overwritten with the already-recorded value. But if not, let's record it!
966 std::optional<Instruction *> &Elt = AggElts[Indices.front()];
967 Elt = Elt.value_or(InsertedValue);
968
969 // FIXME: should we handle chain-terminating undef base operand?
970 }
971
972 // Was that sufficient to deduce the full initializer for the aggregate?
973 if (!KnowAllElts())
974 return nullptr; // Give up then.
975
976 // We now want to find the source[s] of the aggregate elements we've found.
977 // And with "source" we mean the original aggregate[s] from which
978 // the inserted elements were extracted. This may require PHI translation.
979
980 enum class AggregateDescription {
981 /// When analyzing the value that was inserted into an aggregate, we did
982 /// not manage to find defining `extractvalue` instruction to analyze.
983 NotFound,
984 /// When analyzing the value that was inserted into an aggregate, we did
985 /// manage to find defining `extractvalue` instruction[s], and everything
986 /// matched perfectly - aggregate type, element insertion/extraction index.
987 Found,
988 /// When analyzing the value that was inserted into an aggregate, we did
989 /// manage to find defining `extractvalue` instruction, but there was
990 /// a mismatch: either the source type from which the extraction was didn't
991 /// match the aggregate type into which the insertion was,
992 /// or the extraction/insertion channels mismatched,
993 /// or different elements had different source aggregates.
994 FoundMismatch
995 };
996 auto Describe = [](std::optional<Value *> SourceAggregate) {
997 if (SourceAggregate == NotFound)
998 return AggregateDescription::NotFound;
999 if (*SourceAggregate == FoundMismatch)
1000 return AggregateDescription::FoundMismatch;
1001 return AggregateDescription::Found;
1002 };
1003
1004 // If an aggregate element is defined in UseBB, we can't use it in PredBB.
1005 bool EltDefinedInUseBB = false;
1006
1007 // Given the value \p Elt that was being inserted into element \p EltIdx of an
1008 // aggregate AggTy, see if \p Elt was originally defined by an
1009 // appropriate extractvalue (same element index, same aggregate type).
1010 // If found, return the source aggregate from which the extraction was.
1011 // If \p PredBB is provided, does PHI translation of an \p Elt first.
1012 auto FindSourceAggregate =
1013 [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
1014 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1015 // For now(?), only deal with, at most, a single level of PHI indirection.
1016 if (UseBB && PredBB) {
1017 Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
1018 if (Elt && Elt->getParent() == *UseBB)
1019 EltDefinedInUseBB = true;
1020 }
1021 // FIXME: deal with multiple levels of PHI indirection?
1022
1023 // Did we find an extraction?
1024 auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
1025 if (!EVI)
1026 return NotFound;
1027
1028 Value *SourceAggregate = EVI->getAggregateOperand();
1029
1030 // Is the extraction from the same type into which the insertion was?
1031 if (SourceAggregate->getType() != AggTy)
1032 return FoundMismatch;
1033 // And the element index doesn't change between extraction and insertion?
1034 if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
1035 return FoundMismatch;
1036
1037 return SourceAggregate; // AggregateDescription::Found
1038 };
1039
1040 // Given elements AggElts that were constructing an aggregate OrigIVI,
1041 // see if we can find appropriate source aggregate for each of the elements,
1042 // and see it's the same aggregate for each element. If so, return it.
1043 auto FindCommonSourceAggregate =
1044 [&](std::optional<BasicBlock *> UseBB,
1045 std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
1046 std::optional<Value *> SourceAggregate;
1047
1048 for (auto I : enumerate(AggElts)) {
1049 assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
1050 "We don't store nullptr in SourceAggregate!");
1051 assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
1052 (I.index() != 0) &&
1053 "SourceAggregate should be valid after the first element,");
1054
1055 // For this element, is there a plausible source aggregate?
1056 // FIXME: we could special-case undef element, IFF we know that in the
1057 // source aggregate said element isn't poison.
1058 std::optional<Value *> SourceAggregateForElement =
1059 FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);
1060
1061 // Okay, what have we found? Does that correlate with previous findings?
1062
1063 // Regardless of whether or not we have previously found source
1064 // aggregate for previous elements (if any), if we didn't find one for
1065 // this element, passthrough whatever we have just found.
1066 if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
1067 return SourceAggregateForElement;
1068
1069 // Okay, we have found source aggregate for this element.
1070 // Let's see what we already know from previous elements, if any.
1071 switch (Describe(SourceAggregate)) {
1072 case AggregateDescription::NotFound:
1073 // This is apparently the first element that we have examined.
1074 SourceAggregate = SourceAggregateForElement; // Record the aggregate!
1075 continue; // Great, now look at next element.
1076 case AggregateDescription::Found:
1077 // We have previously already successfully examined other elements.
1078 // Is this the same source aggregate we've found for other elements?
1079 if (*SourceAggregateForElement != *SourceAggregate)
1080 return FoundMismatch;
1081 continue; // Still the same aggregate, look at next element.
1082 case AggregateDescription::FoundMismatch:
1083 llvm_unreachable("Can't happen. We would have early-exited then.");
1084 };
1085 }
1086
1087 assert(Describe(SourceAggregate) == AggregateDescription::Found &&
1088 "Must be a valid Value");
1089 return *SourceAggregate;
1090 };
1091
1092 std::optional<Value *> SourceAggregate;
1093
1094 // Can we find the source aggregate without looking at predecessors?
1095 SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
1096 /*PredBB=*/std::nullopt);
1097 if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
1098 if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
1099 return nullptr; // Conflicting source aggregates!
1100 ++NumAggregateReconstructionsSimplified;
1101 return replaceInstUsesWith(OrigIVI, *SourceAggregate);
1102 }
1103
1104 // Okay, apparently we need to look at predecessors.
1105
1106 // We should be smart about picking the "use" basic block, which will be the
1107 // merge point for aggregate, where we'll insert the final PHI that will be
1108 // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
1109 // We should look in which blocks each of the AggElts is being defined,
1110 // they all should be defined in the same basic block.
1111 BasicBlock *UseBB = nullptr;
1112
1113 for (const std::optional<Instruction *> &I : AggElts) {
1114 BasicBlock *BB = (*I)->getParent();
1115 // If it's the first instruction we've encountered, record the basic block.
1116 if (!UseBB) {
1117 UseBB = BB;
1118 continue;
1119 }
1120 // Otherwise, this must be the same basic block we've seen previously.
1121 if (UseBB != BB)
1122 return nullptr;
1123 }
1124
1125 // If *all* of the elements are basic-block-independent, meaning they are
1126 // either function arguments, or constant expressions, then if we didn't
1127 // handle them without predecessor-aware handling, we won't handle them now.
1128 if (!UseBB)
1129 return nullptr;
1130
1131 // If we didn't manage to find source aggregate without looking at
1132 // predecessors, and there are no predecessors to look at, then we're done.
1133 if (pred_empty(UseBB))
1134 return nullptr;
1135
1136 // Arbitrary predecessor count limit.
1137 static const int PredCountLimit = 64;
1138
1139 // Cache the (non-uniqified!) list of predecessors in a vector,
1140 // checking the limit at the same time for efficiency.
1141 SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
1142 for (BasicBlock *Pred : predecessors(UseBB)) {
1143 // Don't bother if there are too many predecessors.
1144 if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
1145 return nullptr;
1146 Preds.emplace_back(Pred);
1147 }
1148
1149 // For each predecessor, what is the source aggregate,
1150 // from which all the elements were originally extracted from?
1151 // Note that we want for the map to have stable iteration order!
  SmallMapVector<BasicBlock *, Value *, 4> SourceAggregates;
  bool FoundSrcAgg = false;
1154 for (BasicBlock *Pred : Preds) {
1155 std::pair<decltype(SourceAggregates)::iterator, bool> IV =
1156 SourceAggregates.try_emplace(Pred);
1157 // Did we already evaluate this predecessor?
1158 if (!IV.second)
1159 continue;
1160
1161 // Let's hope that when coming from predecessor Pred, all elements of the
1162 // aggregate produced by OrigIVI must have been originally extracted from
1163 // the same aggregate. Is that so? Can we find said original aggregate?
1164 SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
1165 if (Describe(SourceAggregate) == AggregateDescription::Found) {
1166 FoundSrcAgg = true;
1167 IV.first->second = *SourceAggregate;
1168 } else {
1169 // If UseBB is the single successor of Pred, we can add InsertValue to
1170 // Pred.
1171 auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
1172 if (!BI || !BI->isUnconditional())
1173 return nullptr;
1174 }
1175 }
1176
1177 if (!FoundSrcAgg)
1178 return nullptr;
1179
  // Do some sanity checks if we need to add insertvalue into predecessors.
1181 auto OrigBB = OrigIVI.getParent();
1182 for (auto &It : SourceAggregates) {
1183 if (Describe(It.second) == AggregateDescription::Found)
1184 continue;
1185
1186 // Element is defined in UseBB, so it can't be used in predecessors.
1187 if (EltDefinedInUseBB)
1188 return nullptr;
1189
    // Doing this transformation across a loop boundary may cause a dead loop,
    // so we should avoid that situation. But LoopInfo is not generally
    // available; we must be conservative here.
1193 // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
1194 // can't be in inner loop.
1195 if (UseBB != OrigBB)
1196 return nullptr;
1197
1198 // Avoid constructing constant aggregate because constant value may expose
1199 // more optimizations.
1200 bool ConstAgg = true;
1201 for (auto Val : AggElts) {
1202 Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
1203 if (!isa<Constant>(Elt)) {
1204 ConstAgg = false;
1205 break;
1206 }
1207 }
1208 if (ConstAgg)
1209 return nullptr;
1210 }
1211
1212 // For predecessors without appropriate source aggregate, create one in the
1213 // predecessor.
1214 for (auto &It : SourceAggregates) {
1215 if (Describe(It.second) == AggregateDescription::Found)
1216 continue;
1217
1218 BasicBlock *Pred = It.first;
1219 Builder.SetInsertPoint(Pred->getTerminator());
1220 Value *V = PoisonValue::get(AggTy);
1221 for (auto [Idx, Val] : enumerate(AggElts)) {
1222 Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
1223 V = Builder.CreateInsertValue(V, Elt, Idx);
1224 }
1225
1226 It.second = V;
1227 }
1228
1229 // All good! Now we just need to thread the source aggregates here.
1230 // Note that we have to insert the new PHI here, ourselves, because we can't
1231 // rely on InstCombinerImpl::run() inserting it into the right basic block.
1232 // Note that the same block can be a predecessor more than once,
1233 // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
1236 auto *PHI =
1237 Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
1238 for (BasicBlock *Pred : Preds)
1239 PHI->addIncoming(SourceAggregates[Pred], Pred);
1240
1241 ++NumAggregateReconstructionsSimplified;
1242 return replaceInstUsesWith(OrigIVI, PHI);
1243}
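// Illustrative example (not from the original source):
//   %e0 = extractvalue { i8, i32 } %agg, 0
//   %e1 = extractvalue { i8, i32 } %agg, 1
//   %i0 = insertvalue { i8, i32 } poison, i8 %e0, 0
//   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
// is simplified so that uses of %i1 are replaced by %agg itself.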
1244
1245/// Try to find redundant insertvalue instructions, like the following ones:
1246/// %0 = insertvalue { i8, i32 } undef, i8 %x, 0
1247/// %1 = insertvalue { i8, i32 } %0, i8 %y, 0
1248/// Here the second instruction inserts values at the same indices, as the
1249/// first one, making the first one redundant.
1250/// It should be transformed to:
1251/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);
1257
1258 bool IsRedundant = false;
1259 ArrayRef<unsigned int> FirstIndices = I.getIndices();
1260
1261 // If there is a chain of insertvalue instructions (each of them except the
1262 // last one has only one use and it's another insertvalue insn from this
1263 // chain), check if any of the 'children' uses the same indices as the first
1264 // instruction. In this case, the first one is redundant.
1265 Value *V = &I;
1266 unsigned Depth = 0;
1267 while (V->hasOneUse() && Depth < 10) {
1268 User *U = V->user_back();
1269 auto UserInsInst = dyn_cast<InsertValueInst>(U);
1270 if (!UserInsInst || U->getOperand(0) != V)
1271 break;
1272 if (UserInsInst->getIndices() == FirstIndices) {
1273 IsRedundant = true;
1274 break;
1275 }
1276 V = UserInsInst;
1277 Depth++;
1278 }
1279
1280 if (IsRedundant)
1281 return replaceInstUsesWith(I, I.getOperand(0));
1282
  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;
1285
1286 return nullptr;
1287}
1288
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type: the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;
1294
1295 int MaskSize = Shuf.getShuffleMask().size();
1296 int VecSize =
1297 cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();
1298
1299 // A vector select does not change the size of the operands.
1300 if (MaskSize != VecSize)
1301 return false;
1302
1303 // Each mask element must be undefined or choose a vector element from one of
1304 // the source operands without crossing vector lanes.
1305 for (int i = 0; i != MaskSize; ++i) {
1306 int Elt = Shuf.getMaskValue(i);
1307 if (Elt != -1 && Elt != i && Elt != i + VecSize)
1308 return false;
1309 }
1310
1311 return true;
1312}
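// Illustrative example (not from the original source): for two <4 x i32>
// operands, a mask of <0, 5, 2, 7> picks each lane from one of the operands
// without crossing lanes, so it is treated as a vector select; a mask of
// <1, 0, 2, 3> is not, because lanes 0 and 1 cross.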
1313
1314/// Turn a chain of inserts that splats a value into an insert + shuffle:
1315/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
1316/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
1319 // single user and that user is an insert, bail.
1320 if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
1321 return nullptr;
1322
1323 VectorType *VecTy = InsElt.getType();
1324 // Can not handle scalable type, the number of elements is not a compile-time
1325 // constant.
1326 if (isa<ScalableVectorType>(VecTy))
1327 return nullptr;
1328 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
1329
1330 // Do not try to do this for a one-element vector, since that's a nop,
1331 // and will cause an inf-loop.
1332 if (NumElements == 1)
1333 return nullptr;
1334
1335 Value *SplatVal = InsElt.getOperand(1);
1336 InsertElementInst *CurrIE = &InsElt;
1337 SmallBitVector ElementPresent(NumElements, false);
1338 InsertElementInst *FirstIE = nullptr;
1339
1340 // Walk the chain backwards, keeping track of which indices we inserted into,
1341 // until we hit something that isn't an insert of the splatted value.
1342 while (CurrIE) {
1343 auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
1344 if (!Idx || CurrIE->getOperand(1) != SplatVal)
1345 return nullptr;
1346
1347 auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
1348 // Check none of the intermediate steps have any additional uses, except
1349 // for the root insertelement instruction, which can be re-used, if it
1350 // inserts at position 0.
1351 if (CurrIE != &InsElt &&
1352 (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
1353 return nullptr;
1354
1355 ElementPresent[Idx->getZExtValue()] = true;
1356 FirstIE = CurrIE;
1357 CurrIE = NextIE;
1358 }
1359
1360 // If this is just a single insertelement (not a sequence), we are done.
1361 if (FirstIE == &InsElt)
1362 return nullptr;
1363
1364 // If we are not inserting into a poison vector, make sure we've seen an
1365 // insert into every element.
1366 // TODO: If the base vector is not undef, it might be better to create a splat
1367 // and then a select-shuffle (blend) with the base vector.
1368 if (!match(FirstIE->getOperand(0), m_Poison()))
1369 if (!ElementPresent.all())
1370 return nullptr;
1371
1372 // Create the insert + shuffle.
1373 Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
1374 PoisonValue *PoisonVec = PoisonValue::get(VecTy);
1375 Constant *Zero = ConstantInt::get(Int64Ty, 0);
1376 if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
1377 FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
1378 InsElt.getIterator());
1379
1380 // Splat from element 0, but replace absent elements with poison in the mask.
1381 SmallVector<int, 16> Mask(NumElements, 0);
1382 for (unsigned i = 0; i != NumElements; ++i)
1383 if (!ElementPresent[i])
1384 Mask[i] = -1;
1385
1386 return new ShuffleVectorInst(FirstIE, Mask);
1387}
1388
1389/// Try to fold an insert element into an existing splat shuffle by changing
1390/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
1393 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1394 if (!Shuf || !Shuf->isZeroEltSplat())
1395 return nullptr;
1396
1397 // Bail out early if shuffle is scalable type. The number of elements in
1398 // shuffle mask is unknown at compile-time.
1399 if (isa<ScalableVectorType>(Shuf->getType()))
1400 return nullptr;
1401
1402 // Check for a constant insertion index.
1403 uint64_t IdxC;
1404 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1405 return nullptr;
1406
1407 // Check if the splat shuffle's input is the same as this insert's scalar op.
1408 Value *X = InsElt.getOperand(1);
1409 Value *Op0 = Shuf->getOperand(0);
1410 if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
1411 return nullptr;
1412
1413 // Replace the shuffle mask element at the index of this insert with a zero.
1414 // For example:
1415 // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
1416 // --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
1417 unsigned NumMaskElts =
1418 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1419 SmallVector<int, 16> NewMask(NumMaskElts);
1420 for (unsigned i = 0; i != NumMaskElts; ++i)
1421 NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);
1422
1423 return new ShuffleVectorInst(Op0, NewMask);
1424}
1425
1426/// Try to fold an extract+insert element into an existing identity shuffle by
1427/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
1430 auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
1431 if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
1432 !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
1433 return nullptr;
1434
1435 // Bail out early if shuffle is scalable type. The number of elements in
1436 // shuffle mask is unknown at compile-time.
1437 if (isa<ScalableVectorType>(Shuf->getType()))
1438 return nullptr;
1439
1440 // Check for a constant insertion index.
1441 uint64_t IdxC;
1442 if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
1443 return nullptr;
1444
1445 // Check if this insert's scalar op is extracted from the identity shuffle's
1446 // input vector.
1447 Value *Scalar = InsElt.getOperand(1);
1448 Value *X = Shuf->getOperand(0);
1449 if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
1450 return nullptr;
1451
1452 // Replace the shuffle mask element at the index of this extract+insert with
1453 // that same index value.
1454 // For example:
1455 // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
1456 unsigned NumMaskElts =
1457 cast<FixedVectorType>(Shuf->getType())->getNumElements();
1458 SmallVector<int, 16> NewMask(NumMaskElts);
1459 ArrayRef<int> OldMask = Shuf->getShuffleMask();
1460 for (unsigned i = 0; i != NumMaskElts; ++i) {
1461 if (i != IdxC) {
1462 // All mask elements besides the inserted element remain the same.
1463 NewMask[i] = OldMask[i];
1464 } else if (OldMask[i] == (int)IdxC) {
1465 // If the mask element was already set, there's nothing to do
1466 // (demanded elements analysis may unset it later).
1467 return nullptr;
1468 } else {
1469 assert(OldMask[i] == PoisonMaskElem &&
1470 "Unexpected shuffle mask element for identity shuffle");
1471 NewMask[i] = IdxC;
1472 }
1473 }
1474
1475 return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
1476}
1477
1478/// If we have an insertelement instruction feeding into another insertelement
1479/// and the 2nd is inserting a constant into the vector, canonicalize that
1480/// constant insertion before the insertion of a variable:
1481///
1482/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1483/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1484///
1485/// This has the potential of eliminating the 2nd insertelement instruction
1486/// via constant folding of the scalar constant into a vector constant.
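// An illustrative IR example of this hoist (hypothetical values, not taken
// from the original source):
//   %v1 = insertelement <4 x i32> %x, i32 %y, i64 0
//   %v2 = insertelement <4 x i32> %v1, i32 7, i64 1
// -->
//   %t  = insertelement <4 x i32> %x, i32 7, i64 1
//   %v2 = insertelement <4 x i32> %t, i32 %y, i64 0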
1487static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1488 InstCombiner::BuilderTy &Builder) {
1489 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1490 if (!InsElt1 || !InsElt1->hasOneUse())
1491 return nullptr;
1492
1493 Value *X, *Y;
1494 Constant *ScalarC;
1495 ConstantInt *IdxC1, *IdxC2;
1496 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1497 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1498 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1499 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1500 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1501 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1502 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1503 }
1504
1505 return nullptr;
1506}
1507
1508/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1509/// --> shufflevector X, CVec', Mask'
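// An illustrative example with a select-equivalent shuffle (hypothetical IR,
// not from the original source):
//   %s = shufflevector <4 x i32> %x, <4 x i32> <i32 10, i32 20, i32 30, i32 40>,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %r = insertelement <4 x i32> %s, i32 99, i64 2
// -->
//   %r = shufflevector <4 x i32> %x, <4 x i32> <i32 10, i32 20, i32 99, i32 40>,
//                      <4 x i32> <i32 0, i32 5, i32 6, i32 7>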
1510static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1511 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1512 // Bail out if the parent has more than one use. In that case, we'd be
1513 // replacing the insertelt with a shuffle, and that's not a clear win.
1514 if (!Inst || !Inst->hasOneUse())
1515 return nullptr;
1516 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1517 // The shuffle must have a constant vector operand. The insertelt must have
1518 // a constant scalar being inserted at a constant position in the vector.
1519 Constant *ShufConstVec, *InsEltScalar;
1520 uint64_t InsEltIndex;
1521 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1522 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1523 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1524 return nullptr;
1525
1526 // Adding an element to an arbitrary shuffle could be expensive, but a
1527 // shuffle that selects elements from vectors without crossing lanes is
1528 // assumed cheap.
1529 // If we're just adding a constant into that shuffle, it will still be
1530 // cheap.
1531 if (!isShuffleEquivalentToSelect(*Shuf))
1532 return nullptr;
1533
1534 // From the above 'select' check, we know that the mask has the same number
1535 // of elements as the vector input operands. We also know that each constant
1536 // input element is used in its lane and cannot be used more than once by
1537 // the shuffle. Therefore, replace the constant in the shuffle's constant
1538 // vector with the insertelt constant. Replace the constant in the shuffle's
1539 // mask vector with the insertelt index plus the length of the vector
1540 // (because the constant vector operand of a shuffle is always the 2nd
1541 // operand).
1542 ArrayRef<int> Mask = Shuf->getShuffleMask();
1543 unsigned NumElts = Mask.size();
1544 SmallVector<Constant *, 16> NewShufElts(NumElts);
1545 SmallVector<int, 16> NewMaskElts(NumElts);
1546 for (unsigned I = 0; I != NumElts; ++I) {
1547 if (I == InsEltIndex) {
1548 NewShufElts[I] = InsEltScalar;
1549 NewMaskElts[I] = InsEltIndex + NumElts;
1550 } else {
1551 // Copy over the existing values.
1552 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1553 NewMaskElts[I] = Mask[I];
1554 }
1555
1556 // Bail if we failed to find an element.
1557 if (!NewShufElts[I])
1558 return nullptr;
1559 }
1560
1561 // Create new operands for a shuffle that includes the constant of the
1562 // original insertelt. The old shuffle will be dead now.
1563 return new ShuffleVectorInst(Shuf->getOperand(0),
1564 ConstantVector::get(NewShufElts), NewMaskElts);
1565 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1566 // Transform sequences of insertelement ops with constant data/indices into
1567 // a single shuffle op.
1568 // Cannot handle scalable types; the number of elements needed to create the
1569 // shuffle mask is not a compile-time constant.
1570 if (isa<ScalableVectorType>(InsElt.getType()))
1571 return nullptr;
1572 unsigned NumElts =
1573 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1574
1575 uint64_t InsertIdx[2];
1576 Constant *Val[2];
1577 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1578 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1579 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1580 !match(IEI->getOperand(1), m_Constant(Val[1])))
1581 return nullptr;
1582 SmallVector<Constant *, 16> Values(NumElts);
1583 SmallVector<int, 16> Mask(NumElts);
1584 auto ValI = std::begin(Val);
1585 // Generate new constant vector and mask.
1586 // We have 2 values/masks from the insertelement instructions. Insert them
1587 // into new value/mask vectors.
1588 for (uint64_t I : InsertIdx) {
1589 if (!Values[I]) {
1590 Values[I] = *ValI;
1591 Mask[I] = NumElts + I;
1592 }
1593 ++ValI;
1594 }
1595 // Remaining values are filled with 'poison' values.
1596 for (unsigned I = 0; I < NumElts; ++I) {
1597 if (!Values[I]) {
1598 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1599 Mask[I] = I;
1600 }
1601 }
1602 // Create new operands for a shuffle that includes the constant of the
1603 // original insertelt.
1604 return new ShuffleVectorInst(IEI->getOperand(0),
1605 ConstantVector::get(Values), Mask);
1606 }
1607 return nullptr;
1608}
1609
1610/// If both the base vector and the inserted element are extended from the same
1611/// type, do the insert element in the narrow source type followed by extend.
1612/// TODO: This can be extended to include other cast opcodes, but particularly
1613/// if we create a wider insertelement, make sure codegen is not harmed.
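// An illustrative example (hypothetical IR, not from the original source):
//   %v = fpext <4 x float> %x to <4 x double>
//   %s = fpext float %y to double
//   %r = insertelement <4 x double> %v, double %s, i64 1
// -->
//   %i = insertelement <4 x float> %x, float %y, i64 1
//   %r = fpext <4 x float> %i to <4 x double>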
1614static Instruction *narrowInsElt(InsertElementInst &InsElt,
1615 InstCombiner::BuilderTy &Builder) {
1616 // We are creating a vector extend. If the original vector extend has another
1617 // use, that would mean we end up with 2 vector extends, so avoid that.
1618 // TODO: We could relax the one-use check to "if at least one op has one use"
1619 // (assuming that the source types match - see next TODO comment).
1620 Value *Vec = InsElt.getOperand(0);
1621 if (!Vec->hasOneUse())
1622 return nullptr;
1623
1624 Value *Scalar = InsElt.getOperand(1);
1625 Value *X, *Y;
1626 CastInst::CastOps CastOpcode;
1627 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1628 CastOpcode = Instruction::FPExt;
1629 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1630 CastOpcode = Instruction::SExt;
1631 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1632 CastOpcode = Instruction::ZExt;
1633 else
1634 return nullptr;
1635
1636 // TODO: We can allow mismatched types by creating an intermediate cast.
1637 if (X->getType()->getScalarType() != Y->getType())
1638 return nullptr;
1639
1640 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1641 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1642 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1643}
1644
1645/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1646/// try to convert to a single insert with appropriate bitcasts.
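// An illustrative little-endian example (hypothetical IR, not from the
// original source):
//   %sh = lshr i32 %x, 16
//   %lo = trunc i32 %x to i16
//   %hi = trunc i32 %sh to i16
//   %v0 = insertelement <4 x i16> undef, i16 %lo, i64 0
//   %v1 = insertelement <4 x i16> %v0, i16 %hi, i64 1
// -->
//   %bc = bitcast <4 x i16> undef to <2 x i32>
//   %in = insertelement <2 x i32> %bc, i32 %x, i64 0
//   %v1 = bitcast <2 x i32> %in to <4 x i16>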
1647static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1648 bool IsBigEndian,
1649 InstCombiner::BuilderTy &Builder) {
1650 Value *VecOp = InsElt.getOperand(0);
1651 Value *ScalarOp = InsElt.getOperand(1);
1652 Value *IndexOp = InsElt.getOperand(2);
1653
1654 // Pattern depends on endianness because we expect the lower index to be inserted first.
1655 // Big endian:
1656 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1657 // Little endian:
1658 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1659 // Note: It is not safe to do this transform with an arbitrary base vector
1660 // because the bitcast of that vector to fewer/larger elements could
1661 // allow poison to spill into an element that was not poison before.
1662 // TODO: Detect smaller fractions of the scalar.
1663 // TODO: One-use checks are conservative.
1664 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1665 Value *Scalar0, *BaseVec;
1666 uint64_t Index0, Index1;
1667 if (!VTy || (VTy->getNumElements() & 1) ||
1668 !match(IndexOp, m_ConstantInt(Index1)) ||
1669 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1670 m_ConstantInt(Index0))) ||
1671 !match(BaseVec, m_Undef()))
1672 return nullptr;
1673
1674 // The first insert must be to the index one less than this one, and
1675 // the first insert must be to an even index.
1676 if (Index0 + 1 != Index1 || Index0 & 1)
1677 return nullptr;
1678
1679 // For big endian, the high half of the value should be inserted first.
1680 // For little endian, the low half of the value should be inserted first.
1681 Value *X;
1682 uint64_t ShAmt;
1683 if (IsBigEndian) {
1684 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1685 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1686 return nullptr;
1687 } else {
1688 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1689 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1690 return nullptr;
1691 }
1692
1693 Type *SrcTy = X->getType();
1694 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1695 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1696 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1697 return nullptr;
1698
1699 // Bitcast the base vector to a vector type with the source element type.
1700 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1701 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1702
1703 // Scale the insert index for a vector with half as many elements.
1704 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1705 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1706 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1707 return new BitCastInst(NewInsert, VTy);
1708}
1709
1710Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1711 Value *VecOp = IE.getOperand(0);
1712 Value *ScalarOp = IE.getOperand(1);
1713 Value *IdxOp = IE.getOperand(2);
1714
1715 if (auto *V = simplifyInsertElementInst(
1716 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1717 return replaceInstUsesWith(IE, V);
1718
1719 // Canonicalize type of constant indices to i64 to simplify CSE
1720 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1721 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1722 return replaceOperand(IE, 2, NewIdx);
1723
1724 Value *BaseVec, *OtherScalar;
1725 uint64_t OtherIndexVal;
1726 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1727 m_Value(OtherScalar),
1728 m_ConstantInt(OtherIndexVal)))) &&
1729 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1730 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1731 return InsertElementInst::Create(NewIns, OtherScalar,
1732 Builder.getInt64(OtherIndexVal));
1733 }
1734 }
1735
1736 // If the scalar is bitcast and inserted into undef, do the insert in the
1737 // source type followed by bitcast.
1738 // TODO: Generalize for insert into any constant, not just undef?
1739 Value *ScalarSrc;
1740 if (match(VecOp, m_Undef()) &&
1741 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1742 (ScalarSrc->getType()->isIntegerTy() ||
1743 ScalarSrc->getType()->isFloatingPointTy())) {
1744 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1745 // bitcast (inselt undef, ScalarSrc, IdxOp)
1746 Type *ScalarTy = ScalarSrc->getType();
1747 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1748 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1749 : UndefValue::get(VecTy);
1750 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1751 return new BitCastInst(NewInsElt, IE.getType());
1752 }
1753
1754 // If the vector and scalar are both bitcast from the same element type, do
1755 // the insert in that source type followed by bitcast.
1756 Value *VecSrc;
1757 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1758 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1759 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1760 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1761 cast<VectorType>(VecSrc->getType())->getElementType() ==
1762 ScalarSrc->getType()) {
1763 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1764 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1765 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1766 return new BitCastInst(NewInsElt, IE.getType());
1767 }
1768
1769 // If the inserted element was extracted from some other fixed-length vector
1770 // and both indexes are valid constants, try to turn this into a shuffle.
1771 // Cannot handle scalable vector types; the number of elements needed to
1772 // create the shuffle mask is not a compile-time constant.
1773 uint64_t InsertedIdx, ExtractedIdx;
1774 Value *ExtVecOp;
1775 if (isa<FixedVectorType>(IE.getType()) &&
1776 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1777 match(ScalarOp,
1778 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1779 isa<FixedVectorType>(ExtVecOp->getType()) &&
1780 ExtractedIdx <
1781 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1782 // TODO: Looking at the user(s) to determine if this insert is a
1783 // fold-to-shuffle opportunity does not match the usual instcombine
1784 // constraints. We should decide if the transform is worthy based only
1785 // on this instruction and its operands, but that may not work currently.
1786 //
1787 // Here, we are trying to avoid creating shuffles before reaching
1788 // the end of a chain of extract-insert pairs. This is complicated because
1789 // we do not generally form arbitrary shuffle masks in instcombine
1790 // (because those may codegen poorly), but collectShuffleElements() does
1791 // exactly that.
1792 //
1793 // The rules for determining what is an acceptable target-independent
1794 // shuffle mask are fuzzy because they evolve based on the backend's
1795 // capabilities and real-world impact.
1796 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1797 if (!Insert.hasOneUse())
1798 return true;
1799 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1800 if (!InsertUser)
1801 return true;
1802 return false;
1803 };
1804
1805 // Try to form a shuffle from a chain of extract-insert ops.
1806 if (isShuffleRootCandidate(IE)) {
1807 bool Rerun = true;
1808 while (Rerun) {
1809 Rerun = false;
1810
1811 SmallVector<int, 16> Mask;
1812 ShuffleOps LR =
1813 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1814
1815 // The proposed shuffle may be trivial, in which case we shouldn't
1816 // perform the combine.
1817 if (LR.first != &IE && LR.second != &IE) {
1818 // We now have a shuffle of LHS, RHS, Mask.
1819 if (LR.second == nullptr)
1820 LR.second = PoisonValue::get(LR.first->getType());
1821 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1822 }
1823 }
1824 }
1825 }
1826
1827 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1828 unsigned VWidth = VecTy->getNumElements();
1829 APInt PoisonElts(VWidth, 0);
1830 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1831 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1832 PoisonElts)) {
1833 if (V != &IE)
1834 return replaceInstUsesWith(IE, V);
1835 return &IE;
1836 }
1837 }
1838
1838
1839 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1840 return Shuf;
1841
1842 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1843 return NewInsElt;
1844
1845 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1846 return Broadcast;
1847
1848 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1849 return Splat;
1850
1851 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1852 return IdentityShuf;
1853
1854 if (Instruction *Ext = narrowInsElt(IE, Builder))
1855 return Ext;
1856
1857 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1858 return Ext;
1859
1860 return nullptr;
1861}
1862
1863/// Return true if we can evaluate the specified expression tree if the vector
1864/// elements were shuffled in a different order.
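// An illustrative example of such a reordering (hypothetical IR, not from the
// original source); both the insertelement index and the constant operand can
// be rewritten for the new element order:
//   %v = insertelement <4 x i32> poison, i32 %s, i64 0
//   %a = add <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>
//   %r = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
// -->
//   %v2 = insertelement <4 x i32> poison, i32 %s, i64 2
//   %r  = add <4 x i32> %v2, <i32 3, i32 4, i32 1, i32 2>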
1865static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1866 unsigned Depth = 5) {
1867 // We can always reorder the elements of a constant.
1868 if (isa<Constant>(V))
1869 return true;
1870
1871 // We won't reorder vector arguments. No IPO here.
1872 Instruction *I = dyn_cast<Instruction>(V);
1873 if (!I) return false;
1874
1875 // Two users may expect different orders of the elements. Don't try it.
1876 if (!I->hasOneUse())
1877 return false;
1878
1879 if (Depth == 0) return false;
1880
1881 switch (I->getOpcode()) {
1882 case Instruction::UDiv:
1883 case Instruction::SDiv:
1884 case Instruction::URem:
1885 case Instruction::SRem:
1886 // Propagating an undefined shuffle mask element to integer div/rem is not
1887 // allowed because those opcodes can create immediate undefined behavior
1888 // from an undefined element in an operand.
1889 if (llvm::is_contained(Mask, -1))
1890 return false;
1891 [[fallthrough]];
1892 case Instruction::Add:
1893 case Instruction::FAdd:
1894 case Instruction::Sub:
1895 case Instruction::FSub:
1896 case Instruction::Mul:
1897 case Instruction::FMul:
1898 case Instruction::FDiv:
1899 case Instruction::FRem:
1900 case Instruction::Shl:
1901 case Instruction::LShr:
1902 case Instruction::AShr:
1903 case Instruction::And:
1904 case Instruction::Or:
1905 case Instruction::Xor:
1906 case Instruction::ICmp:
1907 case Instruction::FCmp:
1908 case Instruction::Trunc:
1909 case Instruction::ZExt:
1910 case Instruction::SExt:
1911 case Instruction::FPToUI:
1912 case Instruction::FPToSI:
1913 case Instruction::UIToFP:
1914 case Instruction::SIToFP:
1915 case Instruction::FPTrunc:
1916 case Instruction::FPExt:
1917 case Instruction::GetElementPtr: {
1918 // Bail out if we would create longer vector ops. We could allow creating
1919 // longer vector ops, but that may result in more expensive codegen.
1920 Type *ITy = I->getType();
1921 if (ITy->isVectorTy() &&
1922 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1923 return false;
1924 for (Value *Operand : I->operands()) {
1925 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1926 return false;
1927 }
1928 return true;
1929 }
1930 case Instruction::InsertElement: {
1931 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1932 if (!CI) return false;
1933 int ElementNumber = CI->getLimitedValue();
1934
1935 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1936 // can't put an element into multiple indices.
1937 bool SeenOnce = false;
1938 for (int I : Mask) {
1939 if (I == ElementNumber) {
1940 if (SeenOnce)
1941 return false;
1942 SeenOnce = true;
1943 }
1944 }
1945 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1946 }
1947 }
1948 return false;
1949}
1950
1951/// Rebuild a new instruction just like 'I' but with the new operands given.
1952/// In the event of a type mismatch, the types of the new operands are taken to be correct.
1953static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1954 IRBuilderBase &Builder) {
1955 Builder.SetInsertPoint(I);
1956 switch (I->getOpcode()) {
1957 case Instruction::Add:
1958 case Instruction::FAdd:
1959 case Instruction::Sub:
1960 case Instruction::FSub:
1961 case Instruction::Mul:
1962 case Instruction::FMul:
1963 case Instruction::UDiv:
1964 case Instruction::SDiv:
1965 case Instruction::FDiv:
1966 case Instruction::URem:
1967 case Instruction::SRem:
1968 case Instruction::FRem:
1969 case Instruction::Shl:
1970 case Instruction::LShr:
1971 case Instruction::AShr:
1972 case Instruction::And:
1973 case Instruction::Or:
1974 case Instruction::Xor: {
1975 BinaryOperator *BO = cast<BinaryOperator>(I);
1976 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1977 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1978 NewOps[0], NewOps[1]);
1979 if (auto *NewI = dyn_cast<Instruction>(New)) {
1980 if (isa<OverflowingBinaryOperator>(BO)) {
1981 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1982 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1983 }
1984 if (isa<PossiblyExactOperator>(BO)) {
1985 NewI->setIsExact(BO->isExact());
1986 }
1987 if (isa<FPMathOperator>(BO))
1988 NewI->copyFastMathFlags(I);
1989 }
1990 return New;
1991 }
1992 case Instruction::ICmp:
1993 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1994 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1995 NewOps[1]);
1996 case Instruction::FCmp:
1997 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1998 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1999 NewOps[1]);
2000 case Instruction::Trunc:
2001 case Instruction::ZExt:
2002 case Instruction::SExt:
2003 case Instruction::FPToUI:
2004 case Instruction::FPToSI:
2005 case Instruction::UIToFP:
2006 case Instruction::SIToFP:
2007 case Instruction::FPTrunc:
2008 case Instruction::FPExt: {
2009 // It's possible that the mask has a different number of elements from
2010 // the original cast. We recompute the destination type to match the mask.
2011 Type *DestTy = VectorType::get(
2012 I->getType()->getScalarType(),
2013 cast<VectorType>(NewOps[0]->getType())->getElementCount());
2014 assert(NewOps.size() == 1 && "cast with #ops != 1");
2015 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
2016 DestTy);
2017 }
2018 case Instruction::GetElementPtr: {
2019 Value *Ptr = NewOps[0];
2020 ArrayRef<Value*> Idx = NewOps.slice(1);
2021 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
2022 Ptr, Idx, "",
2023 cast<GEPOperator>(I)->getNoWrapFlags());
2024 }
2025 }
2026 llvm_unreachable("failed to rebuild vector instructions");
2027}
2028
2029static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
2030 IRBuilderBase &Builder) {
2031 // Mask.size() does not need to be equal to the number of vector elements.
2032
2033 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2034 Type *EltTy = V->getType()->getScalarType();
2035
2036 if (isa<PoisonValue>(V))
2037 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
2038
2039 if (match(V, m_Undef()))
2040 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
2041
2042 if (isa<ConstantAggregateZero>(V))
2043 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
2044
2045 if (Constant *C = dyn_cast<Constant>(V))
2046 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
2047 Mask);
2048
2049 Instruction *I = cast<Instruction>(V);
2050 switch (I->getOpcode()) {
2051 case Instruction::Add:
2052 case Instruction::FAdd:
2053 case Instruction::Sub:
2054 case Instruction::FSub:
2055 case Instruction::Mul:
2056 case Instruction::FMul:
2057 case Instruction::UDiv:
2058 case Instruction::SDiv:
2059 case Instruction::FDiv:
2060 case Instruction::URem:
2061 case Instruction::SRem:
2062 case Instruction::FRem:
2063 case Instruction::Shl:
2064 case Instruction::LShr:
2065 case Instruction::AShr:
2066 case Instruction::And:
2067 case Instruction::Or:
2068 case Instruction::Xor:
2069 case Instruction::ICmp:
2070 case Instruction::FCmp:
2071 case Instruction::Trunc:
2072 case Instruction::ZExt:
2073 case Instruction::SExt:
2074 case Instruction::FPToUI:
2075 case Instruction::FPToSI:
2076 case Instruction::UIToFP:
2077 case Instruction::SIToFP:
2078 case Instruction::FPTrunc:
2079 case Instruction::FPExt:
2080 case Instruction::Select:
2081 case Instruction::GetElementPtr: {
2082 SmallVector<Value*, 8> NewOps;
2083 bool NeedsRebuild =
2084 (Mask.size() !=
2085 cast<FixedVectorType>(I->getType())->getNumElements());
2086 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2087 Value *V;
2088 // Recursively call evaluateInDifferentElementOrder on vector arguments
2089 // as well. E.g. GetElementPtr may have scalar operands even if the
2090 // return value is a vector, so we need to examine the operand type.
2091 if (I->getOperand(i)->getType()->isVectorTy())
2092 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
2093 else
2094 V = I->getOperand(i);
2095 NewOps.push_back(V);
2096 NeedsRebuild |= (V != I->getOperand(i));
2097 }
2098 if (NeedsRebuild)
2099 return buildNew(I, NewOps, Builder);
2100 return I;
2101 }
2102 case Instruction::InsertElement: {
2103 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
2104
2105 // The insertelement was inserting at Element. Figure out which element
2106 // that becomes after shuffling. The answer is guaranteed to be unique
2107 // by canEvaluateShuffled.
2108 bool Found = false;
2109 int Index = 0;
2110 for (int e = Mask.size(); Index != e; ++Index) {
2111 if (Mask[Index] == Element) {
2112 Found = true;
2113 break;
2114 }
2115 }
2116
2117 // If the element is not in Mask, there is no need to handle operand 1 (the
2118 // element to be inserted). Just evaluate values in operand 0 according to Mask.
2119 if (!Found)
2120 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2121
2122 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2123 Builder);
2124 Builder.SetInsertPoint(I);
2125 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2126 }
2127 }
2128 llvm_unreachable("failed to reorder elements of vector instruction!");
2129}
2130
2131// Returns true if the shuffle is extracting a contiguous range of values from
2132// LHS, for example:
2133// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2134// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2135// Shuffles to: |EE|FF|GG|HH|
2136// +--+--+--+--+
2137static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2138 ArrayRef<int> Mask) {
2139 unsigned LHSElems =
2140 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2141 unsigned MaskElems = Mask.size();
2142 unsigned BegIdx = Mask.front();
2143 unsigned EndIdx = Mask.back();
2144 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2145 return false;
2146 for (unsigned I = 0; I != MaskElems; ++I)
2147 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2148 return false;
2149 return true;
2150}
2151
2152/// These are the ingredients in an alternate form binary operator as described
2153/// below.
2154struct BinopElts {
2155 BinaryOperator::BinaryOps Opcode;
2156 Value *Op0;
2157 Value *Op1;
2158 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2159 Value *V0 = nullptr, Value *V1 = nullptr) :
2160 Opcode(Opc), Op0(V0), Op1(V1) {}
2161 operator bool() const { return Opcode != 0; }
2162};
2163
2164/// Binops may be transformed into binops with different opcodes and operands.
2165/// Reverse the usual canonicalization to enable folds with the non-canonical
2166/// form of the binop. If a transform is possible, return the elements of the
2167/// new binop. If not, return invalid elements.
2168static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2169 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2170 Type *Ty = BO->getType();
2171 switch (BO->getOpcode()) {
2172 case Instruction::Shl: {
2173 // shl X, C --> mul X, (1 << C)
2174 Constant *C;
2175 if (match(BO1, m_ImmConstant(C))) {
2176 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2177 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2178 assert(ShlOne && "Constant folding of immediate constants failed");
2179 return {Instruction::Mul, BO0, ShlOne};
2180 }
2181 break;
2182 }
2183 case Instruction::Or: {
2184 // or disjoint X, C --> add X, C
2185 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2186 return {Instruction::Add, BO0, BO1};
2187 break;
2188 }
2189 case Instruction::Sub:
2190 // sub 0, X --> mul X, -1
2191 if (match(BO0, m_ZeroInt()))
2192 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2193 break;
2194 default:
2195 break;
2196 }
2197 return {};
2198}
2199
2200/// A select shuffle of a select shuffle with a shared operand can be reduced
2201/// to a single select shuffle. This is an obvious improvement in IR, and the
2202/// backend is expected to lower select shuffles efficiently.
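// An illustrative example (hypothetical IR, not from the original source):
//   %s1 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %s2 = shufflevector <4 x i32> %x, <4 x i32> %s1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
// -->
//   %s2 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 7>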
2203static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2204 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2205
2206 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2207 SmallVector<int, 16> Mask;
2208 Shuf.getShuffleMask(Mask);
2209 unsigned NumElts = Mask.size();
2210
2211 // Canonicalize a select shuffle with common operand as Op1.
2212 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2213 if (ShufOp && ShufOp->isSelect() &&
2214 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2215 std::swap(Op0, Op1);
2216 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2217 }
2218
2219 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2220 if (!ShufOp || !ShufOp->isSelect() ||
2221 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2222 return nullptr;
2223
2224 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2225 SmallVector<int, 16> Mask1;
2226 ShufOp->getShuffleMask(Mask1);
2227 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2228
2229 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2230 if (Y == Op0) {
2231 std::swap(X, Y);
2232 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2233 }
2234
2235 // If the mask chooses from X (operand 0), it stays the same.
2236 // If the mask chooses from the earlier shuffle, the other mask value is
2237 // transferred to the combined select shuffle:
2238 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2239 SmallVector<int, 16> NewMask(NumElts);
2240 for (unsigned i = 0; i != NumElts; ++i)
2241 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2242
2243 // A select mask with undef elements might look like an identity mask.
2244 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2245 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2246 "Unexpected shuffle mask");
2247 return new ShuffleVectorInst(X, Y, NewMask);
2248}
2249
2250static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2251 const SimplifyQuery &SQ) {
2252 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2253
2254 // Are we shuffling together some value and that same value after it has been
2255 // modified by a binop with a constant?
2256 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2257 Constant *C;
2258 bool Op0IsBinop;
2259 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2260 Op0IsBinop = true;
2261 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2262 Op0IsBinop = false;
2263 else
2264 return nullptr;
2265
2266 // The identity constant for a binop leaves a variable operand unchanged. For
2267 // a vector, this is a splat of something like 0, -1, or 1.
2268 // If there's no identity constant for this binop, we're done.
2269 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2270 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2271 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2272 if (!IdC)
2273 return nullptr;
2274
2275 Value *X = Op0IsBinop ? Op1 : Op0;
2276
2277 // Prevent folding when the non-binop operand might have NaN values.
2278 // If X can have NaN elements then we have that the floating point math
2279 // operation in the transformed code may not preserve the exact NaN
2280 // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2281 // This makes the transformation incorrect since the original program would
2282 // have preserved the exact NaN bit-pattern.
2283 // Avoid the folding if X can have NaN elements.
2284 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2285 !isKnownNeverNaN(X, SQ))
2286 return nullptr;
2287
2288 // Shuffle identity constants into the lanes that return the original value.
2289 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2290 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2291 // The existing binop constant vector remains in the same operand position.
2292 ArrayRef<int> Mask = Shuf.getShuffleMask();
2293 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2294 ConstantExpr::getShuffleVector(IdC, C, Mask);
2295
2296 bool MightCreatePoisonOrUB =
2297 is_contained(Mask, PoisonMaskElem) &&
2298 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2299 if (MightCreatePoisonOrUB)
2300 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2301
2302 // shuf (bop X, C), X, M --> bop X, C'
2303 // shuf X, (bop X, C), M --> bop X, C'
2304 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2305 NewBO->copyIRFlags(BO);
2306
2307 // An undef shuffle mask element may propagate as an undef constant element in
2308 // the new binop. That would produce poison where the original code might not.
2309 // If we already made a safe constant, then there's no danger.
2310 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2311 NewBO->dropPoisonGeneratingFlags();
2312 return NewBO;
2313}
2314
2315/// If we have an insert of a scalar to a non-zero element of an undefined
2316/// vector and then shuffle that value, that's the same as inserting to the zero
2317/// element and shuffling. Splatting from the zero element is recognized as the
2318/// canonical form of splat.
2319static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2320 InstCombiner::BuilderTy &Builder) {
2321 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2322 ArrayRef<int> Mask = Shuf.getShuffleMask();
2323 Value *X;
2324 uint64_t IndexC;
2325
2326 // Match a shuffle that is a splat to a non-zero element.
2327 if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
2328 m_ConstantInt(IndexC)))) ||
2329 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2330 return nullptr;
2331
2332 // Insert into element 0 of a poison vector.
2333 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2334 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2335
2336 // Splat from element 0. Any mask element that is poison remains poison.
2337 // For example:
2338 // shuf (inselt poison, X, 2), _, <2,2,undef>
2339 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2340 unsigned NumMaskElts =
2341 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2342 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2343 for (unsigned i = 0; i != NumMaskElts; ++i)
2344 if (Mask[i] == PoisonMaskElem)
2345 NewMask[i] = Mask[i];
2346
2347 return new ShuffleVectorInst(NewIns, NewMask);
2348}
2349
2350/// Try to fold shuffles that are the equivalent of a vector select.
2351Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2352 if (!Shuf.isSelect())
2353 return nullptr;
2354
2355 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2356 // Commuting undef to operand 0 conflicts with another canonicalization.
2357 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2358 if (!match(Shuf.getOperand(1), m_Undef()) &&
2359 Shuf.getMaskValue(0) >= (int)NumElts) {
2360 // TODO: Can we assert that both operands of a shuffle-select are not undef
2361 // (otherwise, it would have been folded by instsimplify)?
2362 Shuf.commute();
2363 return &Shuf;
2364 }
2365
2366 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2367 return I;
2368
2369 if (Instruction *I = foldSelectShuffleWith1Binop(
2370 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2371 return I;
2372
2373 BinaryOperator *B0, *B1;
2374 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2375 !match(Shuf.getOperand(1), m_BinOp(B1)))
2376 return nullptr;
2377
2378 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2379 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2380 // with a multiply, we will exit because C0/C1 will not be set.
2381 Value *X, *Y;
2382 Constant *C0 = nullptr, *C1 = nullptr;
2383 bool ConstantsAreOp1;
2384 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2385 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2386 ConstantsAreOp1 = false;
2387 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2388 m_Neg(m_Value(X)))) &&
2389 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2390 m_Neg(m_Value(Y)))))
2391 ConstantsAreOp1 = true;
2392 else
2393 return nullptr;
2394
2395 // We need matching binops to fold the lanes together.
2396 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2397 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2398 bool DropNSW = false;
2399 if (ConstantsAreOp1 && Opc0 != Opc1) {
2400 // TODO: We drop "nsw" if shift is converted into multiply because it may
2401 // not be correct when the shift amount is BitWidth - 1. We could examine
2402 // each vector element to determine if it is safe to keep that flag.
2403 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2404 DropNSW = true;
2405 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2406 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2407 Opc0 = AltB0.Opcode;
2408 C0 = cast<Constant>(AltB0.Op1);
2409 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2410 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2411 Opc1 = AltB1.Opcode;
2412 C1 = cast<Constant>(AltB1.Op1);
2413 }
2414 }
2415
2416 if (Opc0 != Opc1 || !C0 || !C1)
2417 return nullptr;
2418
2419 // The opcodes must be the same. Use a new name to make that clear.
2420 BinaryOperator::BinaryOps BOpc = Opc0;
2421
2422 // Select the constant elements needed for the single binop.
2423 ArrayRef<int> Mask = Shuf.getShuffleMask();
2424 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2425
2426 // We are moving a binop after a shuffle. When a shuffle has an undefined
2427 // mask element, the result is undefined, but it is not poison or undefined
2428 // behavior. That is not necessarily true for div/rem/shift.
2429 bool MightCreatePoisonOrUB =
2430 is_contained(Mask, PoisonMaskElem) &&
2431 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2432 if (MightCreatePoisonOrUB)
2433 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2434 ConstantsAreOp1);
2435
2436 Value *V;
2437 if (X == Y) {
2438 // Remove a binop and the shuffle by rearranging the constant:
2439 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2440 // shuffle (op C0, V), (op C1, V), M --> op C', V
2441 V = X;
2442 } else {
2443 // If there are 2 different variable operands, we must create a new shuffle
2444 // (select) first, so check uses to ensure that we don't end up with more
2445 // instructions than we started with.
2446 if (!B0->hasOneUse() && !B1->hasOneUse())
2447 return nullptr;
2448
2449 // If we use the original shuffle mask and op1 is *variable*, we would be
2450 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2451 // poison. We do not have to guard against UB when *constants* are op1
2452 // because safe constants guarantee that we do not overflow sdiv/srem (and
2453 // there's no danger for other opcodes).
2454 // TODO: To allow this case, create a new shuffle mask with no undefs.
2455 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2456 return nullptr;
2457
2458 // Note: In general, we do not create new shuffles in InstCombine because we
2459 // do not know if a target can lower an arbitrary shuffle optimally. In this
2460 // case, the shuffle uses the existing mask, so there is no additional risk.
2461
2462 // Select the variable vectors first, then perform the binop:
2463 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2464 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2465 V = Builder.CreateShuffleVector(X, Y, Mask);
2466 }
2467
2468 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2469 Builder.CreateBinOp(BOpc, NewC, V);
2470
2471 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2472 // 1. If we changed an opcode, poison conditions might have changed.
2473 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2474 // where the original code did not. But if we already made a safe constant,
2475 // then there's no danger.
2476 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2477 NewI->copyIRFlags(B0);
2478 NewI->andIRFlags(B1);
2479 if (DropNSW)
2480 NewI->setHasNoSignedWrap(false);
2481 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2482 NewI->dropPoisonGeneratingFlags();
2483 }
2484 return replaceInstUsesWith(Shuf, NewBO);
2485}
2486
2487/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2488/// Example (little endian):
2489/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2490static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2491 bool IsBigEndian) {
2492 // This must be a bitcasted shuffle of 1 vector integer operand.
2493 Type *DestType = Shuf.getType();
2494 Value *X;
2495 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2496 !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2497 return nullptr;
2498
2499 // The source type must have the same number of elements as the shuffle,
2500 // and the source element type must be larger than the shuffle element type.
2501 Type *SrcType = X->getType();
2502 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2503 cast<FixedVectorType>(SrcType)->getNumElements() !=
2504 cast<FixedVectorType>(DestType)->getNumElements() ||
2505 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2506 return nullptr;
2507
2508 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2509 "Expected a shuffle that decreases length");
2510
2511 // Last, check that the mask chooses the correct low bits for each narrow
2512 // element in the result.
2513 uint64_t TruncRatio =
2514 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2515 ArrayRef<int> Mask = Shuf.getShuffleMask();
2516 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2517 if (Mask[i] == PoisonMaskElem)
2518 continue;
2519 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2520 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2521 if (Mask[i] != (int)LSBIndex)
2522 return nullptr;
2523 }
2524
2525 return new TruncInst(X, DestType);
2526}
2527
2528/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2529/// narrowing (concatenating with poison and extracting back to the original
2530/// length). This allows replacing the wide select with a narrow select.
2531static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2532 InstCombiner::BuilderTy &Builder) {
2533 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2534 // of the 1st vector operand of a shuffle.
2535 if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2536 return nullptr;
2537
2538 // The vector being shuffled must be a vector select that we can eliminate.
2539 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2540 Value *Cond, *X, *Y;
2541 if (!match(Shuf.getOperand(0),
2542 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2543 return nullptr;
2544
2545 // We need a narrow condition value. It must be extended with poison elements
2546 // and have the same number of elements as this shuffle.
2547 unsigned NarrowNumElts =
2548 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2549 Value *NarrowCond;
2550 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2551 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2552 NarrowNumElts ||
2553 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2554 return nullptr;
2555
2556 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2557 // -->
2558 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2559 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2560 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2561 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2562}
2563
2564/// Canonicalize FP negate/abs after shuffle.
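// An illustrative example (hypothetical IR, not from the original source):
//   %nx = fneg <4 x float> %x
//   %ny = fneg <4 x float> %y
//   %r  = shufflevector <4 x float> %nx, <4 x float> %ny, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// -->
//   %s  = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %r  = fneg <4 x float> %s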
2565static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2566 InstCombiner::BuilderTy &Builder) {
2567 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2568 Value *X;
2569 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2570 return nullptr;
2571
2572 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2573
2574 // Match 2-input (binary) shuffle.
2575 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2576 Value *Y;
2577 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2578 S0->getOpcode() != S1->getOpcode() ||
2579 (!S0->hasOneUse() && !S1->hasOneUse()))
2580 return nullptr;
2581
2582 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2583 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2584 Instruction *NewF;
2585 if (IsFNeg) {
2586 NewF = UnaryOperator::CreateFNeg(NewShuf);
2587 } else {
2588 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2589 Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
2590 NewF = CallInst::Create(FAbs, {NewShuf});
2591 }
2592 NewF->copyIRFlags(S0);
2593 NewF->andIRFlags(S1);
2594 return NewF;
2595}
2596
2597/// Canonicalize casts after shuffle.
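// An illustrative example (hypothetical IR, not from the original source):
//   %zx = zext <4 x i16> %x to <4 x i32>
//   %zy = zext <4 x i16> %y to <4 x i32>
//   %r  = shufflevector <4 x i32> %zx, <4 x i32> %zy, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// -->
//   %s  = shufflevector <4 x i16> %x, <4 x i16> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %r  = zext <4 x i16> %s to <4 x i32>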
2598static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2599 InstCombiner::BuilderTy &Builder) {
2600 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2601 if (!Cast0)
2602 return nullptr;
2603
2604 // TODO: Allow other opcodes? That would require easing the type restrictions
2605 // below here.
2606 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2607 switch (CastOpcode) {
2608 case Instruction::SExt:
2609 case Instruction::ZExt:
2610 case Instruction::FPToSI:
2611 case Instruction::FPToUI:
2612 case Instruction::SIToFP:
2613 case Instruction::UIToFP:
2614 break;
2615 default:
2616 return nullptr;
2617 }
2618
2619 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2620 VectorType *ShufTy = Shuf.getType();
2621 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2622
2623 // TODO: Allow length-increasing shuffles?
2624 if (ShufTy->getElementCount().getKnownMinValue() >
2625 ShufOpTy->getElementCount().getKnownMinValue())
2626 return nullptr;
2627
2628 // shuffle (cast X), Poison, identity-with-extract-mask -->
2629 // cast (shuffle X, Poison, identity-with-extract-mask).
2630 if (isa<PoisonValue>(Shuf.getOperand(1)) && Cast0->hasOneUse() &&
2631 Shuf.isIdentityWithExtract()) {
2632 auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
2633 PoisonValue::get(CastSrcTy),
2634 Shuf.getShuffleMask());
2635 return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
2636 }
2637
2638 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2639 // Do we have 2 matching cast operands?
2640 if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2641 Cast0->getSrcTy() != Cast1->getSrcTy())
2642 return nullptr;
2643
2644 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2645 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2646 "Expected fixed vector operands for casts and binary shuffle");
2647 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2648 return nullptr;
2649
2650 // At least one of the operands must have only one use (the shuffle).
2651 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2652 return nullptr;
2653
2654 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2655 Value *X = Cast0->getOperand(0);
2656 Value *Y = Cast1->getOperand(0);
2657 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2658 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2659}
2660
2661/// Try to fold an extract subvector operation.
2662static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2663 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2664 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2665 return nullptr;
2666
2667 // Check if we are extracting all bits of an inserted scalar:
2668 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2669 Value *X;
2670 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2671 X->getType()->getPrimitiveSizeInBits() ==
2672 Shuf.getType()->getPrimitiveSizeInBits())
2673 return new BitCastInst(X, Shuf.getType());
2674
2675 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2676 Value *Y;
2677 ArrayRef<int> Mask;
2678 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2679 return nullptr;
2680
2681 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2682 // then combining may result in worse codegen.
2683 if (!Op0->hasOneUse())
2684 return nullptr;
2685
2686 // We are extracting a subvector from a shuffle. Remove excess elements from
2687 // the 1st shuffle mask to eliminate the extract.
2688 //
2689 // This transform is conservatively limited to identity extracts because we do
2690 // not allow arbitrary shuffle mask creation as a target-independent transform
2691 // (because we can't guarantee that will lower efficiently).
2692 //
2693 // If the extracting shuffle has a poison mask element, it transfers to the
2694 // new shuffle mask. Otherwise, copy the original mask element. Example:
2695 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2696 // shuf X, Y, <C0, poison, C2, poison>
2697 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2698 SmallVector<int, 16> NewMask(NumElts);
2699 assert(NumElts < Mask.size() &&
2700 "Identity with extract must have less elements than its inputs");
2701
2702 for (unsigned i = 0; i != NumElts; ++i) {
2703 int ExtractMaskElt = Shuf.getMaskValue(i);
2704 int MaskElt = Mask[i];
2705 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2706 }
2707 return new ShuffleVectorInst(X, Y, NewMask);
2708}
2709
2710/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2711/// operand with the operand of an insertelement.
2712static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2713 InstCombinerImpl &IC) {
2714 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2715 SmallVector<int, 16> Mask;
2716 Shuf.getShuffleMask(Mask);
2717
2718 int NumElts = Mask.size();
2719 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2720
2721 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2722 // not be able to handle it there if the insertelement has >1 use.
2723 // If the shuffle has an insertelement operand but does not choose the
2724 // inserted scalar element from that value, then we can replace that shuffle
2725 // operand with the source vector of the insertelement.
2726 Value *X;
2727 uint64_t IdxC;
2728 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2729 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2730 if (!is_contained(Mask, (int)IdxC))
2731 return IC.replaceOperand(Shuf, 0, X);
2732 }
2733 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2734 // Offset the index constant by the vector width because we are checking for
2735 // accesses to the 2nd vector input of the shuffle.
2736 IdxC += InpNumElts;
2737 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2738 if (!is_contained(Mask, (int)IdxC))
2739 return IC.replaceOperand(Shuf, 1, X);
2740 }
2741 // For the rest of the transform, the shuffle must not change vector sizes.
2742 // TODO: This restriction could be removed if the insert has only one use
2743 // (because the transform would require a new length-changing shuffle).
2744 if (NumElts != InpNumElts)
2745 return nullptr;
2746
2747 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2748 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2749 // We need an insertelement with a constant index.
2750 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2751 m_ConstantInt(IndexC))))
2752 return false;
2753
2754 // Test the shuffle mask to see if it splices the inserted scalar into the
2755 // operand 1 vector of the shuffle.
2756 int NewInsIndex = -1;
2757 for (int i = 0; i != NumElts; ++i) {
2758 // Ignore undef mask elements.
2759 if (Mask[i] == -1)
2760 continue;
2761
2762 // The shuffle takes elements of operand 1 without lane changes.
2763 if (Mask[i] == NumElts + i)
2764 continue;
2765
2766 // The shuffle must choose the inserted scalar exactly once.
2767 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2768 return false;
2769
2770 // The shuffle is placing the inserted scalar into element i.
2771 NewInsIndex = i;
2772 }
2773
2774 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2775
2776 // Index is updated to the potentially translated insertion lane.
2777 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2778 return true;
2779 };
2780
2781 // If the shuffle is unnecessary, insert the scalar operand directly into
2782 // operand 1 of the shuffle. Example:
2783 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2784 Value *Scalar;
2785 ConstantInt *IndexC;
2786 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2787 return InsertElementInst::Create(V1, Scalar, IndexC);
2788
2789 // Try again after commuting shuffle. Example:
2790 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2791 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2792 std::swap(V0, V1);
2793 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2794 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2795 return InsertElementInst::Create(V1, Scalar, IndexC);
2796
2797 return nullptr;
2798}
2799
2800static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2801 // Match the operands as identity with padding (also known as concatenation
2802 // with undef) shuffles of the same source type. The backend is expected to
2803 // recreate these concatenations from a shuffle of narrow operands.
2804 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2805 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2806 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2807 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2808 return nullptr;
2809
2810 // We limit this transform to power-of-2 types because we expect that the
2811 // backend can convert the simplified IR patterns to identical nodes as the
2812 // original IR.
2813 // TODO: If we can verify the same behavior for arbitrary types, the
2814 // power-of-2 checks can be removed.
2815 Value *X = Shuffle0->getOperand(0);
2816 Value *Y = Shuffle1->getOperand(0);
2817 if (X->getType() != Y->getType() ||
2818 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2819 !isPowerOf2_32(
2820 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2821 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2822 match(X, m_Undef()) || match(Y, m_Undef()))
2823 return nullptr;
2824 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2825 match(Shuffle1->getOperand(1), m_Undef()) &&
2826 "Unexpected operand for identity shuffle");
2827
2828 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2829 // operands directly by adjusting the shuffle mask to account for the narrower
2830 // types:
2831 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2832 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2833 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2834 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2835
2836 ArrayRef<int> Mask = Shuf.getShuffleMask();
2837 SmallVector<int, 16> NewMask(Mask.size(), -1);
2838 for (int i = 0, e = Mask.size(); i != e; ++i) {
2839 if (Mask[i] == -1)
2840 continue;
2841
2842 // If this shuffle is choosing an undef element from 1 of the sources, that
2843 // element is undef.
2844 if (Mask[i] < WideElts) {
2845 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2846 continue;
2847 } else {
2848 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2849 continue;
2850 }
2851
2852 // If this shuffle is choosing from the 1st narrow op, the mask element is
2853 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2854 // element is offset down to adjust for the narrow vector widths.
2855 if (Mask[i] < WideElts) {
2856 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2857 NewMask[i] = Mask[i];
2858 } else {
2859 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2860 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2861 }
2862 }
2863 return new ShuffleVectorInst(X, Y, NewMask);
2864}
2865
2866// Splatting the first element of the result of a BinOp, where any of the
2867// BinOp's operands are the result of a first-element splat, can be simplified
2868// to splatting the first element of the BinOp's result.
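// An illustrative example (hypothetical IR, not from the original source):
//   %sx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
//   %b  = add <4 x i32> %sx, %y
//   %r  = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// -->
//   %b2 = add <4 x i32> %x, %y
//   %r  = shufflevector <4 x i32> %b2, <4 x i32> poison, <4 x i32> zeroinitializer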
2869Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2870 if (!match(SVI.getOperand(1), m_Poison()) ||
2871 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2872 !SVI.getOperand(0)->hasOneUse())
2873 return nullptr;
2874
2875 Value *Op0 = SVI.getOperand(0);
2876 Value *X, *Y;
2877 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Poison(), m_ZeroMask()),
2878 m_Value(Y))) &&
2879 !match(Op0, m_BinOp(m_Value(X),
2880 m_Shuffle(m_Value(Y), m_Poison(), m_ZeroMask()))))
2881 return nullptr;
2882 if (X->getType() != Y->getType())
2883 return nullptr;
2884
2885 auto *BinOp = cast<BinaryOperator>(Op0);
2886 if (!isSafeToSpeculativelyExecute(BinOp))
2887 return nullptr;
2888
2889 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2890 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2891 NewBOI->copyIRFlags(BinOp);
2892
2893 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2894}
2895
2896Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2897 Value *LHS = SVI.getOperand(0);
2898 Value *RHS = SVI.getOperand(1);
2899 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2900 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2901 SVI.getType(), ShufQuery))
2902 return replaceInstUsesWith(SVI, V);
2903
2904 if (Instruction *I = simplifyBinOpSplats(SVI))
2905 return I;
2906
2907 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2908 // order to support scalable vectors.
2909 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2910 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2911
2912 if (isa<ScalableVectorType>(LHS->getType()))
2913 return nullptr;
2914
2915 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2916 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2917
2918 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2919 //
2920 // if X and Y are of the same (vector) type, and the element size is not
2921 // changed by the bitcasts, we can distribute the bitcasts through the
2922 // shuffle, hopefully reducing the number of instructions. We make sure that
2923 // at least one bitcast only has one use, so we don't *increase* the number of
2924 // instructions here.
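// A sketch in IR (illustrative; any pair of equal-sized element types behaves
// the same way):
//   %xb = bitcast <4 x i32> %x to <4 x float>
//   %yb = bitcast <4 x i32> %y to <4 x float>
//   %s  = shufflevector <4 x float> %xb, <4 x float> %yb, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// becomes
//   %s0 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %s  = bitcast <4 x i32> %s0 to <4 x float>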
2925 Value *X, *Y;
2926 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2927 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2928 X->getType()->getScalarSizeInBits() ==
2929 SVI.getType()->getScalarSizeInBits() &&
2930 (LHS->hasOneUse() || RHS->hasOneUse())) {
2931 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2932 SVI.getName() + ".uncasted");
2933 return new BitCastInst(V, SVI.getType());
2934 }
2935
2936 ArrayRef<int> Mask = SVI.getShuffleMask();
2937
2938 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2939 // simulated shuffle can simplify, then this shuffle is unnecessary:
2940 // shuf (bitcast X), undef, Mask --> bitcast X'
2941 // TODO: This could be extended to allow length-changing shuffles.
2942 // The transform might also be obsoleted if we allowed canonicalization
2943 // of bitcasted shuffles.
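// E.g. (illustrative): for
//   %b = bitcast <2 x i64> %x to <4 x i32>
//   %s = shufflevector <4 x i32> %b, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
// the <4 x i32> mask scales to <1, 0> on the <2 x i64> source. If shuffling %x
// with <1, 0> folds (e.g. %x is a known splat), %s becomes a plain bitcast of
// that simplified value.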
2944 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2945 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2946 // Try to create a scaled mask constant.
2947 auto *XType = cast<FixedVectorType>(X->getType());
2948 unsigned XNumElts = XType->getNumElements();
2949 SmallVector<int, 16> ScaledMask;
2950 if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2951 // If the shuffled source vector simplifies, cast that value to this
2952 // shuffle's type.
2953 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2954 ScaledMask, XType, ShufQuery))
2955 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2956 }
2957 }
2958
2959 // shuffle x, x, mask --> shuffle x, undef, mask'
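// E.g. (illustrative) with LHSWidth == 4, the mask <0, 5, 2, 7> on "shuffle x, x"
// only ever selects elements of x, so it becomes <0, 1, 2, 3> on "shuffle x, undef".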
2960 if (LHS == RHS) {
2961 assert(!match(RHS, m_Undef()) &&
2962 "Shuffle with 2 undef ops not simplified?");
2963 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2964 }
2965
2966 // shuffle undef, x, mask --> shuffle x, undef, mask'
2967 if (match(LHS, m_Undef())) {
2968 SVI.commute();
2969 return &SVI;
2970 }
2971
2972 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2973 return I;
2974
2975 if (Instruction *I = foldSelectShuffle(SVI))
2976 return I;
2977
2978 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2979 return I;
2980
2981 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2982 return I;
2983
2984 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2985 return I;
2986
2987 if (Instruction *I = foldCastShuffle(SVI, Builder))
2988 return I;
2989
2990 APInt PoisonElts(VWidth, 0);
2991 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2992 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2993 if (V != &SVI)
2994 return replaceInstUsesWith(SVI, V);
2995 return &SVI;
2996 }
2997
2998 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2999 return I;
3000
3001 // These transforms have the potential to lose undef knowledge, so they are
3002 // intentionally placed after SimplifyDemandedVectorElts().
3003 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
3004 return I;
3005 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
3006 return I;
3007
3008 if (match(RHS, m_Constant())) {
3009 if (auto *SI = dyn_cast<SelectInst>(LHS)) {
3010 // We cannot do this fold for elementwise select since ShuffleVector is
3011 // not elementwise.
3012 if (SI->getCondition()->getType()->isIntegerTy() &&
3013 (isa<PoisonValue>(RHS) ||
3014 isGuaranteedNotToBePoison(SI->getCondition()))) {
3015 if (Instruction *I = FoldOpIntoSelect(SVI, SI))
3016 return I;
3017 }
3018 }
3019 if (auto *PN = dyn_cast<PHINode>(LHS)) {
3020 if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
3021 return I;
3022 }
3023 }
3024
3025 if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
3026 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
3027 return replaceInstUsesWith(SVI, V);
3028 }
3029
3030 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3031 // a non-vector type. We can instead bitcast the original vector followed by
3032 // an extract of the desired element:
3033 //
3034 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3035 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3036 // %1 = bitcast <4 x i8> %sroa to i32
3037 // Becomes:
3038 // %bc = bitcast <16 x i8> %in to <4 x i32>
3039 // %ext = extractelement <4 x i32> %bc, i32 0
3040 //
3041 // If the shuffle is extracting a contiguous range of values from the input
3042 // vector then each use which is a bitcast of the extracted size can be
3043 // replaced. This will work if the vector types are compatible, and the begin
3044 // index is aligned to a value in the casted vector type. If the begin index
3045 // isn't aligned then we can shuffle the original vector (keeping the same
3046 // vector type) before extracting.
3047 //
3048 // This code will bail out if the target type is fundamentally incompatible
3049 // with vectors of the source type.
3050 //
3051 // Example of <16 x i8>, target type i32:
3052 // Index range [4,8): v-----------v Will work.
3053 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3054 // <16 x i8>: | | | | | | | | | | | | | | | | |
3055 // <4 x i32>: | | | | |
3056 // +-----------+-----------+-----------+-----------+
3057 // Index range [6,10): ^-----------^ Needs an extra shuffle.
3058 // Target type i40: ^--------------^ Won't work, bail.
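// Working through the misaligned case above (illustrative numbers): with
// SrcElemBitWidth = 8, TgtElemBitWidth = 32 and BegIdx = 6, the product
// 6 * 8 = 48 is not a multiple of 32, so BegIsAligned is false and the input
// is first shuffled so the extracted range starts at element 0.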
3059 bool MadeChange = false;
3060 if (isShuffleExtractingFromLHS(SVI, Mask)) {
3061 Value *V = LHS;
3062 unsigned MaskElems = Mask.size();
3063 auto *SrcTy = cast<FixedVectorType>(V->getType());
3064 unsigned VecBitWidth = DL.getTypeSizeInBits(SrcTy);
3065 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
3066 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3067 unsigned SrcNumElems = SrcTy->getNumElements();
3070 for (User *U : SVI.users())
3071 if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
3072 // Only visit bitcasts that weren't previously handled.
3073 if (BC->use_empty())
3074 continue;
3075 // Prefer to combine bitcasts of bitcasts before attempting this fold.
3076 if (BC->hasOneUse()) {
3077 auto *BC2 = dyn_cast<BitCastInst>(BC->user_back());
3078 if (BC2 && isEliminableCastPair(BC, BC2))
3079 continue;
3080 }
3081 BCs.push_back(BC);
3082 }
3083 for (BitCastInst *BC : BCs) {
3084 unsigned BegIdx = Mask.front();
3085 Type *TgtTy = BC->getDestTy();
3086 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
3087 if (!TgtElemBitWidth)
3088 continue;
3089 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3090 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3091 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3092 if (!VecBitWidthsEqual)
3093 continue;
3094 if (!VectorType::isValidElementType(TgtTy))
3095 continue;
3096 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
3097 if (!BegIsAligned) {
3098 // Shuffle the input so [0,MaskElems) contains the output, and
3099 // [MaskElems,SrcNumElems) is undef.
3100 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3101 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3102 ShuffleMask[I] = Idx;
3103 V = Builder.CreateShuffleVector(V, ShuffleMask,
3104 SVI.getName() + ".extract");
3105 BegIdx = 0;
3106 }
3107 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3108 assert(SrcElemsPerTgtElem);
3109 BegIdx /= SrcElemsPerTgtElem;
3110 auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
3111 if (Inserted)
3112 It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
3113 auto *Ext = Builder.CreateExtractElement(It->second, BegIdx,
3114 SVI.getName() + ".extract");
3115 // The shufflevector isn't being replaced: the bitcast that used it
3116 // is. InstCombine will visit the newly-created instructions.
3117 replaceInstUsesWith(*BC, Ext);
3118 MadeChange = true;
3119 }
3120 }
3121
3122 // If the LHS is a shufflevector itself, see if we can combine it with this
3123 // one without producing an unusual shuffle.
3124 // Cases that might be simplified:
3125 // 1.
3126 // x1=shuffle(v1,v2,mask1)
3127 // x=shuffle(x1,undef,mask)
3128 // ==>
3129 // x=shuffle(v1,undef,newMask)
3130 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3131 // 2.
3132 // x1=shuffle(v1,undef,mask1)
3133 // x=shuffle(x1,x2,mask)
3134 // where v1.size() == mask1.size()
3135 // ==>
3136 // x=shuffle(v1,x2,newMask)
3137 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3138 // 3.
3139 // x2=shuffle(v2,undef,mask2)
3140 // x=shuffle(x1,x2,mask)
3141 // where v2.size() == mask2.size()
3142 // ==>
3143 // x=shuffle(x1,v2,newMask)
3144 // newMask[i] = (mask[i] < x1.size())
3145 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3146 // 4.
3147 // x1=shuffle(v1,undef,mask1)
3148 // x2=shuffle(v2,undef,mask2)
3149 // x=shuffle(x1,x2,mask)
3150 // where v1.size() == v2.size()
3151 // ==>
3152 // x=shuffle(v1,v2,newMask)
3153 // newMask[i] = (mask[i] < x1.size())
3154 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3155 //
3156 // Here we are really conservative:
3157 // we are absolutely afraid of producing a shuffle mask not in the input
3158 // program, because the code gen may not be smart enough to turn a merged
3159 // shuffle into two specific shuffles: it may produce worse code. As such,
3160 // we only merge two shuffles if the result is either a splat or one of the
3161 // input shuffle masks. In this case, merging the shuffles just removes
3162 // one instruction, which we know is safe. This is good for things like
3163 // turning: (splat(splat)) -> splat, or
3164 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
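// A sketch of case 1 in IR (illustrative types and masks):
//   %x1 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 0, i32 0, i32 5, i32 5>
//   %x  = shufflevector <4 x i32> %x1, <4 x i32> poison, <4 x i32> zeroinitializer
// can become
//   %x  = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> zeroinitializer
// because the merged mask is a splat, one of the forms this code accepts.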
3165 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3166 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3167 if (LHSShuffle)
3168 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3169 !match(RHS, m_Poison()))
3170 LHSShuffle = nullptr;
3171 if (RHSShuffle)
3172 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3173 RHSShuffle = nullptr;
3174 if (!LHSShuffle && !RHSShuffle)
3175 return MadeChange ? &SVI : nullptr;
3176
3177 Value* LHSOp0 = nullptr;
3178 Value* LHSOp1 = nullptr;
3179 Value* RHSOp0 = nullptr;
3180 unsigned LHSOp0Width = 0;
3181 unsigned RHSOp0Width = 0;
3182 if (LHSShuffle) {
3183 LHSOp0 = LHSShuffle->getOperand(0);
3184 LHSOp1 = LHSShuffle->getOperand(1);
3185 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3186 }
3187 if (RHSShuffle) {
3188 RHSOp0 = RHSShuffle->getOperand(0);
3189 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3190 }
3191 Value* newLHS = LHS;
3192 Value* newRHS = RHS;
3193 if (LHSShuffle) {
3194 // case 1
3195 if (match(RHS, m_Poison())) {
3196 newLHS = LHSOp0;
3197 newRHS = LHSOp1;
3198 }
3199 // case 2 or 4
3200 else if (LHSOp0Width == LHSWidth) {
3201 newLHS = LHSOp0;
3202 }
3203 }
3204 // case 3 or 4
3205 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3206 newRHS = RHSOp0;
3207 }
3208 // case 4
3209 if (LHSOp0 == RHSOp0) {
3210 newLHS = LHSOp0;
3211 newRHS = nullptr;
3212 }
3213
3214 if (newLHS == LHS && newRHS == RHS)
3215 return MadeChange ? &SVI : nullptr;
3216
3217 ArrayRef<int> LHSMask;
3218 ArrayRef<int> RHSMask;
3219 if (newLHS != LHS)
3220 LHSMask = LHSShuffle->getShuffleMask();
3221 if (RHSShuffle && newRHS != RHS)
3222 RHSMask = RHSShuffle->getShuffleMask();
3223
3224 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3225 SmallVector<int, 16> newMask;
3226 bool isSplat = true;
3227 int SplatElt = -1;
3228 // Create a new mask for the new ShuffleVectorInst so that the new
3229 // ShuffleVectorInst is equivalent to the original one.
3230 for (unsigned i = 0; i < VWidth; ++i) {
3231 int eltMask;
3232 if (Mask[i] < 0) {
3233 // This element is a poison value.
3234 eltMask = -1;
3235 } else if (Mask[i] < (int)LHSWidth) {
3236 // This element is from the left-hand side vector operand.
3237 //
3238 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3239 // new mask value for the element.
3240 if (newLHS != LHS) {
3241 eltMask = LHSMask[Mask[i]];
3242 // If the value selected is a poison value, explicitly specify it
3243 // with a -1 mask value.
3244 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3245 eltMask = -1;
3246 } else
3247 eltMask = Mask[i];
3248 } else {
3249 // This element is from the right-hand side vector operand.
3250 //
3251 // If the value selected is a poison value, explicitly specify it
3252 // with a -1 mask value. (case 1)
3253 if (match(RHS, m_Poison()))
3254 eltMask = -1;
3255 // If RHS is going to be replaced (case 3 or 4), calculate the
3256 // new mask value for the element.
3257 else if (newRHS != RHS) {
3258 eltMask = RHSMask[Mask[i]-LHSWidth];
3259 // If the value selected is a poison value, explicitly specify it
3260 // with a -1 mask value.
3261 if (eltMask >= (int)RHSOp0Width) {
3262 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3263 "should have been check above");
3264 eltMask = -1;
3265 }
3266 } else
3267 eltMask = Mask[i]-LHSWidth;
3268
3269 // If LHS's width is changed, shift the mask value accordingly.
3270 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3271 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3272 // If newRHS == newLHS, we want to remap any references from newRHS to
3273 // newLHS so that we can properly identify splats that may occur due to
3274 // obfuscation across the two vectors.
3275 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3276 eltMask += newLHSWidth;
3277 }
3278
3279 // Check if this could still be a splat.
3280 if (eltMask >= 0) {
3281 if (SplatElt >= 0 && SplatElt != eltMask)
3282 isSplat = false;
3283 SplatElt = eltMask;
3284 }
3285
3286 newMask.push_back(eltMask);
3287 }
3288
3289 // If the result mask is equal to one of the original shuffle masks,
3290 // or is a splat, do the replacement.
3291 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3292 if (!newRHS)
3293 newRHS = PoisonValue::get(newLHS->getType());
3294 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3295 }
3296
3297 return MadeChange ? &SVI : nullptr;
3298}