//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
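///
/// For example (illustrative only), extracting lane 0 of a single-use binop
/// can be rewritten as a scalar binop of the extracted operands:
///   %v = add <4 x i32> %a, %b
///   %e = extractelement <4 x i32> %v, i64 0
/// -->
///   %a0 = extractelement <4 x i32> %a, i64 0
///   %b0 = extractelement <4 x i32> %b, i64 0
///   %e  = add i32 %a0, %b0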
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // Index needs to be lower than the minimum size of the vector, because
    // for scalable vector, the vector size is known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpPredicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
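// A hedged sketch of the intended rewrite (illustrative IR, simplified):
//   %vec.phi = phi <4 x float> [ %init, %entry ], [ %vec.next, %loop ]
//   %vec.next = fadd <4 x float> %vec.phi, %step
//   %x = extractelement <4 x float> %vec.phi, i64 0
// becomes a scalar recurrence, with the other binop operand extracted at the
// same constant index:
//   %s.phi = phi float [ %init.0, %entry ], [ %s.next, %loop ]
//   %s.next = fadd float %s.phi, %step.0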
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise, return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. One operand is the
      // scalar PHI, and the other is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
      // Preserve operand order for binary operation to preserve semantics of
      // non-commutative operations.
      Value *FirstOp = (B0->getOperand(0) == PN) ? scalarPHI : Op;
      Value *SecondOp = (B0->getOperand(0) == PN) ? Op : scalarPHI;
      Value *newPHIUser =
          InsertNewInstWith(BinaryOperator::CreateWithCopiedFlags(
                                B0->getOpcode(), FirstOp, SecondOp, B0),
                            B0->getIterator());
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}

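// foldBitcastExtElt (below) folds extracts of bitcasted values. A minimal
// illustration of the scalar-integer case, assuming a little-endian target:
//   %v = bitcast i32 %x to <4 x i8>
//   %e = extractelement <4 x i8> %v, i64 1
// -->
//   %s = lshr i32 %x, 8
//   %e = trunc i32 %s to i8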
Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperandType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if ((!ShiftAmountC ||
         isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
        Ext.getVectorOperand()->hasOneUse()) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
    // of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove insertelement, if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved to SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find the elements of V demanded by UserInstr. If this returns false, we
/// were not able to determine all of the demanded elements.
static bool findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr,
                                         APInt &UnionUsedElts) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UnionUsedElts.setBit(EEIIndexC->getZExtValue());
      return true;
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    for (auto I : llvm::seq(MaskNumElts)) {
      unsigned MaskVal = Shuffle->getMaskValue(I);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UnionUsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UnionUsedElts.setBit(MaskVal - VWidth);
    }
    return true;
  }
  default:
    break;
  }

  return false;
}

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
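/// Illustrative result (not from the original comments): if a <4 x i32> %v is
/// used only by "extractelement %v, i64 0" and "extractelement %v, i64 2",
/// the returned mask has bits 0 and 2 set.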
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      if (!findDemandedEltsBySingleUser(V, I, UnionUsedElts))
        return APInt::getAllOnes(VWidth);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
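/// For example (illustrative), an index written as "i32 1" is rebuilt as
/// "i64 1" so that otherwise-identical extracts and inserts CSE together.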
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement will for instance try to do out of bounds accesses
  // because of the values of %c1 and/or %c2, the sequence could be optimized
  // early. This is currently not possible because constant folding will reach
  // an unreachable assertion if it doesn't find a constant operand.
  if (auto *SI = dyn_cast<SelectInst>(SrcVec))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<ConstantInt>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  bool HasKnownValidIndex = false;
  if (IndexC) {
    // Canonicalize type of constant indices to i64 to simplify CSE
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    HasKnownValidIndex = IndexC->getValue().ult(NumElts);

    if (auto *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // Index needs to be lower than the minimum size of the vector, because
      // for scalable vector, the vector size is known at run time.
      if (IID == Intrinsic::stepvector && IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return undefined.
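        // e.g. (illustrative): extracting lane 2 of a stepvector result just
        // yields the constant 2 in the element type.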
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For fixed-length vector, it's invalid to extract out-of-range element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // If SrcVec is a subvector starting at index 0, extract from the
  // wider source vector.
  Value *V;
  if (match(SrcVec,
            m_Intrinsic<Intrinsic::vector_extract>(m_Value(V), m_Zero())))
    return ExtractElementInst::Create(V, Index);

  // TODO: come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  // If the binop is not speculatable, we cannot hoist the extractelement if
  // it may make the operand poison.
  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index) &&
      (HasKnownValidIndex ||
       isSafeToSpeculativelyExecuteWithVariableReplaced(BO))) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpPredicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
    return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
                                          SrcCmpInst);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal by value. So if both are constants here, they must be
      // different values; extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setNoWrapFlags(GEP->getNoWrapFlags());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      int SplatIndex = getSplatIndex(SVI->getShuffleMask());
      // We know the all-0 splat must be reading from the first operand, even
      // in the case of scalable vectors (vscale is always > 0).
      if (SplatIndex == 0)
        return ExtractElementInst::Create(SVI->getOperand(0),
                                          Builder.getInt64(0));

      if (isa<FixedVectorType>(SVI->getType())) {
        std::optional<int> SrcIdx;
        // getSplatIndex returns -1 to mean not-found.
        if (SplatIndex != -1)
          SrcIdx = SplatIndex;
        else if (ConstantInt *CI = dyn_cast<ConstantInt>(Index))
          SrcIdx = SVI->getMaskValue(CI->getZExtValue());

        if (SrcIdx) {
          Value *Src;
          unsigned LHSWidth =
              cast<FixedVectorType>(SVI->getOperand(0)->getType())
                  ->getNumElements();

          if (*SrcIdx < 0)
            return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
          if (*SrcIdx < (int)LHSWidth)
            Src = SVI->getOperand(0);
          else {
            *SrcIdx -= LHSWidth;
            Src = SVI->getOperand(1);
          }
          Type *Int64Ty = Type::getInt64Ty(EI.getContext());
          return ExtractElementInst::Create(
              Src, ConstantInt::get(Int64Ty, *SrcIdx, false));
        }
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable type, the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
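/// Example (illustrative): for a chain that inserts, into lane 0, a value
/// extracted from lane 2 of LHS, the recovered mask entry for lane 0 is 2;
/// lanes that were left as poison get a mask entry of -1.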
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
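/// A hedged sketch of the widening step: extracts from a <2 x i32> %src that
/// feed inserts into a <4 x i32> destination cause %src to be widened first:
///   %wide = shufflevector <2 x i32> %src, <2 x i32> poison,
///                         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
/// so that a single shufflevector can later replace the insert/extract pairs.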
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;

  Value *ExtVecOp = ExtElt->getVectorOperand();
  // Bail out on constant vectors.
  if (isa<ConstantData>(ExtVecOp))
    return false;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst->getIterator());
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of
/// insertelement/extractelement pairs. If PermittedRHS is set, then we must
/// either use it or not rely on the second vector source. Return a std::pair
/// containing the left and right vectors of the proposed shuffle (or 0), and
/// set the Mask parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
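/// For exposition: in the ShuffleOps pair defined below, the first value is
/// the proposed left-hand source and the second is the right-hand source; a
/// null second value means no second source has been committed to yet.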
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted; see if they were all extractvalue'd from
/// the same source aggregate at the exact same element indices.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
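/// Illustrative example (added for exposition):
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } poison, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// Here the reconstructed value %i1 can simply be replaced by %agg.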
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value for each element of the aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either we've
  // reconstructed the full initializer or can't visit any more `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find a defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find a defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type from which the extraction was made
    /// didn't match the aggregate type into which the insertion was made,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // If an aggregate element is defined in UseBB, we can't use it in PredBB.
  bool EltDefinedInUseBB = false;

  // Given the value \p Elt that was being inserted into element \p EltIdx of an
  // aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was made.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB) {
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
      if (Elt && Elt->getParent() == *UseBB)
        EltDefinedInUseBB = true;
    }
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find an appropriate source aggregate for each of the
  // elements, and see that it's the same aggregate for each element. If so,
  // return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element,");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found a source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, passthrough whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found a source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for the aggregate, where we'll insert the final PHI that will
  // be used instead of OrigIVI. The basic block of OrigIVI is *not* the right
  // choice. We should look at which blocks each of the AggElts is defined in;
  // they all should be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find the source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate
  // from which all the elements were originally extracted?
  // Note that we want the map to have a stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  bool FoundSrcAgg = false;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.try_emplace(Pred);
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) == AggregateDescription::Found) {
      FoundSrcAgg = true;
      IV.first->second = *SourceAggregate;
    } else {
      // If UseBB is the single successor of Pred, we can add InsertValue to
      // Pred.
      auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
      if (!BI || !BI->isUnconditional())
        return nullptr;
    }
  }

  if (!FoundSrcAgg)
    return nullptr;

  // Do some sanity checks if we need to add insertvalue into predecessors.
  auto OrigBB = OrigIVI.getParent();
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    // Element is defined in UseBB, so it can't be used in predecessors.
    if (EltDefinedInUseBB)
      return nullptr;

    // Doing this transformation across a loop boundary may create a dead loop,
    // so we should avoid that situation. But LoopInfo is not generally
    // available here, so we must be conservative.
    // If OrigIVI is in UseBB and it's the only successor of PredBB, PredBB
    // can't be in an inner loop.
    if (UseBB != OrigBB)
      return nullptr;

    // Avoid constructing a constant aggregate because a constant value may
    // expose more optimizations.
    bool ConstAgg = true;
    for (auto Val : AggElts) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, It.first);
      if (!isa<Constant>(Elt)) {
        ConstAgg = false;
        break;
      }
    }
    if (ConstAgg)
      return nullptr;
  }

  // For predecessors without an appropriate source aggregate, create one in
  // the predecessor.
  for (auto &It : SourceAggregates) {
    if (Describe(It.second) == AggregateDescription::Found)
      continue;

    BasicBlock *Pred = It.first;
    Builder.SetInsertPoint(Pred->getTerminator());
    Value *V = PoisonValue::get(AggTy);
    for (auto [Idx, Val] : enumerate(AggElts)) {
      Value *Elt = (*Val)->DoPHITranslation(UseBB, Pred);
      V = Builder.CreateInsertValue(V, Elt, Idx);
    }

    It.second = V;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0, i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}
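// Hedged example: with two <4 x i32> operands, the mask <0, 5, 2, 7> keeps
// every lane in place (lane i comes from lane i of one operand or the other),
// so the shuffle is equivalent to a vector select; a mask like <1, 0, 2, 3>
// crosses lanes and would not qualify.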

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an infinite loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check none of the intermediate steps have any additional uses, except
    // for the root insertelement instruction, which can be re-used, if it
    // inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Poison()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

1469/// If we have an insertelement instruction feeding into another insertelement
1470/// and the 2nd is inserting a constant into the vector, canonicalize that
1471/// constant insertion before the insertion of a variable:
1472///
1473/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
1474/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
1475///
1476/// This has the potential of eliminating the 2nd insertelement instruction
1477/// via constant folding of the scalar constant into a vector constant.
1478static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
1479 InstCombiner::BuilderTy &Builder) {
1480 auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
1481 if (!InsElt1 || !InsElt1->hasOneUse())
1482 return nullptr;
1483
1484 Value *X, *Y;
1485 Constant *ScalarC;
1486 ConstantInt *IdxC1, *IdxC2;
1487 if (match(InsElt1->getOperand(0), m_Value(X)) &&
1488 match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
1489 match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
1490 match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
1491 match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
1492 Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
1493 return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
1494 }
1495
1496 return nullptr;
1497}
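A hypothetical IR example of the canonicalization above (editorial, not from the source; values are invented):
define <4 x float> @hoist_constant_insert(<4 x float> %x, float %y) {
  %i1 = insertelement <4 x float> %x, float %y, i64 1
  %i2 = insertelement <4 x float> %i1, float 42.0, i64 2
  ; Expected: insertelement (insertelement %x, float 42.0, i64 2), float %y, i64 1,
  ; so the constant insert can constant-fold if %x is a constant.
  ret <4 x float> %i2
}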
1498
1499/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
1500/// --> shufflevector X, CVec', Mask'
1501static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
1502 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1503 // Bail out if the parent has more than one use. In that case, we'd be
1504 // replacing the insertelt with a shuffle, and that's not a clear win.
1505 if (!Inst || !Inst->hasOneUse())
1506 return nullptr;
1507 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1508 // The shuffle must have a constant vector operand. The insertelt must have
1509 // a constant scalar being inserted at a constant position in the vector.
1510 Constant *ShufConstVec, *InsEltScalar;
1511 uint64_t InsEltIndex;
1512 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1513 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1514 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1515 return nullptr;
1516
1517 // Adding an element to an arbitrary shuffle could be expensive, but a
1518 // shuffle that selects elements from vectors without crossing lanes is
1519 // assumed cheap.
1520 // If we're just adding a constant into that shuffle, it will still be
1521 // cheap.
1522 if (!isShuffleEquivalentToSelect(*Shuf))
1523 return nullptr;
1524
1525 // From the above 'select' check, we know that the mask has the same number
1526 // of elements as the vector input operands. We also know that each constant
1527 // input element is used in its lane and cannot be used more than once by
1528 // the shuffle. Therefore, replace the constant in the shuffle's constant
1529 // vector with the insertelt constant. Replace the constant in the shuffle's
1530 // mask vector with the insertelt index plus the length of the vector
1531 // (because the constant vector operand of a shuffle is always the 2nd
1532 // operand).
1533 ArrayRef<int> Mask = Shuf->getShuffleMask();
1534 unsigned NumElts = Mask.size();
1535 SmallVector<Constant *, 16> NewShufElts(NumElts);
1536 SmallVector<int, 16> NewMaskElts(NumElts);
1537 for (unsigned I = 0; I != NumElts; ++I) {
1538 if (I == InsEltIndex) {
1539 NewShufElts[I] = InsEltScalar;
1540 NewMaskElts[I] = InsEltIndex + NumElts;
1541 } else {
1542 // Copy over the existing values.
1543 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1544 NewMaskElts[I] = Mask[I];
1545 }
1546
1547 // Bail if we failed to find an element.
1548 if (!NewShufElts[I])
1549 return nullptr;
1550 }
1551
1552 // Create new operands for a shuffle that includes the constant of the
1553 // original insertelt. The old shuffle will be dead now.
1554 return new ShuffleVectorInst(Shuf->getOperand(0),
1555 ConstantVector::get(NewShufElts), NewMaskElts);
1556 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1557 // Transform sequences of insertelement ops with constant data/indexes into
1558 // a single shuffle op.
1559 // Cannot handle scalable types; the number of elements needed to create a
1560 // shuffle mask is not a compile-time constant.
1561 if (isa<ScalableVectorType>(InsElt.getType()))
1562 return nullptr;
1563 unsigned NumElts =
1564 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1565
1566 uint64_t InsertIdx[2];
1567 Constant *Val[2];
1568 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1569 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1570 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1571 !match(IEI->getOperand(1), m_Constant(Val[1])))
1572 return nullptr;
1573 SmallVector<Constant *, 16> Values(NumElts);
1574 SmallVector<int, 16> Mask(NumElts);
1575 auto ValI = std::begin(Val);
1576 // Generate new constant vector and mask.
1577 // We have 2 values/masks from the insertelements instructions. Insert them
1578 // into new value/mask vectors.
1579 for (uint64_t I : InsertIdx) {
1580 if (!Values[I]) {
1581 Values[I] = *ValI;
1582 Mask[I] = NumElts + I;
1583 }
1584 ++ValI;
1585 }
1586 // Remaining values are filled with 'poison' values.
1587 for (unsigned I = 0; I < NumElts; ++I) {
1588 if (!Values[I]) {
1589 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1590 Mask[I] = I;
1591 }
1592 }
1593 // Create new operands for a shuffle that includes the constant of the
1594 // original insertelt.
1595 return new ShuffleVectorInst(IEI->getOperand(0),
1596 ConstantVector::get(Values), Mask);
1597 }
1598 return nullptr;
1599}
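A sketch in IR of the second case above, two constant inserts collapsing into one shuffle (editorial example; names and constants are invented):
define <4 x i32> @insert_two_constants(<4 x i32> %x) {
  %i1 = insertelement <4 x i32> %x, i32 7, i64 0
  %i2 = insertelement <4 x i32> %i1, i32 9, i64 3
  ; Expected: shufflevector <4 x i32> %x, <4 x i32> <i32 7, i32 poison, i32 poison, i32 9>,
  ;                         <4 x i32> <i32 4, i32 1, i32 2, i32 7>
  ret <4 x i32> %i2
}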
1600
1601/// If both the base vector and the inserted element are extended from the same
1602/// type, do the insert element in the narrow source type followed by extend.
1603/// TODO: This can be extended to include other cast opcodes, but particularly
1604/// if we create a wider insertelement, make sure codegen is not harmed.
1605static Instruction *narrowInsElt(InsertElementInst &InsElt,
1606 InstCombiner::BuilderTy &Builder) {
1607 // We are creating a vector extend. If the original vector extend has another
1608 // use, that would mean we end up with 2 vector extends, so avoid that.
1609 // TODO: We could ease the use-clause to "if at least one op has one use"
1610 // (assuming that the source types match - see next TODO comment).
1611 Value *Vec = InsElt.getOperand(0);
1612 if (!Vec->hasOneUse())
1613 return nullptr;
1614
1615 Value *Scalar = InsElt.getOperand(1);
1616 Value *X, *Y;
1617 CastInst::CastOps CastOpcode;
1618 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1619 CastOpcode = Instruction::FPExt;
1620 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1621 CastOpcode = Instruction::SExt;
1622 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1623 CastOpcode = Instruction::ZExt;
1624 else
1625 return nullptr;
1626
1627 // TODO: We can allow mismatched types by creating an intermediate cast.
1628 if (X->getType()->getScalarType() != Y->getType())
1629 return nullptr;
1630
1631 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1632 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1633 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1634}
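Illustrative IR (editorial example): both the vector and the scalar are zero-extended from i16, so the insert can happen in the narrow type:
define <4 x i32> @insert_into_zext(<4 x i16> %x, i16 %y) {
  %vec = zext <4 x i16> %x to <4 x i32>
  %elt = zext i16 %y to i32
  %ins = insertelement <4 x i32> %vec, i32 %elt, i64 0
  ; Expected: zext (insertelement <4 x i16> %x, i16 %y, i64 0) to <4 x i32>
  ret <4 x i32> %ins
}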
1635
1636/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1637/// try to convert to a single insert with appropriate bitcasts.
1638static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1639 bool IsBigEndian,
1640 InstCombiner::BuilderTy &Builder) {
1641 Value *VecOp = InsElt.getOperand(0);
1642 Value *ScalarOp = InsElt.getOperand(1);
1643 Value *IndexOp = InsElt.getOperand(2);
1644
1645 // Pattern depends on endianness because we expect the lower index to be inserted first.
1646 // Big endian:
1647 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1648 // Little endian:
1649 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1650 // Note: It is not safe to do this transform with an arbitrary base vector
1651 // because the bitcast of that vector to fewer/larger elements could
1652 // allow poison to spill into an element that was not poison before.
1653 // TODO: Detect smaller fractions of the scalar.
1654 // TODO: One-use checks are conservative.
1655 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1656 Value *Scalar0, *BaseVec;
1657 uint64_t Index0, Index1;
1658 if (!VTy || (VTy->getNumElements() & 1) ||
1659 !match(IndexOp, m_ConstantInt(Index1)) ||
1660 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1661 m_ConstantInt(Index0))) ||
1662 !match(BaseVec, m_Undef()))
1663 return nullptr;
1664
1665 // The first insert must be to the index one less than this one, and
1666 // the first insert must be to an even index.
1667 if (Index0 + 1 != Index1 || Index0 & 1)
1668 return nullptr;
1669
1670 // For big endian, the high half of the value should be inserted first.
1671 // For little endian, the low half of the value should be inserted first.
1672 Value *X;
1673 uint64_t ShAmt;
1674 if (IsBigEndian) {
1675 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1676 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1677 return nullptr;
1678 } else {
1679 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1680 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1681 return nullptr;
1682 }
1683
1684 Type *SrcTy = X->getType();
1685 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1686 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1687 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1688 return nullptr;
1689
1690 // Bitcast the base vector to a vector type with the source element type.
1691 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1692 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1693
1694 // Scale the insert index for a vector with half as many elements.
1695 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1696 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1697 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1698 return new BitCastInst(NewInsert, VTy);
1699}
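A hypothetical little-endian IR example of the pattern above (editorial; on a big-endian target the two halves would be swapped):
define <4 x i32> @insert_both_halves(i64 %x) {
  %lo = trunc i64 %x to i32
  %shr = lshr i64 %x, 32
  %hi = trunc i64 %shr to i32
  %v0 = insertelement <4 x i32> poison, i32 %lo, i64 2
  %v1 = insertelement <4 x i32> %v0, i32 %hi, i64 3
  ; Expected: bitcast poison to <2 x i64>, insertelement of %x at index 1, bitcast back.
  ret <4 x i32> %v1
}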
1700
1701Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1702 Value *VecOp = IE.getOperand(0);
1703 Value *ScalarOp = IE.getOperand(1);
1704 Value *IdxOp = IE.getOperand(2);
1705
1706 if (auto *V = simplifyInsertElementInst(
1707 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1708 return replaceInstUsesWith(IE, V);
1709
1710 // Canonicalize type of constant indices to i64 to simplify CSE
1711 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1712 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1713 return replaceOperand(IE, 2, NewIdx);
1714
1715 Value *BaseVec, *OtherScalar;
1716 uint64_t OtherIndexVal;
1717 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1718 m_Value(OtherScalar),
1719 m_ConstantInt(OtherIndexVal)))) &&
1720 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1721 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1722 return InsertElementInst::Create(NewIns, OtherScalar,
1723 Builder.getInt64(OtherIndexVal));
1724 }
1725 }
1726
1727 // If the scalar is bitcast and inserted into undef, do the insert in the
1728 // source type followed by bitcast.
1729 // TODO: Generalize for insert into any constant, not just undef?
1730 Value *ScalarSrc;
1731 if (match(VecOp, m_Undef()) &&
1732 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1733 (ScalarSrc->getType()->isIntegerTy() ||
1734 ScalarSrc->getType()->isFloatingPointTy())) {
1735 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1736 // bitcast (inselt undef, ScalarSrc, IdxOp)
1737 Type *ScalarTy = ScalarSrc->getType();
1738 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1739 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1740 : UndefValue::get(VecTy);
1741 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1742 return new BitCastInst(NewInsElt, IE.getType());
1743 }
1744
1745 // If the vector and scalar are both bitcast from the same element type, do
1746 // the insert in that source type followed by bitcast.
1747 Value *VecSrc;
1748 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1749 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1750 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1751 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1752 cast<VectorType>(VecSrc->getType())->getElementType() ==
1753 ScalarSrc->getType()) {
1754 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1755 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1756 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1757 return new BitCastInst(NewInsElt, IE.getType());
1758 }
1759
1760 // If the inserted element was extracted from some other fixed-length vector
1761 // and both indexes are valid constants, try to turn this into a shuffle.
1762 // Cannot handle scalable vector types; the number of elements needed to
1763 // create the shuffle mask is not a compile-time constant.
1764 uint64_t InsertedIdx, ExtractedIdx;
1765 Value *ExtVecOp;
1766 if (isa<FixedVectorType>(IE.getType()) &&
1767 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1768 match(ScalarOp,
1769 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1770 isa<FixedVectorType>(ExtVecOp->getType()) &&
1771 ExtractedIdx <
1772 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1773 // TODO: Looking at the user(s) to determine if this insert is a
1774 // fold-to-shuffle opportunity does not match the usual instcombine
1775 // constraints. We should decide if the transform is worthy based only
1776 // on this instruction and its operands, but that may not work currently.
1777 //
1778 // Here, we are trying to avoid creating shuffles before reaching
1779 // the end of a chain of extract-insert pairs. This is complicated because
1780 // we do not generally form arbitrary shuffle masks in instcombine
1781 // (because those may codegen poorly), but collectShuffleElements() does
1782 // exactly that.
1783 //
1784 // The rules for determining what is an acceptable target-independent
1785 // shuffle mask are fuzzy because they evolve based on the backend's
1786 // capabilities and real-world impact.
1787 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1788 if (!Insert.hasOneUse())
1789 return true;
1790 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1791 if (!InsertUser)
1792 return true;
1793 return false;
1794 };
1795
1796 // Try to form a shuffle from a chain of extract-insert ops.
1797 if (isShuffleRootCandidate(IE)) {
1798 bool Rerun = true;
1799 while (Rerun) {
1800 Rerun = false;
1801
1802 SmallVector<int, 16> Mask;
1803 ShuffleOps LR =
1804 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1805
1806 // The proposed shuffle may be trivial, in which case we shouldn't
1807 // perform the combine.
1808 if (LR.first != &IE && LR.second != &IE) {
1809 // We now have a shuffle of LHS, RHS, Mask.
1810 if (LR.second == nullptr)
1811 LR.second = PoisonValue::get(LR.first->getType());
1812 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1813 }
1814 }
1815 }
1816 }
1817
1818 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1819 unsigned VWidth = VecTy->getNumElements();
1820 APInt PoisonElts(VWidth, 0);
1821 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1822 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1823 PoisonElts)) {
1824 if (V != &IE)
1825 return replaceInstUsesWith(IE, V);
1826 return &IE;
1827 }
1828 }
1829
1830 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1831 return Shuf;
1832
1833 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1834 return NewInsElt;
1835
1836 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1837 return Broadcast;
1838
1839 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1840 return Splat;
1841
1842 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1843 return IdentityShuf;
1844
1845 if (Instruction *Ext = narrowInsElt(IE, Builder))
1846 return Ext;
1847
1848 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1849 return Ext;
1850
1851 return nullptr;
1852}
1853
1854/// Return true if we can evaluate the specified expression tree if the vector
1855/// elements were shuffled in a different order.
1856static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1857 unsigned Depth = 5) {
1858 // We can always reorder the elements of a constant.
1859 if (isa<Constant>(V))
1860 return true;
1861
1862 // We won't reorder vector arguments. No IPO here.
1863 Instruction *I = dyn_cast<Instruction>(V);
1864 if (!I) return false;
1865
1866 // Two users may expect different orders of the elements. Don't try it.
1867 if (!I->hasOneUse())
1868 return false;
1869
1870 if (Depth == 0) return false;
1871
1872 switch (I->getOpcode()) {
1873 case Instruction::UDiv:
1874 case Instruction::SDiv:
1875 case Instruction::URem:
1876 case Instruction::SRem:
1877 // Propagating an undefined shuffle mask element to integer div/rem is not
1878 // allowed because those opcodes can create immediate undefined behavior
1879 // from an undefined element in an operand.
1880 if (llvm::is_contained(Mask, -1))
1881 return false;
1882 [[fallthrough]];
1883 case Instruction::Add:
1884 case Instruction::FAdd:
1885 case Instruction::Sub:
1886 case Instruction::FSub:
1887 case Instruction::Mul:
1888 case Instruction::FMul:
1889 case Instruction::FDiv:
1890 case Instruction::FRem:
1891 case Instruction::Shl:
1892 case Instruction::LShr:
1893 case Instruction::AShr:
1894 case Instruction::And:
1895 case Instruction::Or:
1896 case Instruction::Xor:
1897 case Instruction::ICmp:
1898 case Instruction::FCmp:
1899 case Instruction::Trunc:
1900 case Instruction::ZExt:
1901 case Instruction::SExt:
1902 case Instruction::FPToUI:
1903 case Instruction::FPToSI:
1904 case Instruction::UIToFP:
1905 case Instruction::SIToFP:
1906 case Instruction::FPTrunc:
1907 case Instruction::FPExt:
1908 case Instruction::GetElementPtr: {
1909 // Bail out if we would create longer vector ops. We could allow creating
1910 // longer vector ops, but that may result in more expensive codegen.
1911 Type *ITy = I->getType();
1912 if (ITy->isVectorTy() &&
1913 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1914 return false;
1915 for (Value *Operand : I->operands()) {
1916 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1917 return false;
1918 }
1919 return true;
1920 }
1921 case Instruction::InsertElement: {
1922 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1923 if (!CI) return false;
1924 int ElementNumber = CI->getLimitedValue();
1925
1926 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1927 // can't put an element into multiple indices.
1928 bool SeenOnce = false;
1929 for (int I : Mask) {
1930 if (I == ElementNumber) {
1931 if (SeenOnce)
1932 return false;
1933 SeenOnce = true;
1934 }
1935 }
1936 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1937 }
1938 }
1939 return false;
1940}
1941
1942/// Rebuild a new instruction just like 'I' but with the new operands given.
1943/// In the event of a type mismatch, the types of the new operands are taken to be correct.
1944static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1945 IRBuilderBase &Builder) {
1946 Builder.SetInsertPoint(I);
1947 switch (I->getOpcode()) {
1948 case Instruction::Add:
1949 case Instruction::FAdd:
1950 case Instruction::Sub:
1951 case Instruction::FSub:
1952 case Instruction::Mul:
1953 case Instruction::FMul:
1954 case Instruction::UDiv:
1955 case Instruction::SDiv:
1956 case Instruction::FDiv:
1957 case Instruction::URem:
1958 case Instruction::SRem:
1959 case Instruction::FRem:
1960 case Instruction::Shl:
1961 case Instruction::LShr:
1962 case Instruction::AShr:
1963 case Instruction::And:
1964 case Instruction::Or:
1965 case Instruction::Xor: {
1966 BinaryOperator *BO = cast<BinaryOperator>(I);
1967 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1968 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1969 NewOps[0], NewOps[1]);
1970 if (auto *NewI = dyn_cast<Instruction>(New)) {
1971 if (isa<OverflowingBinaryOperator>(BO)) {
1972 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1973 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1974 }
1975 if (isa<PossiblyExactOperator>(BO)) {
1976 NewI->setIsExact(BO->isExact());
1977 }
1978 if (isa<FPMathOperator>(BO))
1979 NewI->copyFastMathFlags(I);
1980 }
1981 return New;
1982 }
1983 case Instruction::ICmp:
1984 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1985 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1986 NewOps[1]);
1987 case Instruction::FCmp:
1988 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1989 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1990 NewOps[1]);
1991 case Instruction::Trunc:
1992 case Instruction::ZExt:
1993 case Instruction::SExt:
1994 case Instruction::FPToUI:
1995 case Instruction::FPToSI:
1996 case Instruction::UIToFP:
1997 case Instruction::SIToFP:
1998 case Instruction::FPTrunc:
1999 case Instruction::FPExt: {
2000 // It's possible that the mask has a different number of elements from
2001 // the original cast. We recompute the destination type to match the mask.
2002 Type *DestTy = VectorType::get(
2003 I->getType()->getScalarType(),
2004 cast<VectorType>(NewOps[0]->getType())->getElementCount());
2005 assert(NewOps.size() == 1 && "cast with #ops != 1");
2006 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
2007 DestTy);
2008 }
2009 case Instruction::GetElementPtr: {
2010 Value *Ptr = NewOps[0];
2011 ArrayRef<Value*> Idx = NewOps.slice(1);
2012 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
2013 Ptr, Idx, "",
2014 cast<GEPOperator>(I)->getNoWrapFlags());
2015 }
2016 }
2017 llvm_unreachable("failed to rebuild vector instructions");
2018}
2019
2020static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
2021 IRBuilderBase &Builder) {
2022 // Mask.size() does not need to be equal to the number of vector elements.
2023
2024 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
2025 Type *EltTy = V->getType()->getScalarType();
2026
2027 if (isa<PoisonValue>(V))
2028 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
2029
2030 if (match(V, m_Undef()))
2031 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
2032
2033 if (isa<ConstantAggregateZero>(V))
2034 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
2035
2036 if (Constant *C = dyn_cast<Constant>(V))
2037 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
2038 Mask);
2039
2040 Instruction *I = cast<Instruction>(V);
2041 switch (I->getOpcode()) {
2042 case Instruction::Add:
2043 case Instruction::FAdd:
2044 case Instruction::Sub:
2045 case Instruction::FSub:
2046 case Instruction::Mul:
2047 case Instruction::FMul:
2048 case Instruction::UDiv:
2049 case Instruction::SDiv:
2050 case Instruction::FDiv:
2051 case Instruction::URem:
2052 case Instruction::SRem:
2053 case Instruction::FRem:
2054 case Instruction::Shl:
2055 case Instruction::LShr:
2056 case Instruction::AShr:
2057 case Instruction::And:
2058 case Instruction::Or:
2059 case Instruction::Xor:
2060 case Instruction::ICmp:
2061 case Instruction::FCmp:
2062 case Instruction::Trunc:
2063 case Instruction::ZExt:
2064 case Instruction::SExt:
2065 case Instruction::FPToUI:
2066 case Instruction::FPToSI:
2067 case Instruction::UIToFP:
2068 case Instruction::SIToFP:
2069 case Instruction::FPTrunc:
2070 case Instruction::FPExt:
2071 case Instruction::Select:
2072 case Instruction::GetElementPtr: {
2073 SmallVector<Value*, 8> NewOps;
2074 bool NeedsRebuild =
2075 (Mask.size() !=
2076 cast<FixedVectorType>(I->getType())->getNumElements());
2077 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
2078 Value *V;
2079 // Recursively call evaluateInDifferentElementOrder on vector arguments
2080 // as well. E.g. GetElementPtr may have scalar operands even if the
2081 // return value is a vector, so we need to examine the operand type.
2082 if (I->getOperand(i)->getType()->isVectorTy())
2083 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
2084 else
2085 V = I->getOperand(i);
2086 NewOps.push_back(V);
2087 NeedsRebuild |= (V != I->getOperand(i));
2088 }
2089 if (NeedsRebuild)
2090 return buildNew(I, NewOps, Builder);
2091 return I;
2092 }
2093 case Instruction::InsertElement: {
2094 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
2095
2096 // The insertelement was inserting at Element. Figure out which element
2097 // that becomes after shuffling. The answer is guaranteed to be unique
2098 // by CanEvaluateShuffled.
2099 bool Found = false;
2100 int Index = 0;
2101 for (int e = Mask.size(); Index != e; ++Index) {
2102 if (Mask[Index] == Element) {
2103 Found = true;
2104 break;
2105 }
2106 }
2107
2108 // If element is not in Mask, no need to handle the operand 1 (element to
2109 // be inserted). Just evaluate values in operand 0 according to Mask.
2110 if (!Found)
2111 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2112
2113 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2114 Builder);
2115 Builder.SetInsertPoint(I);
2116 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2117 }
2118 }
2119 llvm_unreachable("failed to reorder elements of vector instruction!");
2120}
2121
2122// Returns true if the shuffle is extracting a contiguous range of values from
2123// LHS, for example:
2124// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2125// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2126// Shuffles to: |EE|FF|GG|HH|
2127// +--+--+--+--+
2128static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2129 ArrayRef<int> Mask) {
2130 unsigned LHSElems =
2131 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2132 unsigned MaskElems = Mask.size();
2133 unsigned BegIdx = Mask.front();
2134 unsigned EndIdx = Mask.back();
2135 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2136 return false;
2137 for (unsigned I = 0; I != MaskElems; ++I)
2138 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2139 return false;
2140 return true;
2141}
2142
2143/// These are the ingredients in an alternate form binary operator as described
2144/// below.
2145struct BinopElts {
2146 BinaryOperator::BinaryOps Opcode;
2147 Value *Op0;
2148 Value *Op1;
2149 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2150 Value *V0 = nullptr, Value *V1 = nullptr) :
2151 Opcode(Opc), Op0(V0), Op1(V1) {}
2152 operator bool() const { return Opcode != 0; }
2153};
2154
2155/// Binops may be transformed into binops with different opcodes and operands.
2156/// Reverse the usual canonicalization to enable folds with the non-canonical
2157/// form of the binop. If a transform is possible, return the elements of the
2158/// new binop. If not, return invalid elements.
2159static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2160 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2161 Type *Ty = BO->getType();
2162 switch (BO->getOpcode()) {
2163 case Instruction::Shl: {
2164 // shl X, C --> mul X, (1 << C)
2165 Constant *C;
2166 if (match(BO1, m_ImmConstant(C))) {
2167 Constant *ShlOne = ConstantFoldBinaryOpOperands(
2168 Instruction::Shl, ConstantInt::get(Ty, 1), C, DL);
2169 assert(ShlOne && "Constant folding of immediate constants failed");
2170 return {Instruction::Mul, BO0, ShlOne};
2171 }
2172 break;
2173 }
2174 case Instruction::Or: {
2175 // or disjoint X, C --> add X, C
2176 if (cast<PossiblyDisjointInst>(BO)->isDisjoint())
2177 return {Instruction::Add, BO0, BO1};
2178 break;
2179 }
2180 case Instruction::Sub:
2181 // sub 0, X --> mul X, -1
2182 if (match(BO0, m_ZeroInt()))
2183 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2184 break;
2185 default:
2186 break;
2187 }
2188 return {};
2189}
2190
2191/// A select shuffle of a select shuffle with a shared operand can be reduced
2192/// to a single select shuffle. This is an obvious improvement in IR, and the
2193/// backend is expected to lower select shuffles efficiently.
2194static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2195 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2196
2197 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2198 SmallVector<int, 16> Mask;
2199 Shuf.getShuffleMask(Mask);
2200 unsigned NumElts = Mask.size();
2201
2202 // Canonicalize a select shuffle with common operand as Op1.
2203 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2204 if (ShufOp && ShufOp->isSelect() &&
2205 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2206 std::swap(Op0, Op1);
2207 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2208 }
2209
2210 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2211 if (!ShufOp || !ShufOp->isSelect() ||
2212 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2213 return nullptr;
2214
2215 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2216 SmallVector<int, 16> Mask1;
2217 ShufOp->getShuffleMask(Mask1);
2218 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2219
2220 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2221 if (Y == Op0) {
2222 std::swap(X, Y);
2223 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2224 }
2225
2226 // If the mask chooses from X (operand 0), it stays the same.
2227 // If the mask chooses from the earlier shuffle, the other mask value is
2228 // transferred to the combined select shuffle:
2229 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2230 SmallVector<int, 16> NewMask(NumElts);
2231 for (unsigned i = 0; i != NumElts; ++i)
2232 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2233
2234 // A select mask with undef elements might look like an identity mask.
2235 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2236 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2237 "Unexpected shuffle mask");
2238 return new ShuffleVectorInst(X, Y, NewMask);
2239}
2240
2241static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2242 const SimplifyQuery &SQ) {
2243 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2244
2245 // Are we shuffling together some value and that same value after it has been
2246 // modified by a binop with a constant?
2247 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2248 Constant *C;
2249 bool Op0IsBinop;
2250 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2251 Op0IsBinop = true;
2252 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2253 Op0IsBinop = false;
2254 else
2255 return nullptr;
2256
2257 // The identity constant for a binop leaves a variable operand unchanged. For
2258 // a vector, this is a splat of something like 0, -1, or 1.
2259 // If there's no identity constant for this binop, we're done.
2260 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2261 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2262 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2263 if (!IdC)
2264 return nullptr;
2265
2266 Value *X = Op0IsBinop ? Op1 : Op0;
2267
2268 // Prevent folding in the case the non-binop operand might have NaN values.
2269 // If X can have NaN elements then we have that the floating point math
2270 // operation in the transformed code may not preserve the exact NaN
2271 // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2272 // This makes the transformation incorrect since the original program would
2273 // have preserved the exact NaN bit-pattern.
2274 // Avoid the folding if X can have NaN elements.
2275 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2276 !isKnownNeverNaN(X, SQ))
2277 return nullptr;
2278
2279 // Shuffle identity constants into the lanes that return the original value.
2280 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2281 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2282 // The existing binop constant vector remains in the same operand position.
2283 ArrayRef<int> Mask = Shuf.getShuffleMask();
2284 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2285 ConstantExpr::getShuffleVector(IdC, C, Mask);
2286
2287 bool MightCreatePoisonOrUB =
2288 is_contained(Mask, PoisonMaskElem) &&
2289 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2290 if (MightCreatePoisonOrUB)
2291 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2292
2293 // shuf (bop X, C), X, M --> bop X, C'
2294 // shuf X, (bop X, C), M --> bop X, C'
2295 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2296 NewBO->copyIRFlags(BO);
2297
2298 // An undef shuffle mask element may propagate as an undef constant element in
2299 // the new binop. That would produce poison where the original code might not.
2300 // If we already made a safe constant, then there's no danger.
2301 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2302 NewBO->dropPoisonGeneratingFlags();
2303 return NewBO;
2304}
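Illustrative IR (editorial example): lanes taken from plain %x receive the add's identity constant 0, so one binop survives:
define <4 x i32> @select_shuffle_with_one_binop(<4 x i32> %x) {
  %add = add <4 x i32> %x, <i32 1, i32 2, i32 3, i32 4>
  %sel = shufflevector <4 x i32> %add, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  ; Expected: add <4 x i32> %x, <i32 1, i32 0, i32 0, i32 4>
  ret <4 x i32> %sel
}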
2305
2306/// If we have an insert of a scalar to a non-zero element of an undefined
2307/// vector and then shuffle that value, that's the same as inserting to the zero
2308/// element and shuffling. Splatting from the zero element is recognized as the
2309/// canonical form of splat.
2310static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2311 InstCombiner::BuilderTy &Builder) {
2312 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2313 ArrayRef<int> Mask = Shuf.getShuffleMask();
2314 Value *X;
2315 uint64_t IndexC;
2316
2317 // Match a shuffle that is a splat to a non-zero element.
2318 if (!match(Op0, m_OneUse(m_InsertElt(m_Poison(), m_Value(X),
2319 m_ConstantInt(IndexC)))) ||
2320 !match(Op1, m_Poison()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2321 return nullptr;
2322
2323 // Insert into element 0 of a poison vector.
2324 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2325 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2326
2327 // Splat from element 0. Any mask element that is poison remains poison.
2328 // For example:
2329 // shuf (inselt poison, X, 2), _, <2,2,undef>
2330 // --> shuf (inselt poison, X, 0), poison, <0,0,undef>
2331 unsigned NumMaskElts =
2332 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2333 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2334 for (unsigned i = 0; i != NumMaskElts; ++i)
2335 if (Mask[i] == PoisonMaskElem)
2336 NewMask[i] = Mask[i];
2337
2338 return new ShuffleVectorInst(NewIns, NewMask);
2339}
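Illustrative IR (editorial example) of the canonicalization above:
define <4 x float> @splat_from_lane_3(float %x) {
  %ins = insertelement <4 x float> poison, float %x, i64 3
  %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ; Expected: insert %x at lane 0 instead and splat with a zeroinitializer mask.
  ret <4 x float> %splat
}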
2340
2341/// Try to fold shuffles that are the equivalent of a vector select.
2342Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2343 if (!Shuf.isSelect())
2344 return nullptr;
2345
2346 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2347 // Commuting undef to operand 0 conflicts with another canonicalization.
2348 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2349 if (!match(Shuf.getOperand(1), m_Undef()) &&
2350 Shuf.getMaskValue(0) >= (int)NumElts) {
2351 // TODO: Can we assert that both operands of a shuffle-select are not undef
2352 // (otherwise, it would have been folded by instsimplify)?
2353 Shuf.commute();
2354 return &Shuf;
2355 }
2356
2357 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2358 return I;
2359
2360 if (Instruction *I = foldSelectShuffleWith1Binop(
2361 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2362 return I;
2363
2364 BinaryOperator *B0, *B1;
2365 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2366 !match(Shuf.getOperand(1), m_BinOp(B1)))
2367 return nullptr;
2368
2369 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2370 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2371 // with a multiply, we will exit because C0/C1 will not be set.
2372 Value *X, *Y;
2373 Constant *C0 = nullptr, *C1 = nullptr;
2374 bool ConstantsAreOp1;
2375 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2376 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2377 ConstantsAreOp1 = false;
2378 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2379 m_Neg(m_Value(X)))) &&
2380 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2381 m_Neg(m_Value(Y)))))
2382 ConstantsAreOp1 = true;
2383 else
2384 return nullptr;
2385
2386 // We need matching binops to fold the lanes together.
2387 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2388 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2389 bool DropNSW = false;
2390 if (ConstantsAreOp1 && Opc0 != Opc1) {
2391 // TODO: We drop "nsw" if shift is converted into multiply because it may
2392 // not be correct when the shift amount is BitWidth - 1. We could examine
2393 // each vector element to determine if it is safe to keep that flag.
2394 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2395 DropNSW = true;
2396 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2397 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2398 Opc0 = AltB0.Opcode;
2399 C0 = cast<Constant>(AltB0.Op1);
2400 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2401 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2402 Opc1 = AltB1.Opcode;
2403 C1 = cast<Constant>(AltB1.Op1);
2404 }
2405 }
2406
2407 if (Opc0 != Opc1 || !C0 || !C1)
2408 return nullptr;
2409
2410 // The opcodes must be the same. Use a new name to make that clear.
2411 BinaryOperator::BinaryOps BOpc = Opc0;
2412
2413 // Select the constant elements needed for the single binop.
2414 ArrayRef<int> Mask = Shuf.getShuffleMask();
2415 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2416
2417 // We are moving a binop after a shuffle. When a shuffle has an undefined
2418 // mask element, the result is undefined, but it is not poison or undefined
2419 // behavior. That is not necessarily true for div/rem/shift.
2420 bool MightCreatePoisonOrUB =
2421 is_contained(Mask, PoisonMaskElem) &&
2422 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2423 if (MightCreatePoisonOrUB)
2424 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2425 ConstantsAreOp1);
2426
2427 Value *V;
2428 if (X == Y) {
2429 // Remove a binop and the shuffle by rearranging the constant:
2430 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2431 // shuffle (op C0, V), (op C1, V), M --> op C', V
2432 V = X;
2433 } else {
2434 // If there are 2 different variable operands, we must create a new shuffle
2435 // (select) first, so check uses to ensure that we don't end up with more
2436 // instructions than we started with.
2437 if (!B0->hasOneUse() && !B1->hasOneUse())
2438 return nullptr;
2439
2440 // If we use the original shuffle mask and op1 is *variable*, we would be
2441 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2442 // poison. We do not have to guard against UB when *constants* are op1
2443 // because safe constants guarantee that we do not overflow sdiv/srem (and
2444 // there's no danger for other opcodes).
2445 // TODO: To allow this case, create a new shuffle mask with no undefs.
2446 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2447 return nullptr;
2448
2449 // Note: In general, we do not create new shuffles in InstCombine because we
2450 // do not know if a target can lower an arbitrary shuffle optimally. In this
2451 // case, the shuffle uses the existing mask, so there is no additional risk.
2452
2453 // Select the variable vectors first, then perform the binop:
2454 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2455 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2456 V = Builder.CreateShuffleVector(X, Y, Mask);
2457 }
2458
2459 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2460 Builder.CreateBinOp(BOpc, NewC, V);
2461
2462 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2463 // 1. If we changed an opcode, poison conditions might have changed.
2464 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2465 // where the original code did not. But if we already made a safe constant,
2466 // then there's no danger.
2467 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2468 NewI->copyIRFlags(B0);
2469 NewI->andIRFlags(B1);
2470 if (DropNSW)
2471 NewI->setHasNoSignedWrap(false);
2472 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2473 NewI->dropPoisonGeneratingFlags();
2474 }
2475 return replaceInstUsesWith(Shuf, NewBO);
2476}
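A sketch in IR of one case handled above (editorial example): the shl side is treated as a multiply via getAlternateBinop, so both lanes share one opcode:
define <4 x i32> @select_shuffle_of_mul_and_shl(<4 x i32> %x) {
  %mul = mul <4 x i32> %x, <i32 5, i32 5, i32 5, i32 5>
  %shl = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  %sel = shufflevector <4 x i32> %mul, <4 x i32> %shl, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ; Expected: mul <4 x i32> %x, <i32 5, i32 5, i32 8, i32 8>
  ret <4 x i32> %sel
}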
2477
2478/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2479/// Example (little endian):
2480/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2481static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2482 bool IsBigEndian) {
2483 // This must be a bitcasted shuffle of 1 vector integer operand.
2484 Type *DestType = Shuf.getType();
2485 Value *X;
2486 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2487 !match(Shuf.getOperand(1), m_Poison()) || !DestType->isIntOrIntVectorTy())
2488 return nullptr;
2489
2490 // The source type must have the same number of elements as the shuffle,
2491 // and the source element type must be larger than the shuffle element type.
2492 Type *SrcType = X->getType();
2493 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2494 cast<FixedVectorType>(SrcType)->getNumElements() !=
2495 cast<FixedVectorType>(DestType)->getNumElements() ||
2496 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2497 return nullptr;
2498
2499 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2500 "Expected a shuffle that decreases length");
2501
2502 // Last, check that the mask chooses the correct low bits for each narrow
2503 // element in the result.
2504 uint64_t TruncRatio =
2505 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2506 ArrayRef<int> Mask = Shuf.getShuffleMask();
2507 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2508 if (Mask[i] == PoisonMaskElem)
2509 continue;
2510 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2511 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2512 if (Mask[i] != (int)LSBIndex)
2513 return nullptr;
2514 }
2515
2516 return new TruncInst(X, DestType);
2517}
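Illustrative little-endian IR for the fold above (editorial example):
define <4 x i8> @shuffle_as_trunc(<4 x i16> %x) {
  %bc = bitcast <4 x i16> %x to <8 x i8>
  %lo = shufflevector <8 x i8> %bc, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ; Expected (little endian): trunc <4 x i16> %x to <4 x i8>
  ret <4 x i8> %lo
}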
2518
2519/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2520/// narrowing (concatenating with poison and extracting back to the original
2521/// length). This allows replacing the wide select with a narrow select.
2522static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2523 InstCombiner::BuilderTy &Builder) {
2524 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2525 // of the 1st vector operand of a shuffle.
2526 if (!match(Shuf.getOperand(1), m_Poison()) || !Shuf.isIdentityWithExtract())
2527 return nullptr;
2528
2529 // The vector being shuffled must be a vector select that we can eliminate.
2530 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2531 Value *Cond, *X, *Y;
2532 if (!match(Shuf.getOperand(0),
2533 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2534 return nullptr;
2535
2536 // We need a narrow condition value. It must be extended with poison elements
2537 // and have the same number of elements as this shuffle.
2538 unsigned NarrowNumElts =
2539 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2540 Value *NarrowCond;
2541 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Poison()))) ||
2542 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2543 NarrowNumElts ||
2544 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2545 return nullptr;
2546
2547 // shuf (sel (shuf NarrowCond, poison, WideMask), X, Y), poison, NarrowMask)
2548 // -->
2549 // sel NarrowCond, (shuf X, poison, NarrowMask), (shuf Y, poison, NarrowMask)
2550 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2551 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2552 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2553}
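Illustrative IR (editorial example): the condition is widened only to be narrowed again, so the select can be done at the narrow width:
define <2 x i32> @narrow_the_select(<2 x i1> %cond, <4 x i32> %x, <4 x i32> %y) {
  %widecond = shufflevector <2 x i1> %cond, <2 x i1> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %widesel = select <4 x i1> %widecond, <4 x i32> %x, <4 x i32> %y
  %narrow = shufflevector <4 x i32> %widesel, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
  ; Expected: select <2 x i1> %cond over %x and %y narrowed by the same mask.
  ret <2 x i32> %narrow
}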
2554
2555/// Canonicalize FP negate/abs after shuffle.
2556static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2557 InstCombiner::BuilderTy &Builder) {
2558 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2559 Value *X;
2560 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2561 return nullptr;
2562
2563 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2564
2565 // Match 2-input (binary) shuffle.
2566 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2567 Value *Y;
2568 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2569 S0->getOpcode() != S1->getOpcode() ||
2570 (!S0->hasOneUse() && !S1->hasOneUse()))
2571 return nullptr;
2572
2573 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2574 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2575 Instruction *NewF;
2576 if (IsFNeg) {
2577 NewF = UnaryOperator::CreateFNeg(NewShuf);
2578 } else {
2579 Function *FAbs = Intrinsic::getOrInsertDeclaration(
2580 Shuf.getModule(), Intrinsic::fabs, Shuf.getType());
2581 NewF = CallInst::Create(FAbs, {NewShuf});
2582 }
2583 NewF->copyIRFlags(S0);
2584 NewF->andIRFlags(S1);
2585 return NewF;
2586}
2587
2588/// Canonicalize casts after shuffle.
2589static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2590 InstCombiner::BuilderTy &Builder) {
2591 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2592 if (!Cast0)
2593 return nullptr;
2594
2595 // TODO: Allow other opcodes? That would require easing the type restrictions
2596 // below here.
2597 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2598 switch (CastOpcode) {
2599 case Instruction::SExt:
2600 case Instruction::ZExt:
2601 case Instruction::FPToSI:
2602 case Instruction::FPToUI:
2603 case Instruction::SIToFP:
2604 case Instruction::UIToFP:
2605 break;
2606 default:
2607 return nullptr;
2608 }
2609
2610 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2611 VectorType *ShufTy = Shuf.getType();
2612 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2613
2614 // TODO: Allow length-increasing shuffles?
2615 if (ShufTy->getElementCount().getKnownMinValue() >
2616 ShufOpTy->getElementCount().getKnownMinValue())
2617 return nullptr;
2618
2619 // shuffle (cast X), Poison, identity-with-extract-mask -->
2620 // cast (shuffle X, Poison, identity-with-extract-mask).
2621 if (isa<PoisonValue>(Shuf.getOperand(1)) && Cast0->hasOneUse() &&
2622 Shuf.isIdentityWithExtract()) {
2623 auto *NewIns = Builder.CreateShuffleVector(Cast0->getOperand(0),
2624 PoisonValue::get(CastSrcTy),
2625 Shuf.getShuffleMask());
2626 return CastInst::Create(Cast0->getOpcode(), NewIns, Shuf.getType());
2627 }
2628
2629 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2630 // Do we have 2 matching cast operands?
2631 if (!Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2632 Cast0->getSrcTy() != Cast1->getSrcTy())
2633 return nullptr;
2634
2635 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2636 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2637 "Expected fixed vector operands for casts and binary shuffle");
2638 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2639 return nullptr;
2640
2641 // At least one of the operands must have only one use (the shuffle).
2642 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2643 return nullptr;
2644
2645 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2646 Value *X = Cast0->getOperand(0);
2647 Value *Y = Cast1->getOperand(0);
2648 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2649 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2650}
2651
2652/// Try to fold an extract subvector operation.
2653static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2654 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2655 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Poison()))
2656 return nullptr;
2657
2658 // Check if we are extracting all bits of an inserted scalar:
2659 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2660 Value *X;
2661 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2662 X->getType()->getPrimitiveSizeInBits() ==
2663 Shuf.getType()->getPrimitiveSizeInBits())
2664 return new BitCastInst(X, Shuf.getType());
2665
2666 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2667 Value *Y;
2668 ArrayRef<int> Mask;
2669 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2670 return nullptr;
2671
2672 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2673 // then combining may result in worse codegen.
2674 if (!Op0->hasOneUse())
2675 return nullptr;
2676
2677 // We are extracting a subvector from a shuffle. Remove excess elements from
2678 // the 1st shuffle mask to eliminate the extract.
2679 //
2680 // This transform is conservatively limited to identity extracts because we do
2681 // not allow arbitrary shuffle mask creation as a target-independent transform
2682 // (because we can't guarantee that will lower efficiently).
2683 //
2684 // If the extracting shuffle has a poison mask element, it transfers to the
2685 // new shuffle mask. Otherwise, copy the original mask element. Example:
2686 // shuf (shuf X, Y, <C0, C1, C2, poison, C4>), poison, <0, poison, 2, 3> -->
2687 // shuf X, Y, <C0, poison, C2, poison>
2688 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2689 SmallVector<int, 16> NewMask(NumElts);
2690 assert(NumElts < Mask.size() &&
2691 "Identity with extract must have less elements than its inputs");
2692
2693 for (unsigned i = 0; i != NumElts; ++i) {
2694 int ExtractMaskElt = Shuf.getMaskValue(i);
2695 int MaskElt = Mask[i];
2696 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2697 }
2698 return new ShuffleVectorInst(X, Y, NewMask);
2699}
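Illustrative IR (editorial example): the extracting shuffle is folded into the first shuffle's mask:
define <2 x i32> @extract_of_shuffle(<4 x i32> %x, <4 x i32> %y) {
  %wide = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  %narrow = shufflevector <4 x i32> %wide, <4 x i32> poison, <2 x i32> <i32 0, i32 1>
  ; Expected: shufflevector <4 x i32> %x, <4 x i32> %y, <2 x i32> <i32 0, i32 5>
  ret <2 x i32> %narrow
}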
2700
2701/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2702/// operand with the operand of an insertelement.
2703static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2704 InstCombinerImpl &IC) {
2705 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2706 SmallVector<int, 16> Mask;
2707 Shuf.getShuffleMask(Mask);
2708
2709 int NumElts = Mask.size();
2710 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2711
2712 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2713 // not be able to handle it there if the insertelement has >1 use.
2714 // If the shuffle has an insertelement operand but does not choose the
2715 // inserted scalar element from that value, then we can replace that shuffle
2716 // operand with the source vector of the insertelement.
2717 Value *X;
2718 uint64_t IdxC;
2719 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2720 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2721 if (!is_contained(Mask, (int)IdxC))
2722 return IC.replaceOperand(Shuf, 0, X);
2723 }
2724 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2725 // Offset the index constant by the vector width because we are checking for
2726 // accesses to the 2nd vector input of the shuffle.
2727 IdxC += InpNumElts;
2728 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2729 if (!is_contained(Mask, (int)IdxC))
2730 return IC.replaceOperand(Shuf, 1, X);
2731 }
2732 // For the rest of the transform, the shuffle must not change vector sizes.
2733 // TODO: This restriction could be removed if the insert has only one use
2734 // (because the transform would require a new length-changing shuffle).
2735 if (NumElts != InpNumElts)
2736 return nullptr;
2737
2738 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2739 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2740 // We need an insertelement with a constant index.
2741 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2742 m_ConstantInt(IndexC))))
2743 return false;
2744
2745 // Test the shuffle mask to see if it splices the inserted scalar into the
2746 // operand 1 vector of the shuffle.
2747 int NewInsIndex = -1;
2748 for (int i = 0; i != NumElts; ++i) {
2749 // Ignore undef mask elements.
2750 if (Mask[i] == -1)
2751 continue;
2752
2753 // The shuffle takes elements of operand 1 without lane changes.
2754 if (Mask[i] == NumElts + i)
2755 continue;
2756
2757 // The shuffle must choose the inserted scalar exactly once.
2758 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2759 return false;
2760
2761 // The shuffle is placing the inserted scalar into element i.
2762 NewInsIndex = i;
2763 }
2764
2765 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2766
2767 // Index is updated to the potentially translated insertion lane.
2768 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2769 return true;
2770 };
2771
2772 // If the shuffle is unnecessary, insert the scalar operand directly into
2773 // operand 1 of the shuffle. Example:
2774 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2775 Value *Scalar;
2776 ConstantInt *IndexC;
2777 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2778 return InsertElementInst::Create(V1, Scalar, IndexC);
2779
2780 // Try again after commuting shuffle. Example:
2781 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2782 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2783 std::swap(V0, V1);
2784 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2785 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2786 return InsertElementInst::Create(V1, Scalar, IndexC);
2787
2788 return nullptr;
2789}
2790
2791static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2792 // Match the operands as identity with padding (also known as concatenation
2793 // with undef) shuffles of the same source type. The backend is expected to
2794 // recreate these concatenations from a shuffle of narrow operands.
2795 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2796 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2797 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2798 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2799 return nullptr;
2800
2801 // We limit this transform to power-of-2 types because we expect that the
2802 // backend can convert the simplified IR patterns to identical nodes as the
2803 // original IR.
2804 // TODO: If we can verify the same behavior for arbitrary types, the
2805 // power-of-2 checks can be removed.
2806 Value *X = Shuffle0->getOperand(0);
2807 Value *Y = Shuffle1->getOperand(0);
2808 if (X->getType() != Y->getType() ||
2809 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2810 !isPowerOf2_32(
2811 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2812 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2813 match(X, m_Undef()) || match(Y, m_Undef()))
2814 return nullptr;
2815 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2816 match(Shuffle1->getOperand(1), m_Undef()) &&
2817 "Unexpected operand for identity shuffle");
2818
2819 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2820 // operands directly by adjusting the shuffle mask to account for the narrower
2821 // types:
2822 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2823 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2824 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2825 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2826
2827 ArrayRef<int> Mask = Shuf.getShuffleMask();
2828 SmallVector<int, 16> NewMask(Mask.size(), -1);
2829 for (int i = 0, e = Mask.size(); i != e; ++i) {
2830 if (Mask[i] == -1)
2831 continue;
2832
2833 // If this shuffle is choosing an undef element from 1 of the sources, that
2834 // element is undef.
2835 if (Mask[i] < WideElts) {
2836 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2837 continue;
2838 } else {
2839 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2840 continue;
2841 }
2842
2843 // If this shuffle is choosing from the 1st narrow op, the mask element is
2844 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2845 // element is offset down to adjust for the narrow vector widths.
2846 if (Mask[i] < WideElts) {
2847 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2848 NewMask[i] = Mask[i];
2849 } else {
2850 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2851 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2852 }
2853 }
2854 return new ShuffleVectorInst(X, Y, NewMask);
2855}
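Illustrative IR (editorial example): both operands are concatenations with poison, so the narrow sources can be shuffled directly:
define <4 x i16> @shuffle_of_widened_ops(<2 x i16> %x, <2 x i16> %y) {
  %xw = shufflevector <2 x i16> %x, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %yw = shufflevector <2 x i16> %y, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
  %r = shufflevector <4 x i16> %xw, <4 x i16> %yw, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ; Expected: shufflevector <2 x i16> %x, <2 x i16> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %r
}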
2856
2857// Splatting the first element of the result of a BinOp, where any of the
2858// BinOp's operands is the result of a first-element splat, can be simplified to
2859// splatting the first element of the BinOp's result.
2860Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2861 if (!match(SVI.getOperand(1), m_Poison()) ||
2862 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2863 !SVI.getOperand(0)->hasOneUse())
2864 return nullptr;
2865
2866 Value *Op0 = SVI.getOperand(0);
2867 Value *X, *Y;
2869 m_Value(Y))) &&
2870 !match(Op0, m_BinOp(m_Value(X),
2872 return nullptr;
2873 if (X->getType() != Y->getType())
2874 return nullptr;
2875
2876 auto *BinOp = cast<BinaryOperator>(Op0);
2877 if (!isSafeToSpeculativelyExecute(BinOp))
2878 return nullptr;
2879
2880 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2881 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2882 NewBOI->copyIRFlags(BinOp);
2883
2884 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2885}
2886
2887Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2888 Value *LHS = SVI.getOperand(0);
2889 Value *RHS = SVI.getOperand(1);
2890 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2891 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2892 SVI.getType(), ShufQuery))
2893 return replaceInstUsesWith(SVI, V);
2894
2895 if (Instruction *I = simplifyBinOpSplats(SVI))
2896 return I;
2897
2898 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2899 // order to support scalable vectors.
2900 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2901 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2902
2903 if (isa<ScalableVectorType>(LHS->getType()))
2904 return nullptr;
2905
2906 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2907 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2908
2909 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2910 //
2911 // if X and Y are of the same (vector) type, and the element size is not
2912 // changed by the bitcasts, we can distribute the bitcasts through the
2913 // shuffle, hopefully reducing the number of instructions. We make sure that
2914 // at least one bitcast only has one use, so we don't *increase* the number of
2915 // instructions here.
2916 Value *X, *Y;
2917 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2918 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2919 X->getType()->getScalarSizeInBits() ==
2920 SVI.getType()->getScalarSizeInBits() &&
2921 (LHS->hasOneUse() || RHS->hasOneUse())) {
2922 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2923 SVI.getName() + ".uncasted");
2924 return new BitCastInst(V, SVI.getType());
2925 }
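// An illustrative instance of the fold above (hypothetical IR; i32 and float
// have the same element width, and %xb is assumed to have no other users):
//   %xb = bitcast <4 x i32> %x to <4 x float>
//   %yb = bitcast <4 x i32> %y to <4 x float>
//   %s  = shufflevector <4 x float> %xb, <4 x float> %yb, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// -->
//   %s.uncasted = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
//   %s = bitcast <4 x i32> %s.uncasted to <4 x float>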
2926
2927 ArrayRef<int> Mask = SVI.getShuffleMask();
2928
2929 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2930 // simulated shuffle can simplify, then this shuffle is unnecessary:
2931 // shuf (bitcast X), undef, Mask --> bitcast X'
2932 // TODO: This could be extended to allow length-changing shuffles.
2933 // The transform might also be obsoleted if we allowed canonicalization
2934 // of bitcasted shuffles.
2935 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2936 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2937 // Try to create a scaled mask constant.
2938 auto *XType = cast<FixedVectorType>(X->getType());
2939 unsigned XNumElts = XType->getNumElements();
2940 SmallVector<int, 16> ScaledMask;
2941 if (scaleShuffleMaskElts(XNumElts, Mask, ScaledMask)) {
2942 // If the shuffled source vector simplifies, cast that value to this
2943 // shuffle's type.
2944 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2945 ScaledMask, XType, ShufQuery))
2946 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2947 }
2948 }
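// An illustrative instance of the fold above (hypothetical IR): the mask
// <0,1,2,3> over <4 x i32> elements scales to the identity mask <0,1> over
// the <2 x i64> source, so the simulated shuffle folds away entirely:
//   %xb = bitcast <2 x i64> %x to <4 x i32>
//   %s  = shufflevector <4 x i32> %xb, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// -->
//   %s  = bitcast <2 x i64> %x to <4 x i32>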
2949
2950 // shuffle x, x, mask --> shuffle x, undef, mask'
2951 if (LHS == RHS) {
2952 assert(!match(RHS, m_Undef()) &&
2953 "Shuffle with 2 undef ops not simplified?");
2954 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2955 }
2956
2957 // shuffle undef, x, mask --> shuffle x, undef, mask'
2958 if (match(LHS, m_Undef())) {
2959 SVI.commute();
2960 return &SVI;
2961 }
2962
2963 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2964 return I;
2965
2966 if (Instruction *I = foldSelectShuffle(SVI))
2967 return I;
2968
2969 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2970 return I;
2971
2972 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2973 return I;
2974
2975 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2976 return I;
2977
2978 if (Instruction *I = foldCastShuffle(SVI, Builder))
2979 return I;
2980
2981 APInt PoisonElts(VWidth, 0);
2982 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2983 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2984 if (V != &SVI)
2985 return replaceInstUsesWith(SVI, V);
2986 return &SVI;
2987 }
2988
2989 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2990 return I;
2991
2992 // These transforms have the potential to lose undef knowledge, so they are
2993 // intentionally placed after SimplifyDemandedVectorElts().
2994 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2995 return I;
2996 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2997 return I;
2998
2999 if (match(RHS, m_Constant())) {
3000 if (auto *SI = dyn_cast<SelectInst>(LHS)) {
3001 // We cannot do this fold for elementwise select since ShuffleVector is
3002 // not elementwise.
3003 if (SI->getCondition()->getType()->isIntegerTy() &&
3004 (isa<PoisonValue>(RHS) ||
3005 isGuaranteedNotToBePoison(SI->getCondition()))) {
3006 if (Instruction *I = FoldOpIntoSelect(SVI, SI))
3007 return I;
3008 }
3009 }
3010 if (auto *PN = dyn_cast<PHINode>(LHS)) {
3011 if (Instruction *I = foldOpIntoPhi(SVI, PN, /*AllowMultipleUses=*/true))
3012 return I;
3013 }
3014 }
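// An illustrative instance of the select fold above (hypothetical IR; the
// select has a scalar i1 condition, a single use, and both arms fold to
// constants when shuffled):
//   %sel = select i1 %c, <2 x i32> <i32 1, i32 2>, <2 x i32> <i32 3, i32 4>
//   %s   = shufflevector <2 x i32> %sel, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
// -->
//   %s   = select i1 %c, <2 x i32> <i32 2, i32 1>, <2 x i32> <i32 4, i32 3>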
3015
3016 if (match(RHS, m_Poison()) && canEvaluateShuffled(LHS, Mask)) {
3017 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
3018 return replaceInstUsesWith(SVI, V);
3019 }
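// An illustrative instance of the fold above (hypothetical IR; the whole
// expression tree feeding the shuffle can be rebuilt in the shuffled order,
// so the shuffle itself disappears):
//   %i  = insertelement <4 x i32> poison, i32 %s, i32 0
//   %a  = add <4 x i32> %i, <i32 1, i32 2, i32 3, i32 4>
//   %r  = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// -->
//   %i1 = insertelement <4 x i32> poison, i32 %s, i32 3
//   %r  = add <4 x i32> %i1, <i32 4, i32 3, i32 2, i32 1>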
3020
3021 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
3022 // a non-vector type. We can instead bitcast the original vector followed by
3023 // an extract of the desired element:
3024 //
3025 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
3026 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
3027 // %1 = bitcast <4 x i8> %sroa to i32
3028 // Becomes:
3029 // %bc = bitcast <16 x i8> %in to <4 x i32>
3030 // %ext = extractelement <4 x i32> %bc, i32 0
3031 //
3032 // If the shuffle is extracting a contiguous range of values from the input
3033 // vector then each use which is a bitcast of the extracted size can be
3034 // replaced. This will work if the vector types are compatible, and the begin
3035 // index is aligned to a value in the casted vector type. If the begin index
3036 // isn't aligned then we can shuffle the original vector (keeping the same
3037 // vector type) before extracting.
3038 //
3039 // This code will bail out if the target type is fundamentally incompatible
3040 // with vectors of the source type.
3041 //
3042 // Example of <16 x i8>, target type i32:
3043 // Index range [4,8): v-----------v Will work.
3044 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
3045 // <16 x i8>: | | | | | | | | | | | | | | | | |
3046 // <4 x i32>: | | | | |
3047 // +-----------+-----------+-----------+-----------+
3048 // Index range [6,10): ^-----------^ Needs an extra shuffle.
3049 // Target type i40: ^--------------^ Won't work, bail.
3050 bool MadeChange = false;
3051 if (isShuffleExtractingFromLHS(SVI, Mask)) {
3052 Value *V = LHS;
3053 unsigned MaskElems = Mask.size();
3054 auto *SrcTy = cast<FixedVectorType>(V->getType());
3055 unsigned VecBitWidth = DL.getTypeSizeInBits(SrcTy);
3056 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
3057 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
3058 unsigned SrcNumElems = SrcTy->getNumElements();
3059 SmallVector<BitCastInst *, 8> BCs;
3060 DenseMap<Type *, Value *> NewBCs;
3061 for (User *U : SVI.users())
3062 if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) {
3063 // Only visit bitcasts that weren't previously handled.
3064 if (BC->use_empty())
3065 continue;
3066 // Prefer to combine bitcasts of bitcasts before attempting this fold.
3067 if (BC->hasOneUse()) {
3068 auto *BC2 = dyn_cast<BitCastInst>(BC->user_back());
3069 if (BC2 && isEliminableCastPair(BC, BC2))
3070 continue;
3071 }
3072 BCs.push_back(BC);
3073 }
3074 for (BitCastInst *BC : BCs) {
3075 unsigned BegIdx = Mask.front();
3076 Type *TgtTy = BC->getDestTy();
3077 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
3078 if (!TgtElemBitWidth)
3079 continue;
3080 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
3081 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
3082 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
3083 if (!VecBitWidthsEqual)
3084 continue;
3085 if (!VectorType::isValidElementType(TgtTy))
3086 continue;
3087 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
3088 if (!BegIsAligned) {
3089 // Shuffle the input so [0,NumElements) contains the output, and
3090 // [NumElems,SrcNumElems) is undef.
3091 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
3092 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
3093 ShuffleMask[I] = Idx;
3094 V = Builder.CreateShuffleVector(V, ShuffleMask,
3095 SVI.getName() + ".extract");
3096 BegIdx = 0;
3097 }
3098 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
3099 assert(SrcElemsPerTgtElem);
3100 BegIdx /= SrcElemsPerTgtElem;
3101 auto [It, Inserted] = NewBCs.try_emplace(CastSrcTy);
3102 if (Inserted)
3103 It->second = Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
3104 auto *Ext = Builder.CreateExtractElement(It->second, BegIdx,
3105 SVI.getName() + ".extract");
3106 // The shufflevector isn't being replaced: the bitcast that used it
3107 // is. InstCombine will visit the newly-created instructions.
3108 replaceInstUsesWith(*BC, Ext);
3109 MadeChange = true;
3110 }
3111 }
3112
3113 // If the LHS is a shufflevector itself, see if we can combine it with this
3114 // one without producing an unusual shuffle.
3115 // Cases that might be simplified:
3116 // 1.
3117 // x1=shuffle(v1,v2,mask1)
3118 // x=shuffle(x1,undef,mask)
3119 // ==>
3120 // x=shuffle(v1,undef,newMask)
3121 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3122 // 2.
3123 // x1=shuffle(v1,undef,mask1)
3124 // x=shuffle(x1,x2,mask)
3125 // where v1.size() == mask1.size()
3126 // ==>
3127 // x=shuffle(v1,x2,newMask)
3128 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3129 // 3.
3130 // x2=shuffle(v2,undef,mask2)
3131 // x=shuffle(x1,x2,mask)
3132 // where v2.size() == mask2.size()
3133 // ==>
3134 // x=shuffle(x1,v2,newMask)
3135 // newMask[i] = (mask[i] < x1.size())
3136 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3137 // 4.
3138 // x1=shuffle(v1,undef,mask1)
3139 // x2=shuffle(v2,undef,mask2)
3140 // x=shuffle(x1,x2,mask)
3141 // where v1.size() == v2.size()
3142 // ==>
3143 // x=shuffle(v1,v2,newMask)
3144 // newMask[i] = (mask[i] < x1.size())
3145 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3146 //
3147 // Here we are really conservative:
3148 // we are absolutely afraid of producing a shuffle mask not in the input
3149 // program, because the code gen may not be smart enough to turn a merged
3150 // shuffle into two specific shuffles: it may produce worse code. As such,
3151 // we only merge two shuffles if the result is either a splat or one of the
3152 // input shuffle masks. In this case, merging the shuffles just removes
3153 // one instruction, which we know is safe. This is good for things like
3154 // turning: (splat(splat)) -> splat, or
3155 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
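// An illustrative instance of case 1 where the merged mask is a splat
// (hypothetical IR), so the merge is considered profitable:
//   %x1 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
//   %x  = shufflevector <4 x i32> %x1, <4 x i32> poison, <4 x i32> zeroinitializer
// -->
//   %x  = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> zeroinitializer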
3156 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3157 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3158 if (LHSShuffle)
3159 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3160 !match(RHS, m_Poison()))
3161 LHSShuffle = nullptr;
3162 if (RHSShuffle)
3163 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3164 RHSShuffle = nullptr;
3165 if (!LHSShuffle && !RHSShuffle)
3166 return MadeChange ? &SVI : nullptr;
3167
3168 Value* LHSOp0 = nullptr;
3169 Value* LHSOp1 = nullptr;
3170 Value* RHSOp0 = nullptr;
3171 unsigned LHSOp0Width = 0;
3172 unsigned RHSOp0Width = 0;
3173 if (LHSShuffle) {
3174 LHSOp0 = LHSShuffle->getOperand(0);
3175 LHSOp1 = LHSShuffle->getOperand(1);
3176 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3177 }
3178 if (RHSShuffle) {
3179 RHSOp0 = RHSShuffle->getOperand(0);
3180 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3181 }
3182 Value* newLHS = LHS;
3183 Value* newRHS = RHS;
3184 if (LHSShuffle) {
3185 // case 1
3186 if (match(RHS, m_Poison())) {
3187 newLHS = LHSOp0;
3188 newRHS = LHSOp1;
3189 }
3190 // case 2 or 4
3191 else if (LHSOp0Width == LHSWidth) {
3192 newLHS = LHSOp0;
3193 }
3194 }
3195 // case 3 or 4
3196 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3197 newRHS = RHSOp0;
3198 }
3199 // case 4
3200 if (LHSOp0 == RHSOp0) {
3201 newLHS = LHSOp0;
3202 newRHS = nullptr;
3203 }
3204
3205 if (newLHS == LHS && newRHS == RHS)
3206 return MadeChange ? &SVI : nullptr;
3207
3208 ArrayRef<int> LHSMask;
3209 ArrayRef<int> RHSMask;
3210 if (newLHS != LHS)
3211 LHSMask = LHSShuffle->getShuffleMask();
3212 if (RHSShuffle && newRHS != RHS)
3213 RHSMask = RHSShuffle->getShuffleMask();
3214
3215 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3216 SmallVector<int, 16> newMask;
3217 bool isSplat = true;
3218 int SplatElt = -1;
3219 // Create a new mask for the new ShuffleVectorInst so that the new
3220 // ShuffleVectorInst is equivalent to the original one.
3221 for (unsigned i = 0; i < VWidth; ++i) {
3222 int eltMask;
3223 if (Mask[i] < 0) {
3224 // This element is a poison value.
3225 eltMask = -1;
3226 } else if (Mask[i] < (int)LHSWidth) {
3227 // This element is from left hand side vector operand.
3228 //
3229 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3230 // new mask value for the element.
3231 if (newLHS != LHS) {
3232 eltMask = LHSMask[Mask[i]];
3233 // If the value selected is a poison value, explicitly specify it
3234 // with a -1 mask value.
3235 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3236 eltMask = -1;
3237 } else
3238 eltMask = Mask[i];
3239 } else {
3240 // This element is from right hand side vector operand
3241 //
3242 // If the value selected is a poison value, explicitly specify it
3243 // with a -1 mask value. (case 1)
3244 if (match(RHS, m_Poison()))
3245 eltMask = -1;
3246 // If RHS is going to be replaced (case 3 or 4), calculate the
3247 // new mask value for the element.
3248 else if (newRHS != RHS) {
3249 eltMask = RHSMask[Mask[i]-LHSWidth];
3250 // If the value selected is a poison value, explicitly specify it
3251 // with a -1 mask value.
3252 if (eltMask >= (int)RHSOp0Width) {
3253 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3254 "should have been checked above");
3255 eltMask = -1;
3256 }
3257 } else
3258 eltMask = Mask[i]-LHSWidth;
3259
3260 // If LHS's width is changed, shift the mask value accordingly.
3261 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3262 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3263 // If newRHS == newLHS, we want to remap any references from newRHS to
3264 // newLHS so that we can properly identify splats that may occur due to
3265 // obfuscation across the two vectors.
3266 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3267 eltMask += newLHSWidth;
3268 }
3269
3270 // Check if this could still be a splat.
3271 if (eltMask >= 0) {
3272 if (SplatElt >= 0 && SplatElt != eltMask)
3273 isSplat = false;
3274 SplatElt = eltMask;
3275 }
3276
3277 newMask.push_back(eltMask);
3278 }
3279
3280 // If the result mask is equal to one of the original shuffle masks,
3281 // or is a splat, do the replacement.
3282 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3283 if (!newRHS)
3284 newRHS = PoisonValue::get(newLHS->getType());
3285 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3286 }
3287
3288 return MadeChange ? &SVI : nullptr;
3289}