//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
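///
/// An illustrative sketch (this example is not from the original source):
/// with a constant index, a single-use vector add can be rewritten as a
/// scalar add of two extracts:
///   %v = add <4 x i32> %a, %b
///   %e = extractelement <4 x i32> %v, i64 0
/// -->
///   %a0 = extractelement <4 x i32> %a, i64 0
///   %b0 = extractelement <4 x i32> %b, i64 0
///   %e = add i32 %a0, %b0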
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // The index needs to be lower than the minimum size of the vector, because
    // for a scalable vector, the vector size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpInst::Predicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
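//
// An illustrative sketch of the targeted pattern (not from the original
// source):
//   loop:
//     %vec = phi <4 x i32> [ zeroinitializer, %entry ], [ %vec.next, %loop ]
//     %elt = extractelement <4 x i32> %vec, i64 0
//     %vec.next = add <4 x i32> %vec, <i32 1, i32 1, i32 1, i32 1>
// Here %vec is only used by a same-index extract and by the single binop
// feeding back into it, so the lane-0 value can flow through a scalar i32 PHI.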
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          *B0);
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0), *B0);
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, *InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts)
    replaceInstUsesWith(*E, scalarPHI);

  return &EI;
}

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if (!ShiftAmountC ||
        (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
         Ext.getVectorOperand()->hasOneUse())) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
    // of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove insertelement, if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved to SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find elements of V demanded by UserInstr.
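///
/// An illustrative note (not from the original source): if UserInstr is
/// 'extractelement <4 x i32> %V, i64 2', exactly bit 2 of the returned mask
/// is set.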
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
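///
/// An illustrative note (not from the original source): if V is used only by
/// extracts of lanes 0 and 2, the result has exactly bits 0 and 2 set; any
/// non-instruction user makes the result all-ones.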
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
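///
/// For example (an illustrative note, not from the original source):
///   extractelement <4 x float> %v, i32 1
/// becomes
///   extractelement <4 x float> %v, i64 1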
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getType()->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement will for instance try to do out of bounds accesses
  // because of the values of %c1 and/or %c2, the sequence could be optimized
  // early. This is currently not possible because constant folding will reach
  // an unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  if (IndexC) {
    // Canonicalize type of constant indices to i64 to simplify CSE
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // The index needs to be lower than the minimum size of the vector,
      // because for a scalable vector, the vector size is only known at run
      // time.
      if (IID == Intrinsic::experimental_stepvector &&
          IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return index when its value does not exceed the allowed limit
        // for the element type of the vector, otherwise return undefined.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = UndefValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For a fixed-length vector, it's invalid to extract an out-of-range
    // element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpInst::Predicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal by value; so if both are constants here, they must not be
      // the same value, and we can extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

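        // An illustrative sketch (not from the original source), where the
        // single vector operand is the index:
        //   %gep = getelementptr i32, ptr %base, <2 x i64> %offs
        //   %ext = extractelement <2 x ptr> %gep, i64 0
        // -->
        //   %off0 = extractelement <2 x i64> %offs, i64 0
        //   %ext = getelementptr i32, ptr %base, i64 %off0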
        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out
      // where it came from and extract from the appropriate input element
      // instead. Restrict the following transformation to fixed-length
      // vectors.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int32Ty = Type::getInt32Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int32Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable types; the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt UndefElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt UndefElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
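///
/// An illustrative sketch (not from the original source), with 4-wide vectors:
///   %e = extractelement <4 x i32> %RHS, i64 3
///   %v = insertelement <4 x i32> %LHS, i32 %e, i64 0
/// is described by the mask <7, 1, 2, 3>, where RHS lanes are numbered after
/// the four LHS lanes.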
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Undef())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted undef.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
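///
/// An illustrative sketch (not from the original source): if extracts from a
/// <2 x i8> feed inserts into a <4 x i8>, the narrow source is first widened:
///   %wide = shufflevector <2 x i8> %src, <2 x i8> poison, mask <0, 1, -1, -1>
/// (with -1 denoting an unused lane), so the insert/extract pairs can later
/// collapse into a single 4-wide shuffle.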
static void replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    NewExt->insertAfter(OldExt);
    IC.replaceInstUsesWith(*OldExt, NewExt);
  }
}

/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
/// not rely on the second vector source. Return a std::pair containing the
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
/// parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Undef())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            replaceExtractElements(IEI, EI, IC);

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted to see if they all were extractvalue'd from
/// the same source aggregate at the exact same element indexes.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
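///
/// An illustrative sketch of the basic, non-PHI case (not from the original
/// source):
///   %e0 = extractvalue { i32, i64 } %agg, 0
///   %e1 = extractvalue { i32, i64 } %agg, 1
///   %i0 = insertvalue { i32, i64 } undef, i32 %e0, 0
///   %i1 = insertvalue { i32, i64 } %i0, i64 %e1, 1
/// Here %i1 rebuilds %agg exactly, so %i1 can be replaced by %agg.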
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}),
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either we've
  // reconstructed the full initializer or we can't visit any more
  // `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                 ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type of the extraction didn't match
    /// the aggregate type of the insertion,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // Given the value \p Elt that was being inserted into element \p EltIdx of an
  // aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB)
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find an appropriate source aggregate for each of the
  // elements, and see if it's the same aggregate for each element. If so,
  // return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element.");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, pass through whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for the aggregate, where we'll insert the final PHI that will
  // be used instead of OrigIVI. The basic block of OrigIVI is *not* the right
  // choice. We should look in which blocks each of the AggElts is being
  // defined; they all should be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find the source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate
  // from which all the elements were originally extracted?
  // Note that we want the map to have stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) != AggregateDescription::Found)
      return nullptr; // Give up.
    IV.first->second = *SourceAggregate;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB->getFirstNonPHI());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0, i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

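/// Return true if this shuffle could act like a vector select: every mask
/// element is either undefined (-1) or selects the same lane position from one
/// of the two equal-sized operands. (This doc comment is an added note, not
/// from the original source.) For example, with 4-wide operands, mask
/// <0, 5, 2, 7> qualifies: lanes 0 and 2 come from the first operand, lanes 1
/// and 3 from the second.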
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check none of the intermediate steps have any additional uses, except
    // for the root insertelement instruction, which can be re-used, if it
    // inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into an undef vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Undef()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int32Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);

  // Splat from element 0, but replace absent elements with undef in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle is of scalable type. The number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == UndefMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;

  Value *X, *Y;
  Constant *ScalarC;
  ConstantInt *IdxC1, *IdxC2;
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
  }

  return nullptr;
}

/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
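///
/// An illustrative sketch (not from the original source), inserting i8 42 at
/// index 1 of a select-like shuffle whose second operand is constant:
///   %s = shufflevector <4 x i8> %X, <4 x i8> <i8 0, i8 1, i8 2, i8 3>,
///                      mask <0, 5, 2, 7>
///   %i = insertelement <4 x i8> %s, i8 42, i64 1
/// -->
///   %i = shufflevector <4 x i8> %X, <4 x i8> <i8 0, i8 42, i8 2, i8 3>,
///                      mask <0, 5, 2, 7>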
1389 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1390 // Bail out if the parent has more than one use. In that case, we'd be
1391 // replacing the insertelt with a shuffle, and that's not a clear win.
1392 if (!Inst || !Inst->hasOneUse())
1393 return nullptr;
1394 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1395 // The shuffle must have a constant vector operand. The insertelt must have
1396 // a constant scalar being inserted at a constant position in the vector.
1397 Constant *ShufConstVec, *InsEltScalar;
1398 uint64_t InsEltIndex;
1399 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1400 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1401 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1402 return nullptr;
1403
1404 // Adding an element to an arbitrary shuffle could be expensive, but a
1405 // shuffle that selects elements from vectors without crossing lanes is
1406 // assumed cheap.
1407 // If we're just adding a constant into that shuffle, it will still be
1408 // cheap.
1409 if (!isShuffleEquivalentToSelect(*Shuf))
1410 return nullptr;
1411
1412 // From the above 'select' check, we know that the mask has the same number
1413 // of elements as the vector input operands. We also know that each constant
1414 // input element is used in its lane and can not be used more than once by
1415 // the shuffle. Therefore, replace the constant in the shuffle's constant
1416 // vector with the insertelt constant. Replace the constant in the shuffle's
1417 // mask vector with the insertelt index plus the length of the vector
1418 // (because the constant vector operand of a shuffle is always the 2nd
1419 // operand).
1420 ArrayRef<int> Mask = Shuf->getShuffleMask();
1421 unsigned NumElts = Mask.size();
1422 SmallVector<Constant *, 16> NewShufElts(NumElts);
1423 SmallVector<int, 16> NewMaskElts(NumElts);
1424 for (unsigned I = 0; I != NumElts; ++I) {
1425 if (I == InsEltIndex) {
1426 NewShufElts[I] = InsEltScalar;
1427 NewMaskElts[I] = InsEltIndex + NumElts;
1428 } else {
1429 // Copy over the existing values.
1430 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1431 NewMaskElts[I] = Mask[I];
1432 }
1433
1434 // Bail if we failed to find an element.
1435 if (!NewShufElts[I])
1436 return nullptr;
1437 }
1438
1439 // Create new operands for a shuffle that includes the constant of the
1440 // original insertelt. The old shuffle will be dead now.
1441 return new ShuffleVectorInst(Shuf->getOperand(0),
1442 ConstantVector::get(NewShufElts), NewMaskElts);
1443 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1444 // Transform sequences of insertelement ops with constant data/indices into
1445 // a single shuffle op.
1446 // We cannot handle scalable types; the number of elements needed to create
1447 // the shuffle mask is not a compile-time constant.
1448 if (isa<ScalableVectorType>(InsElt.getType()))
1449 return nullptr;
1450 unsigned NumElts =
1451 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1452
1453 uint64_t InsertIdx[2];
1454 Constant *Val[2];
1455 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1456 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1457 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1458 !match(IEI->getOperand(1), m_Constant(Val[1])))
1459 return nullptr;
1460 SmallVector<Constant *, 16> Values(NumElts);
1461 SmallVector<int, 16> Mask(NumElts);
1462 auto ValI = std::begin(Val);
1463 // Generate new constant vector and mask.
1464 // We have 2 values/masks from the insertelement instructions. Insert them
1465 // into new value/mask vectors.
1466 for (uint64_t I : InsertIdx) {
1467 if (!Values[I]) {
1468 Values[I] = *ValI;
1469 Mask[I] = NumElts + I;
1470 }
1471 ++ValI;
1472 }
1473 // Remaining values are filled with 'undef' values.
1474 for (unsigned I = 0; I < NumElts; ++I) {
1475 if (!Values[I]) {
1476 Values[I] = UndefValue::get(InsElt.getType()->getElementType());
1477 Mask[I] = I;
1478 }
1479 }
1480 // Create new operands for a shuffle that includes the constant of the
1481 // original insertelt.
1482 return new ShuffleVectorInst(IEI->getOperand(0),
1483 ConstantVector::get(Values), Mask);
1484 }
1485 return nullptr;
1486}
1487
1488/// If both the base vector and the inserted element are extended from the same
1489/// type, do the insert element in the narrow source type followed by extend.
1490/// TODO: This can be extended to include other cast opcodes, but particularly
1491/// if we create a wider insertelement, make sure codegen is not harmed.
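/// An illustrative instance of the fold (hypothetical values):
///
///   inselt (fpext <2 x float> %v to <2 x double>),
///          (fpext float %s to double), i64 0
///   --> fpext (inselt <2 x float> %v, float %s, i64 0) to <2 x double>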
1492static Instruction *narrowInsElt(InsertElementInst &InsElt,
1493 InstCombiner::BuilderTy &Builder) {
1494 // We are creating a vector extend. If the original vector extend has another
1495 // use, that would mean we end up with 2 vector extends, so avoid that.
1496 // TODO: We could ease the use-clause to "if at least one op has one use"
1497 // (assuming that the source types match - see next TODO comment).
1498 Value *Vec = InsElt.getOperand(0);
1499 if (!Vec->hasOneUse())
1500 return nullptr;
1501
1502 Value *Scalar = InsElt.getOperand(1);
1503 Value *X, *Y;
1504 CastInst::CastOps CastOpcode;
1505 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1506 CastOpcode = Instruction::FPExt;
1507 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1508 CastOpcode = Instruction::SExt;
1509 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1510 CastOpcode = Instruction::ZExt;
1511 else
1512 return nullptr;
1513
1514 // TODO: We can allow mismatched types by creating an intermediate cast.
1515 if (X->getType()->getScalarType() != Y->getType())
1516 return nullptr;
1517
1518 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1519 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1520 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1521}
1522
1523/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1524/// try to convert to a single insert with appropriate bitcasts.
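/// An illustrative little-endian instance, inserting the two i16 halves of an
/// i32 %x (hypothetical values):
///
///   inselt (inselt undef, (trunc %x to i16), 0),
///          (trunc (lshr %x, 16) to i16), 1
///   --> bitcast (inselt (bitcast undef to <2 x i32>), i32 %x, 0) to <4 x i16>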
1525static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1526 bool IsBigEndian,
1527 InstCombiner::BuilderTy &Builder) {
1528 Value *VecOp = InsElt.getOperand(0);
1529 Value *ScalarOp = InsElt.getOperand(1);
1530 Value *IndexOp = InsElt.getOperand(2);
1531
1532 // The pattern depends on endianness because we expect the lower index to be inserted first.
1533 // Big endian:
1534 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1535 // Little endian:
1536 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1537 // Note: It is not safe to do this transform with an arbitrary base vector
1538 // because the bitcast of that vector to fewer/larger elements could
1539 // allow poison to spill into an element that was not poison before.
1540 // TODO: Detect smaller fractions of the scalar.
1541 // TODO: One-use checks are conservative.
1542 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1543 Value *Scalar0, *BaseVec;
1544 uint64_t Index0, Index1;
1545 if (!VTy || (VTy->getNumElements() & 1) ||
1546 !match(IndexOp, m_ConstantInt(Index1)) ||
1547 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1548 m_ConstantInt(Index0))) ||
1549 !match(BaseVec, m_Undef()))
1550 return nullptr;
1551
1552 // The first insert must be to the index one less than this one, and it
1553 // must be to an even index.
1554 if (Index0 + 1 != Index1 || Index0 & 1)
1555 return nullptr;
1556
1557 // For big endian, the high half of the value should be inserted first.
1558 // For little endian, the low half of the value should be inserted first.
1559 Value *X;
1560 uint64_t ShAmt;
1561 if (IsBigEndian) {
1562 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1563 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1564 return nullptr;
1565 } else {
1566 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1567 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1568 return nullptr;
1569 }
1570
1571 Type *SrcTy = X->getType();
1572 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1573 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1574 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1575 return nullptr;
1576
1577 // Bitcast the base vector to a vector type with the source element type.
1578 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1579 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1580
1581 // Scale the insert index for a vector with half as many elements.
1582 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1583 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1584 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1585 return new BitCastInst(NewInsert, VTy);
1586}
1587
1588Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1589 Value *VecOp = IE.getOperand(0);
1590 Value *ScalarOp = IE.getOperand(1);
1591 Value *IdxOp = IE.getOperand(2);
1592
1593 if (auto *V = simplifyInsertElementInst(
1594 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1595 return replaceInstUsesWith(IE, V);
1596
1597 // Canonicalize type of constant indices to i64 to simplify CSE
1598 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1599 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1600 return replaceOperand(IE, 2, NewIdx);
1601
1602 Value *BaseVec, *OtherScalar;
1603 uint64_t OtherIndexVal;
1604 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1605 m_Value(OtherScalar),
1606 m_ConstantInt(OtherIndexVal)))) &&
1607 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1608 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1609 return InsertElementInst::Create(NewIns, OtherScalar,
1610 Builder.getInt64(OtherIndexVal));
1611 }
1612 }
1613
1614 // If the scalar is bitcast and inserted into undef, do the insert in the
1615 // source type followed by bitcast.
1616 // TODO: Generalize for insert into any constant, not just undef?
1617 Value *ScalarSrc;
1618 if (match(VecOp, m_Undef()) &&
1619 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1620 (ScalarSrc->getType()->isIntegerTy() ||
1621 ScalarSrc->getType()->isFloatingPointTy())) {
1622 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1623 // bitcast (inselt undef, ScalarSrc, IdxOp)
1624 Type *ScalarTy = ScalarSrc->getType();
1625 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1626 UndefValue *NewUndef = UndefValue::get(VecTy);
1627 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1628 return new BitCastInst(NewInsElt, IE.getType());
1629 }
1630
1631 // If the vector and scalar are both bitcast from the same element type, do
1632 // the insert in that source type followed by bitcast.
1633 Value *VecSrc;
1634 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1635 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1636 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1637 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1638 cast<VectorType>(VecSrc->getType())->getElementType() ==
1639 ScalarSrc->getType()) {
1640 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1641 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1642 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1643 return new BitCastInst(NewInsElt, IE.getType());
1644 }
1645
1646 // If the inserted element was extracted from some other fixed-length vector
1647 // and both indexes are valid constants, try to turn this into a shuffle.
1648 // We cannot handle scalable vector types; the number of elements needed to
1649 // create the shuffle mask is not a compile-time constant.
1650 uint64_t InsertedIdx, ExtractedIdx;
1651 Value *ExtVecOp;
1652 if (isa<FixedVectorType>(IE.getType()) &&
1653 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1654 match(ScalarOp,
1655 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1656 isa<FixedVectorType>(ExtVecOp->getType()) &&
1657 ExtractedIdx <
1658 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1659 // TODO: Looking at the user(s) to determine if this insert is a
1660 // fold-to-shuffle opportunity does not match the usual instcombine
1661 // constraints. We should decide if the transform is worthy based only
1662 // on this instruction and its operands, but that may not work currently.
1663 //
1664 // Here, we are trying to avoid creating shuffles before reaching
1665 // the end of a chain of extract-insert pairs. This is complicated because
1666 // we do not generally form arbitrary shuffle masks in instcombine
1667 // (because those may codegen poorly), but collectShuffleElements() does
1668 // exactly that.
1669 //
1670 // The rules for determining what is an acceptable target-independent
1671 // shuffle mask are fuzzy because they evolve based on the backend's
1672 // capabilities and real-world impact.
1673 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1674 if (!Insert.hasOneUse())
1675 return true;
1676 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1677 if (!InsertUser)
1678 return true;
1679 return false;
1680 };
1681
1682 // Try to form a shuffle from a chain of extract-insert ops.
1683 if (isShuffleRootCandidate(IE)) {
1684 SmallVector<int, 16> Mask;
1685 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
1686
1687 // The proposed shuffle may be trivial, in which case we shouldn't
1688 // perform the combine.
1689 if (LR.first != &IE && LR.second != &IE) {
1690 // We now have a shuffle of LHS, RHS, Mask.
1691 if (LR.second == nullptr)
1692 LR.second = UndefValue::get(LR.first->getType());
1693 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1694 }
1695 }
1696 }
1697
1698 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1699 unsigned VWidth = VecTy->getNumElements();
1700 APInt UndefElts(VWidth, 0);
1701 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1702 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1703 if (V != &IE)
1704 return replaceInstUsesWith(IE, V);
1705 return &IE;
1706 }
1707 }
1708
1709 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1710 return Shuf;
1711
1712 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1713 return NewInsElt;
1714
1715 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1716 return Broadcast;
1717
1718 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1719 return Splat;
1720
1721 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1722 return IdentityShuf;
1723
1724 if (Instruction *Ext = narrowInsElt(IE, Builder))
1725 return Ext;
1726
1727 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1728 return Ext;
1729
1730 return nullptr;
1731}
1732
1733/// Return true if we can evaluate the specified expression tree if the vector
1734/// elements were shuffled in a different order.
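/// For example (hypothetical values), shuf (add %x, <1,2,3,4>), undef, <3,2,1,0>
/// could be evaluated as add (shuf %x, undef, <3,2,1,0>), <4,3,2,1> because the
/// constant operand can be reordered along with %x.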
1735static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1736 unsigned Depth = 5) {
1737 // We can always reorder the elements of a constant.
1738 if (isa<Constant>(V))
1739 return true;
1740
1741 // We won't reorder vector arguments. No IPO here.
1742 Instruction *I = dyn_cast<Instruction>(V);
1743 if (!I) return false;
1744
1745 // Two users may expect different orders of the elements. Don't try it.
1746 if (!I->hasOneUse())
1747 return false;
1748
1749 if (Depth == 0) return false;
1750
1751 switch (I->getOpcode()) {
1752 case Instruction::UDiv:
1753 case Instruction::SDiv:
1754 case Instruction::URem:
1755 case Instruction::SRem:
1756 // Propagating an undefined shuffle mask element to integer div/rem is not
1757 // allowed because those opcodes can create immediate undefined behavior
1758 // from an undefined element in an operand.
1759 if (llvm::is_contained(Mask, -1))
1760 return false;
1761 [[fallthrough]];
1762 case Instruction::Add:
1763 case Instruction::FAdd:
1764 case Instruction::Sub:
1765 case Instruction::FSub:
1766 case Instruction::Mul:
1767 case Instruction::FMul:
1768 case Instruction::FDiv:
1769 case Instruction::FRem:
1770 case Instruction::Shl:
1771 case Instruction::LShr:
1772 case Instruction::AShr:
1773 case Instruction::And:
1774 case Instruction::Or:
1775 case Instruction::Xor:
1776 case Instruction::ICmp:
1777 case Instruction::FCmp:
1778 case Instruction::Trunc:
1779 case Instruction::ZExt:
1780 case Instruction::SExt:
1781 case Instruction::FPToUI:
1782 case Instruction::FPToSI:
1783 case Instruction::UIToFP:
1784 case Instruction::SIToFP:
1785 case Instruction::FPTrunc:
1786 case Instruction::FPExt:
1787 case Instruction::GetElementPtr: {
1788 // Bail out if we would create longer vector ops. We could allow creating
1789 // longer vector ops, but that may result in more expensive codegen.
1790 Type *ITy = I->getType();
1791 if (ITy->isVectorTy() &&
1792 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1793 return false;
1794 for (Value *Operand : I->operands()) {
1795 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1796 return false;
1797 }
1798 return true;
1799 }
1800 case Instruction::InsertElement: {
1801 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1802 if (!CI) return false;
1803 int ElementNumber = CI->getLimitedValue();
1804
1805 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1806 // can't put an element into multiple indices.
1807 bool SeenOnce = false;
1808 for (int I : Mask) {
1809 if (I == ElementNumber) {
1810 if (SeenOnce)
1811 return false;
1812 SeenOnce = true;
1813 }
1814 }
1815 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1816 }
1817 }
1818 return false;
1819}
1820
1821/// Rebuild a new instruction just like 'I' but with the new operands given.
1822/// In the event of a type mismatch, the types of the new operands are taken to be correct.
1823static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
1824 // We don't want to use the IRBuilder here because we want the replacement
1825 // instructions to appear next to 'I', not the builder's insertion point.
1826 switch (I->getOpcode()) {
1827 case Instruction::Add:
1828 case Instruction::FAdd:
1829 case Instruction::Sub:
1830 case Instruction::FSub:
1831 case Instruction::Mul:
1832 case Instruction::FMul:
1833 case Instruction::UDiv:
1834 case Instruction::SDiv:
1835 case Instruction::FDiv:
1836 case Instruction::URem:
1837 case Instruction::SRem:
1838 case Instruction::FRem:
1839 case Instruction::Shl:
1840 case Instruction::LShr:
1841 case Instruction::AShr:
1842 case Instruction::And:
1843 case Instruction::Or:
1844 case Instruction::Xor: {
1845 BinaryOperator *BO = cast<BinaryOperator>(I);
1846 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1847 BinaryOperator *New =
1848 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
1849 NewOps[0], NewOps[1], "", BO);
1850 if (isa<OverflowingBinaryOperator>(BO)) {
1851 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1852 New->setHasNoSignedWrap(BO->hasNoSignedWrap());
1853 }
1854 if (isa<PossiblyExactOperator>(BO)) {
1855 New->setIsExact(BO->isExact());
1856 }
1857 if (isa<FPMathOperator>(BO))
1858 New->copyFastMathFlags(I);
1859 return New;
1860 }
1861 case Instruction::ICmp:
1862 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1863 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
1864 NewOps[0], NewOps[1]);
1865 case Instruction::FCmp:
1866 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1867 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
1868 NewOps[0], NewOps[1]);
1869 case Instruction::Trunc:
1870 case Instruction::ZExt:
1871 case Instruction::SExt:
1872 case Instruction::FPToUI:
1873 case Instruction::FPToSI:
1874 case Instruction::UIToFP:
1875 case Instruction::SIToFP:
1876 case Instruction::FPTrunc:
1877 case Instruction::FPExt: {
1878 // It's possible that the mask has a different number of elements from
1879 // the original cast. We recompute the destination type to match the mask.
1880 Type *DestTy = VectorType::get(
1881 I->getType()->getScalarType(),
1882 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1883 assert(NewOps.size() == 1 && "cast with #ops != 1");
1884 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
1885 "", I);
1886 }
1887 case Instruction::GetElementPtr: {
1888 Value *Ptr = NewOps[0];
1889 ArrayRef<Value*> Idx = NewOps.slice(1);
1890 GetElementPtrInst *GEP = GetElementPtrInst::Create(
1891 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
1892 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
1893 return GEP;
1894 }
1895 }
1896 llvm_unreachable("failed to rebuild vector instructions");
1897}
1898
1899static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
1900 // Mask.size() does not need to be equal to the number of vector elements.
1901
1902 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1903 Type *EltTy = V->getType()->getScalarType();
1904 Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
1905 if (match(V, m_Undef()))
1906 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1907
1908 if (isa<ConstantAggregateZero>(V))
1909 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1910
1911 if (Constant *C = dyn_cast<Constant>(V))
1912 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1913 Mask);
1914
1915 Instruction *I = cast<Instruction>(V);
1916 switch (I->getOpcode()) {
1917 case Instruction::Add:
1918 case Instruction::FAdd:
1919 case Instruction::Sub:
1920 case Instruction::FSub:
1921 case Instruction::Mul:
1922 case Instruction::FMul:
1923 case Instruction::UDiv:
1924 case Instruction::SDiv:
1925 case Instruction::FDiv:
1926 case Instruction::URem:
1927 case Instruction::SRem:
1928 case Instruction::FRem:
1929 case Instruction::Shl:
1930 case Instruction::LShr:
1931 case Instruction::AShr:
1932 case Instruction::And:
1933 case Instruction::Or:
1934 case Instruction::Xor:
1935 case Instruction::ICmp:
1936 case Instruction::FCmp:
1937 case Instruction::Trunc:
1938 case Instruction::ZExt:
1939 case Instruction::SExt:
1940 case Instruction::FPToUI:
1941 case Instruction::FPToSI:
1942 case Instruction::UIToFP:
1943 case Instruction::SIToFP:
1944 case Instruction::FPTrunc:
1945 case Instruction::FPExt:
1946 case Instruction::Select:
1947 case Instruction::GetElementPtr: {
1948 SmallVector<Value *, 8> NewOps;
1949 bool NeedsRebuild =
1950 (Mask.size() !=
1951 cast<FixedVectorType>(I->getType())->getNumElements());
1952 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1953 Value *V;
1954 // Recursively call evaluateInDifferentElementOrder on vector arguments
1955 // as well. E.g. GetElementPtr may have scalar operands even if the
1956 // return value is a vector, so we need to examine the operand type.
1957 if (I->getOperand(i)->getType()->isVectorTy())
1958 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
1959 else
1960 V = I->getOperand(i);
1961 NewOps.push_back(V);
1962 NeedsRebuild |= (V != I->getOperand(i));
1963 }
1964 if (NeedsRebuild) {
1965 return buildNew(I, NewOps);
1966 }
1967 return I;
1968 }
1969 case Instruction::InsertElement: {
1970 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1971
1972 // The insertelement was inserting at Element. Figure out which element
1973 // that becomes after shuffling. The answer is guaranteed to be unique
1974 // by canEvaluateShuffled.
1975 bool Found = false;
1976 int Index = 0;
1977 for (int e = Mask.size(); Index != e; ++Index) {
1978 if (Mask[Index] == Element) {
1979 Found = true;
1980 break;
1981 }
1982 }
1983
1984 // If element is not in Mask, no need to handle the operand 1 (element to
1985 // be inserted). Just evaluate values in operand 0 according to Mask.
1986 if (!Found)
1987 return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1988
1989 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
1990 return InsertElementInst::Create(V, I->getOperand(1),
1991 ConstantInt::get(I32Ty, Index), "", I);
1992 }
1993 }
1994 llvm_unreachable("failed to reorder elements of vector instruction!");
1995}
1996
1997// Returns true if the shuffle is extracting a contiguous range of values from
1998// LHS, for example:
1999// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2000// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2001// Shuffles to: |EE|FF|GG|HH|
2002// +--+--+--+--+
2003static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2004 ArrayRef<int> Mask) {
2005 unsigned LHSElems =
2006 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2007 unsigned MaskElems = Mask.size();
2008 unsigned BegIdx = Mask.front();
2009 unsigned EndIdx = Mask.back();
2010 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2011 return false;
2012 for (unsigned I = 0; I != MaskElems; ++I)
2013 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2014 return false;
2015 return true;
2016}
2017
2018/// These are the ingredients in an alternate form binary operator as described
2019/// below.
2020struct BinopElts {
2021 BinaryOperator::BinaryOps Opcode;
2022 Value *Op0;
2023 Value *Op1;
2024 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2025 Value *V0 = nullptr, Value *V1 = nullptr) :
2026 Opcode(Opc), Op0(V0), Op1(V1) {}
2027 operator bool() const { return Opcode != 0; }
2028};
2029
2030/// Binops may be transformed into binops with different opcodes and operands.
2031/// Reverse the usual canonicalization to enable folds with the non-canonical
2032/// form of the binop. If a transform is possible, return the elements of the
2033/// new binop. If not, return invalid elements.
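/// For example (hypothetical values): shl %x, 3 can be viewed as mul %x, 8, and
/// or %x, 1 can be viewed as add %x, 1 when the low bit of %x is known zero.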
2034static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2035 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2036 Type *Ty = BO->getType();
2037 switch (BO->getOpcode()) {
2038 case Instruction::Shl: {
2039 // shl X, C --> mul X, (1 << C)
2040 Constant *C;
2041 if (match(BO1, m_Constant(C))) {
2042 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
2043 return {Instruction::Mul, BO0, ShlOne};
2044 }
2045 break;
2046 }
2047 case Instruction::Or: {
2048 // or X, C --> add X, C (when X and C have no common bits set)
2049 const APInt *C;
2050 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
2051 return {Instruction::Add, BO0, BO1};
2052 break;
2053 }
2054 case Instruction::Sub:
2055 // sub 0, X --> mul X, -1
2056 if (match(BO0, m_ZeroInt()))
2057 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2058 break;
2059 default:
2060 break;
2061 }
2062 return {};
2063}
2064
2065/// A select shuffle of a select shuffle with a shared operand can be reduced
2066/// to a single select shuffle. This is an obvious improvement in IR, and the
2067/// backend is expected to lower select shuffles efficiently.
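/// An illustrative instance with 4 elements and a shared operand %x
/// (hypothetical values):
///
///   shuf %x, (shuf %x, %y, <0,5,2,7>), <0,5,6,3> --> shuf %x, %y, <0,5,2,3>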
2068static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2069 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2070
2071 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2072 SmallVector<int, 16> Mask;
2073 Shuf.getShuffleMask(Mask);
2074 unsigned NumElts = Mask.size();
2075
2076 // Canonicalize a select shuffle with common operand as Op1.
2077 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2078 if (ShufOp && ShufOp->isSelect() &&
2079 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2080 std::swap(Op0, Op1);
2081 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2082 }
2083
2084 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2085 if (!ShufOp || !ShufOp->isSelect() ||
2086 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2087 return nullptr;
2088
2089 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2090 SmallVector<int, 16> Mask1;
2091 ShufOp->getShuffleMask(Mask1);
2092 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2093
2094 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2095 if (Y == Op0) {
2096 std::swap(X, Y);
2097 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2098 }
2099
2100 // If the mask chooses from X (operand 0), it stays the same.
2101 // If the mask chooses from the earlier shuffle, the other mask value is
2102 // transferred to the combined select shuffle:
2103 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2104 SmallVector<int, 16> NewMask(NumElts);
2105 for (unsigned i = 0; i != NumElts; ++i)
2106 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2107
2108 // A select mask with undef elements might look like an identity mask.
2111 "Unexpected shuffle mask");
2112 return new ShuffleVectorInst(X, Y, NewMask);
2113}
2114
2115static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
2116 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2117
2118 // Are we shuffling together some value and that same value after it has been
2119 // modified by a binop with a constant?
2120 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2121 Constant *C;
2122 bool Op0IsBinop;
2123 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2124 Op0IsBinop = true;
2125 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2126 Op0IsBinop = false;
2127 else
2128 return nullptr;
2129
2130 // The identity constant for a binop leaves a variable operand unchanged. For
2131 // a vector, this is a splat of something like 0, -1, or 1.
2132 // If there's no identity constant for this binop, we're done.
2133 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2134 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2135 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2136 if (!IdC)
2137 return nullptr;
2138
2139 // Shuffle identity constants into the lanes that return the original value.
2140 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2141 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2142 // The existing binop constant vector remains in the same operand position.
2143 ArrayRef<int> Mask = Shuf.getShuffleMask();
2144 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2145 ConstantExpr::getShuffleVector(IdC, C, Mask);
2146
2147 bool MightCreatePoisonOrUB =
2148 is_contained(Mask, UndefMaskElem) &&
2149 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2150 if (MightCreatePoisonOrUB)
2151 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2152
2153 // shuf (bop X, C), X, M --> bop X, C'
2154 // shuf X, (bop X, C), M --> bop X, C'
2155 Value *X = Op0IsBinop ? Op1 : Op0;
2156 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2157 NewBO->copyIRFlags(BO);
2158
2159 // An undef shuffle mask element may propagate as an undef constant element in
2160 // the new binop. That would produce poison where the original code might not.
2161 // If we already made a safe constant, then there's no danger.
2162 if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2163 NewBO->dropPoisonGeneratingFlags();
2164 return NewBO;
2165}
2166
2167/// If we have an insert of a scalar to a non-zero element of an undefined
2168/// vector and then shuffle that value, that's the same as inserting to the zero
2169/// element and shuffling. Splatting from the zero element is recognized as the
2170/// canonical form of splat.
2171static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2172 InstCombiner::BuilderTy &Builder) {
2173 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2174 ArrayRef<int> Mask = Shuf.getShuffleMask();
2175 Value *X;
2176 uint64_t IndexC;
2177
2178 // Match a shuffle that is a splat to a non-zero element.
2179 if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
2180 m_ConstantInt(IndexC)))) ||
2181 !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2182 return nullptr;
2183
2184 // Insert into element 0 of an undef vector.
2185 UndefValue *UndefVec = UndefValue::get(Shuf.getType());
2186 Constant *Zero = Builder.getInt32(0);
2187 Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
2188
2189 // Splat from element 0. Any mask element that is undefined remains undefined.
2190 // For example:
2191 // shuf (inselt undef, X, 2), _, <2,2,undef>
2192 // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
2193 unsigned NumMaskElts =
2194 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2195 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2196 for (unsigned i = 0; i != NumMaskElts; ++i)
2197 if (Mask[i] == UndefMaskElem)
2198 NewMask[i] = Mask[i];
2199
2200 return new ShuffleVectorInst(NewIns, NewMask);
2201}
2202
2203/// Try to fold shuffles that are the equivalent of a vector select.
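/// For example (hypothetical values), with 4 elements, the mask <0,5,2,7>
/// takes lanes 0 and 2 from operand 0 and lanes 1 and 3 from operand 1, like a
/// select with condition <true, false, true, false>.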
2204Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2205 if (!Shuf.isSelect())
2206 return nullptr;
2207
2208 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2209 // Commuting undef to operand 0 conflicts with another canonicalization.
2210 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2211 if (!match(Shuf.getOperand(1), m_Undef()) &&
2212 Shuf.getMaskValue(0) >= (int)NumElts) {
2213 // TODO: Can we assert that both operands of a shuffle-select are not undef
2214 // (otherwise, it would have been folded by instsimplify)?
2215 Shuf.commute();
2216 return &Shuf;
2217 }
2218
2219 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2220 return I;
2221
2222 if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
2223 return I;
2224
2225 BinaryOperator *B0, *B1;
2226 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2227 !match(Shuf.getOperand(1), m_BinOp(B1)))
2228 return nullptr;
2229
2230 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2231 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2232 // with a multiply, we will exit because C0/C1 will not be set.
2233 Value *X, *Y;
2234 Constant *C0 = nullptr, *C1 = nullptr;
2235 bool ConstantsAreOp1;
2236 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2237 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2238 ConstantsAreOp1 = false;
2239 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2240 m_Neg(m_Value(X)))) &&
2241 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2242 m_Neg(m_Value(Y)))))
2243 ConstantsAreOp1 = true;
2244 else
2245 return nullptr;
2246
2247 // We need matching binops to fold the lanes together.
2248 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2249 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2250 bool DropNSW = false;
2251 if (ConstantsAreOp1 && Opc0 != Opc1) {
2252 // TODO: We drop "nsw" if shift is converted into multiply because it may
2253 // not be correct when the shift amount is BitWidth - 1. We could examine
2254 // each vector element to determine if it is safe to keep that flag.
2255 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2256 DropNSW = true;
2257 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2258 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2259 Opc0 = AltB0.Opcode;
2260 C0 = cast<Constant>(AltB0.Op1);
2261 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2262 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2263 Opc1 = AltB1.Opcode;
2264 C1 = cast<Constant>(AltB1.Op1);
2265 }
2266 }
2267
2268 if (Opc0 != Opc1 || !C0 || !C1)
2269 return nullptr;
2270
2271 // The opcodes must be the same. Use a new name to make that clear.
2272 BinaryOperator::BinaryOps BOpc = Opc0;
2273
2274 // Select the constant elements needed for the single binop.
2275 ArrayRef<int> Mask = Shuf.getShuffleMask();
2276 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2277
2278 // We are moving a binop after a shuffle. When a shuffle has an undefined
2279 // mask element, the result is undefined, but it is not poison or undefined
2280 // behavior. That is not necessarily true for div/rem/shift.
2281 bool MightCreatePoisonOrUB =
2282 is_contained(Mask, UndefMaskElem) &&
2283 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2284 if (MightCreatePoisonOrUB)
2285 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2286 ConstantsAreOp1);
2287
2288 Value *V;
2289 if (X == Y) {
2290 // Remove a binop and the shuffle by rearranging the constant:
2291 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2292 // shuffle (op C0, V), (op C1, V), M --> op C', V
2293 V = X;
2294 } else {
2295 // If there are 2 different variable operands, we must create a new shuffle
2296 // (select) first, so check uses to ensure that we don't end up with more
2297 // instructions than we started with.
2298 if (!B0->hasOneUse() && !B1->hasOneUse())
2299 return nullptr;
2300
2301 // If we use the original shuffle mask and op1 is *variable*, we would be
2302 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2303 // poison. We do not have to guard against UB when *constants* are op1
2304 // because safe constants guarantee that we do not overflow sdiv/srem (and
2305 // there's no danger for other opcodes).
2306 // TODO: To allow this case, create a new shuffle mask with no undefs.
2307 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2308 return nullptr;
2309
2310 // Note: In general, we do not create new shuffles in InstCombine because we
2311 // do not know if a target can lower an arbitrary shuffle optimally. In this
2312 // case, the shuffle uses the existing mask, so there is no additional risk.
2313
2314 // Select the variable vectors first, then perform the binop:
2315 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2316 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2317 V = Builder.CreateShuffleVector(X, Y, Mask);
2318 }
2319
2320 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2321 Builder.CreateBinOp(BOpc, NewC, V);
2322
2323 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2324 // 1. If we changed an opcode, poison conditions might have changed.
2325 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2326 // where the original code did not. But if we already made a safe constant,
2327 // then there's no danger.
2328 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2329 NewI->copyIRFlags(B0);
2330 NewI->andIRFlags(B1);
2331 if (DropNSW)
2332 NewI->setHasNoSignedWrap(false);
2333 if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
2334 NewI->dropPoisonGeneratingFlags();
2335 }
2336 return replaceInstUsesWith(Shuf, NewBO);
2337}
2338
2339/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2340/// Example (little endian):
2341/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
2342static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2343 bool IsBigEndian) {
2344 // This must be a bitcasted shuffle of 1 vector integer operand.
2345 Type *DestType = Shuf.getType();
2346 Value *X;
2347 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2348 !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2349 return nullptr;
2350
2351 // The source type must have the same number of elements as the shuffle,
2352 // and the source element type must be larger than the shuffle element type.
2353 Type *SrcType = X->getType();
2354 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2355 cast<FixedVectorType>(SrcType)->getNumElements() !=
2356 cast<FixedVectorType>(DestType)->getNumElements() ||
2357 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2358 return nullptr;
2359
2360 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2361 "Expected a shuffle that decreases length");
2362
2363 // Last, check that the mask chooses the correct low bits for each narrow
2364 // element in the result.
2365 uint64_t TruncRatio =
2366 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2367 ArrayRef<int> Mask = Shuf.getShuffleMask();
2368 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2369 if (Mask[i] == UndefMaskElem)
2370 continue;
2371 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2372 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2373 if (Mask[i] != (int)LSBIndex)
2374 return nullptr;
2375 }
2376
2377 return new TruncInst(X, DestType);
2378}
2379
2380/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2381/// narrowing (concatenating with undef and extracting back to the original
2382/// length). This allows replacing the wide select with a narrow select.
2383static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2384 InstCombiner::BuilderTy &Builder) {
2385 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2386 // of the 1st vector operand of a shuffle.
2387 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2388 return nullptr;
2389
2390 // The vector being shuffled must be a vector select that we can eliminate.
2391 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2392 Value *Cond, *X, *Y;
2393 if (!match(Shuf.getOperand(0),
2394 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2395 return nullptr;
2396
2397 // We need a narrow condition value. It must be extended with undef elements
2398 // and have the same number of elements as this shuffle.
2399 unsigned NarrowNumElts =
2400 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2401 Value *NarrowCond;
2402 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2403 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2404 NarrowNumElts ||
2405 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2406 return nullptr;
2407
2408 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2409 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2410 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2411 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2412 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2413}
2414
2415/// Canonicalize FP negate/abs after shuffle.
2416static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2417 InstCombiner::BuilderTy &Builder) {
2418 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2419 Value *X;
2420 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2421 return nullptr;
2422
2423 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2424
2425 // Match 1-input (unary) shuffle.
2426 // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2427 if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
2428 Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2429 if (IsFNeg)
2430 return UnaryOperator::CreateFNegFMF(NewShuf, S0);
2431
2432 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2433 Intrinsic::fabs, Shuf.getType());
2434 CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
2435 NewF->setFastMathFlags(S0->getFastMathFlags());
2436 return NewF;
2437 }
2438
2439 // Match 2-input (binary) shuffle.
2440 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2441 Value *Y;
2442 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2443 S0->getOpcode() != S1->getOpcode() ||
2444 (!S0->hasOneUse() && !S1->hasOneUse()))
2445 return nullptr;
2446
2447 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2448 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2449 Instruction *NewF;
2450 if (IsFNeg) {
2451 NewF = UnaryOperator::CreateFNeg(NewShuf);
2452 } else {
2453 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2454 Intrinsic::fabs, Shuf.getType());
2455 NewF = CallInst::Create(FAbs, {NewShuf});
2456 }
2457 NewF->copyIRFlags(S0);
2458 NewF->andIRFlags(S1);
2459 return NewF;
2460}
2461
2462/// Canonicalize casts after shuffle.
2463static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2464 InstCombiner::BuilderTy &Builder) {
2465 // Do we have 2 matching cast operands?
2466 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2467 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2468 if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2469 Cast0->getSrcTy() != Cast1->getSrcTy())
2470 return nullptr;
2471
2472 // TODO: Allow other opcodes? That would require easing the type restrictions
2473 // below here.
2474 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2475 switch (CastOpcode) {
2476 case Instruction::FPToSI:
2477 case Instruction::FPToUI:
2478 case Instruction::SIToFP:
2479 case Instruction::UIToFP:
2480 break;
2481 default:
2482 return nullptr;
2483 }
2484
2485 VectorType *ShufTy = Shuf.getType();
2486 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2487 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2488
2489 // TODO: Allow length-increasing shuffles?
2490 if (ShufTy->getElementCount().getKnownMinValue() >
2491 ShufOpTy->getElementCount().getKnownMinValue())
2492 return nullptr;
2493
2494 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2495 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2496 "Expected fixed vector operands for casts and binary shuffle");
2497 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2498 return nullptr;
2499
2500 // At least one of the operands must have only one use (the shuffle).
2501 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2502 return nullptr;
2503
2504 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2505 Value *X = Cast0->getOperand(0);
2506 Value *Y = Cast1->getOperand(0);
2507 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2508 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2509}
2510
2511/// Try to fold an extract subvector operation.
2512static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2513 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2514 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2515 return nullptr;
2516
2517 // Check if we are extracting all bits of an inserted scalar:
2518 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2519 Value *X;
2520 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2521 X->getType()->getPrimitiveSizeInBits() ==
2522 Shuf.getType()->getPrimitiveSizeInBits())
2523 return new BitCastInst(X, Shuf.getType());
2524
2525 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2526 Value *Y;
2527 ArrayRef<int> Mask;
2528 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2529 return nullptr;
2530
2531 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2532 // then combining may result in worse codegen.
2533 if (!Op0->hasOneUse())
2534 return nullptr;
2535
2536 // We are extracting a subvector from a shuffle. Remove excess elements from
2537 // the 1st shuffle mask to eliminate the extract.
2538 //
2539 // This transform is conservatively limited to identity extracts because we do
2540 // not allow arbitrary shuffle mask creation as a target-independent transform
2541 // (because we can't guarantee that will lower efficiently).
2542 //
2543 // If the extracting shuffle has an undef mask element, it transfers to the
2544 // new shuffle mask. Otherwise, copy the original mask element. Example:
2545 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2546 // shuf X, Y, <C0, undef, C2, undef>
2547 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2548 SmallVector<int, 16> NewMask(NumElts);
2549 assert(NumElts < Mask.size() &&
2550 "Identity with extract must have less elements than its inputs");
2551
2552 for (unsigned i = 0; i != NumElts; ++i) {
2553 int ExtractMaskElt = Shuf.getMaskValue(i);
2554 int MaskElt = Mask[i];
2555 NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
2556 }
2557 return new ShuffleVectorInst(X, Y, NewMask);
2558}
2559
2560/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2561/// operand with the operand of an insertelement.
2562static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2563 InstCombinerImpl &IC) {
2564 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2565 SmallVector<int, 16> Mask;
2566 Shuf.getShuffleMask(Mask);
2567
2568 int NumElts = Mask.size();
2569 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2570
2571 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2572 // not be able to handle it there if the insertelement has >1 use.
2573 // If the shuffle has an insertelement operand but does not choose the
2574 // inserted scalar element from that value, then we can replace that shuffle
2575 // operand with the source vector of the insertelement.
2576 Value *X;
2577 uint64_t IdxC;
2578 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2579 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2580 if (!is_contained(Mask, (int)IdxC))
2581 return IC.replaceOperand(Shuf, 0, X);
2582 }
2583 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2584 // Offset the index constant by the vector width because we are checking for
2585 // accesses to the 2nd vector input of the shuffle.
2586 IdxC += InpNumElts;
2587 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2588 if (!is_contained(Mask, (int)IdxC))
2589 return IC.replaceOperand(Shuf, 1, X);
2590 }
2591 // For the rest of the transform, the shuffle must not change vector sizes.
2592 // TODO: This restriction could be removed if the insert has only one use
2593 // (because the transform would require a new length-changing shuffle).
2594 if (NumElts != InpNumElts)
2595 return nullptr;
2596
2597 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2598 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2599 // We need an insertelement with a constant index.
2600 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2601 m_ConstantInt(IndexC))))
2602 return false;
2603
2604 // Test the shuffle mask to see if it splices the inserted scalar into the
2605 // operand 1 vector of the shuffle.
2606 int NewInsIndex = -1;
2607 for (int i = 0; i != NumElts; ++i) {
2608 // Ignore undef mask elements.
2609 if (Mask[i] == -1)
2610 continue;
2611
2612 // The shuffle takes elements of operand 1 without lane changes.
2613 if (Mask[i] == NumElts + i)
2614 continue;
2615
2616 // The shuffle must choose the inserted scalar exactly once.
2617 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2618 return false;
2619
2620 // The shuffle is placing the inserted scalar into element i.
2621 NewInsIndex = i;
2622 }
2623
2624 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2625
2626 // Index is updated to the potentially translated insertion lane.
2627 IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
2628 return true;
2629 };
2630
2631 // If the shuffle is unnecessary, insert the scalar operand directly into
2632 // operand 1 of the shuffle. Example:
2633 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2634 Value *Scalar;
2635 ConstantInt *IndexC;
2636 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2637 return InsertElementInst::Create(V1, Scalar, IndexC);
2638
2639 // Try again after commuting shuffle. Example:
2640 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2641 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2642 std::swap(V0, V1);
2643 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2644 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2645 return InsertElementInst::Create(V1, Scalar, IndexC);
2646
2647 return nullptr;
2648}
2649
2650static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2651 // Match the operands as identity with padding (also known as concatenation
2652 // with undef) shuffles of the same source type. The backend is expected to
2653 // recreate these concatenations from a shuffle of narrow operands.
2654 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2655 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2656 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2657 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2658 return nullptr;
2659
2660 // We limit this transform to power-of-2 types because we expect that the
2661 // backend can convert the simplified IR patterns to identical nodes as the
2662 // original IR.
2663 // TODO: If we can verify the same behavior for arbitrary types, the
2664 // power-of-2 checks can be removed.
2665 Value *X = Shuffle0->getOperand(0);
2666 Value *Y = Shuffle1->getOperand(0);
2667 if (X->getType() != Y->getType() ||
2668 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2669 !isPowerOf2_32(
2670 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2671 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2672 match(X, m_Undef()) || match(Y, m_Undef()))
2673 return nullptr;
2674 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2675 match(Shuffle1->getOperand(1), m_Undef()) &&
2676 "Unexpected operand for identity shuffle");
2677
2678 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2679 // operands directly by adjusting the shuffle mask to account for the narrower
2680 // types:
2681 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
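 // For example (hypothetical types): if X and Y are <2 x i32> widened to
 // <4 x i32>, then shuf (widen X), (widen Y), <0,5> --> shuf X, Y, <0,3>.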
2682 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2683 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2684 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2685
2686 ArrayRef<int> Mask = Shuf.getShuffleMask();
2687 SmallVector<int, 16> NewMask(Mask.size(), -1);
2688 for (int i = 0, e = Mask.size(); i != e; ++i) {
2689 if (Mask[i] == -1)
2690 continue;
2691
2692 // If this shuffle is choosing an undef element from 1 of the sources, that
2693 // element is undef.
2694 if (Mask[i] < WideElts) {
2695 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2696 continue;
2697 } else {
2698 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2699 continue;
2700 }
2701
2702 // If this shuffle is choosing from the 1st narrow op, the mask element is
2703 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2704 // element is offset down to adjust for the narrow vector widths.
2705 if (Mask[i] < WideElts) {
2706 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2707 NewMask[i] = Mask[i];
2708 } else {
2709 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2710 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2711 }
2712 }
2713 return new ShuffleVectorInst(X, Y, NewMask);
2714}
2715
2716// Splatting the first element of the result of a BinOp, where any of the
2717// BinOp's operands is the result of a first-element splat, can be simplified
2718// to splatting the first element of the BinOp's result.
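// For example (hypothetical values):
//   shuf (add (shuf %x, undef, zeroinitializer), %y), undef, zeroinitializer
//   --> shuf (add %x, %y), undef, zeroinitializer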
2719Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2720 if (!match(SVI.getOperand(1), m_Undef()) ||
2721 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2722 !SVI.getOperand(0)->hasOneUse())
2723 return nullptr;
2724
2725 Value *Op0 = SVI.getOperand(0);
2726 Value *X, *Y;
2727 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2728 m_Value(Y))) &&
2729 !match(Op0, m_BinOp(m_Value(X),
2730 m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2731 return nullptr;
2732 if (X->getType() != Y->getType())
2733 return nullptr;
2734
2735 auto *BinOp = cast<BinaryOperator>(Op0);
2736 if (!isSafeToSpeculativelyExecute(BinOp))
2737 return nullptr;
2738
2739 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2740 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2741 NewBOI->copyIRFlags(BinOp);
2742
2743 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2744}
2745
2746Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2747 Value *LHS = SVI.getOperand(0);
2748 Value *RHS = SVI.getOperand(1);
2749 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2750 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2751 SVI.getType(), ShufQuery))
2752 return replaceInstUsesWith(SVI, V);
2753
2754 if (Instruction *I = simplifyBinOpSplats(SVI))
2755 return I;
2756
2757 if (isa<ScalableVectorType>(LHS->getType()))
2758 return nullptr;
2759
2760 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2761 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2762
2763 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2764 //
2765 // if X and Y are of the same (vector) type, and the element size is not
2766 // changed by the bitcasts, we can distribute the bitcasts through the
2767 // shuffle, hopefully reducing the number of instructions. We make sure that
2768 // at least one bitcast only has one use, so we don't *increase* the number of
2769 // instructions here.
2770 Value *X, *Y;
2771 if (match(&SVI, m_Shuffle(m_BitCast(m_Value(X)), m_BitCast(m_Value(Y)))) &&
2772 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2773 X->getType()->getScalarSizeInBits() ==
2774 SVI.getType()->getScalarSizeInBits() &&
2775 (LHS->hasOneUse() || RHS->hasOneUse())) {
2776 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2777 SVI.getName() + ".uncasted");
2778 return new BitCastInst(V, SVI.getType());
2779 }
2780
2781 ArrayRef<int> Mask = SVI.getShuffleMask();
2782 Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
2783
2784 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2785 // simulated shuffle can simplify, then this shuffle is unnecessary:
2786 // shuf (bitcast X), undef, Mask --> bitcast X'
2787 // TODO: This could be extended to allow length-changing shuffles.
2788 // The transform might also be obsoleted if we allowed canonicalization
2789 // of bitcasted shuffles.
2790 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2791 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2792 // Try to create a scaled mask constant.
2793 auto *XType = cast<FixedVectorType>(X->getType());
2794 unsigned XNumElts = XType->getNumElements();
2795 SmallVector<int, 16> ScaledMask;
2796 if (XNumElts >= VWidth) {
2797 assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2798 narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2799 } else {
2800 assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2801 if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2802 ScaledMask.clear();
2803 }
2804 if (!ScaledMask.empty()) {
2805 // If the shuffled source vector simplifies, cast that value to this
2806 // shuffle's type.
2807 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2808 ScaledMask, XType, ShufQuery))
2809 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2810 }
2811 }
2812
2813 // shuffle x, x, mask --> shuffle x, undef, mask'
2814 if (LHS == RHS) {
2815 assert(!match(RHS, m_Undef()) &&
2816 "Shuffle with 2 undef ops not simplified?");
2817 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2818 }
2819
2820 // shuffle undef, x, mask --> shuffle x, undef, mask'
2821 if (match(LHS, m_Undef())) {
2822 SVI.commute();
2823 return &SVI;
2824 }
2825
2826 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2827 return I;
2828
2829 if (Instruction *I = foldSelectShuffle(SVI))
2830 return I;
2831
2832 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2833 return I;
2834
2835 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2836 return I;
2837
2838 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2839 return I;
2840
2841 if (Instruction *I = foldCastShuffle(SVI, Builder))
2842 return I;
2843
2844 APInt UndefElts(VWidth, 0);
2845 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2846 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2847 if (V != &SVI)
2848 return replaceInstUsesWith(SVI, V);
2849 return &SVI;
2850 }
2851
2853 return I;
2854
2855 // These transforms have the potential to lose undef knowledge, so they are
2856 // intentionally placed after SimplifyDemandedVectorElts().
2857 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2858 return I;
2859 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2860 return I;
2861
2862 if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2863 Value *V = evaluateInDifferentElementOrder(LHS, Mask);
2864 return replaceInstUsesWith(SVI, V);
2865 }
2866
2867 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2868 // a non-vector type. We can instead bitcast the original vector followed by
2869 // an extract of the desired element:
2870 //
2871 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2872 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2873 // %1 = bitcast <4 x i8> %sroa to i32
2874 // Becomes:
2875 // %bc = bitcast <16 x i8> %in to <4 x i32>
2876 // %ext = extractelement <4 x i32> %bc, i32 0
2877 //
2878 // If the shuffle is extracting a contiguous range of values from the input
2879 // vector then each use which is a bitcast of the extracted size can be
2880 // replaced. This will work if the vector types are compatible, and the begin
2881 // index is aligned to a value in the casted vector type. If the begin index
2882 // isn't aligned then we can shuffle the original vector (keeping the same
2883 // vector type) before extracting.
2884 //
2885 // This code will bail out if the target type is fundamentally incompatible
2886 // with vectors of the source type.
2887 //
2888 // Example of <16 x i8>, target type i32:
2889 // Index range [4,8): v-----------v Will work.
2890 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2891 // <16 x i8>: | | | | | | | | | | | | | | | | |
2892 // <4 x i32>: | | | | |
2893 // +-----------+-----------+-----------+-----------+
2894 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2895 // Target type i40: ^--------------^ Won't work, bail.
2896 bool MadeChange = false;
2897 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2898 Value *V = LHS;
2899 unsigned MaskElems = Mask.size();
2900 auto *SrcTy = cast<FixedVectorType>(V->getType());
2901 unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
2902 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2903 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2904 unsigned SrcNumElems = SrcTy->getNumElements();
2905 SmallVector<BitCastInst *, 8> BCs;
2906 DenseMap<Type *, Value *> NewBCs;
2907 for (User *U : SVI.users())
2908 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2909 if (!BC->use_empty())
2910 // Only visit bitcasts that weren't previously handled.
2911 BCs.push_back(BC);
2912 for (BitCastInst *BC : BCs) {
2913 unsigned BegIdx = Mask.front();
2914 Type *TgtTy = BC->getDestTy();
2915 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2916 if (!TgtElemBitWidth)
2917 continue;
2918 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2919 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2920 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2921 if (!VecBitWidthsEqual)
2922 continue;
2923 if (!VectorType::isValidElementType(TgtTy))
2924 continue;
2925 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2926 if (!BegIsAligned) {
2927 // Shuffle the input so [0,MaskElems) contains the output, and
2928 // [MaskElems,SrcNumElems) is undef.
2929 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2930 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2931 ShuffleMask[I] = Idx;
2932 V = Builder.CreateShuffleVector(V, ShuffleMask,
2933 SVI.getName() + ".extract");
2934 BegIdx = 0;
2935 }
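// e.g. (editor's illustration): extracting byte range [6,10) from <16 x i8>
// builds ShuffleMask <6,7,8,9,-1,...,-1>, moving the extracted bytes to the
// front so that BegIdx becomes 0.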
2936 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2937 assert(SrcElemsPerTgtElem);
2938 BegIdx /= SrcElemsPerTgtElem;
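// e.g. (editor's illustration): extracting bytes [4,8) of <16 x i8> as one
// i32 gives SrcElemsPerTgtElem = 32/8 = 4, so BegIdx 4 becomes element 1 of
// the <4 x i32> bitcast.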
2939 bool BCAlreadyExists = NewBCs.contains(CastSrcTy);
2940 auto *NewBC =
2941 BCAlreadyExists
2942 ? NewBCs[CastSrcTy]
2943 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2944 if (!BCAlreadyExists)
2945 NewBCs[CastSrcTy] = NewBC;
2946 auto *Ext = Builder.CreateExtractElement(
2947 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
2948 // The shufflevector isn't being replaced: the bitcast that used it
2949 // is. InstCombine will visit the newly-created instructions.
2950 replaceInstUsesWith(*BC, Ext);
2951 MadeChange = true;
2952 }
2953 }
2954
2955 // If the LHS is a shufflevector itself, see if we can combine it with this
2956 // one without producing an unusual shuffle.
2957 // Cases that might be simplified:
2958 // 1.
2959 // x1=shuffle(v1,v2,mask1)
2960 // x=shuffle(x1,undef,mask)
2961 // ==>
2962 // x=shuffle(v1,undef,newMask)
2963 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2964 // 2.
2965 // x1=shuffle(v1,undef,mask1)
2966 // x=shuffle(x1,x2,mask)
2967 // where v1.size() == mask1.size()
2968 // ==>
2969 // x=shuffle(v1,x2,newMask)
2970 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2971 // 3.
2972 // x2=shuffle(v2,undef,mask2)
2973 // x=shuffle(x1,x2,mask)
2974 // where v2.size() == mask2.size()
2975 // ==>
2976 // x=shuffle(x1,v2,newMask)
2977 // newMask[i] = (mask[i] < x1.size())
2978 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2979 // 4.
2980 // x1=shuffle(v1,undef,mask1)
2981 // x2=shuffle(v2,undef,mask2)
2982 // x=shuffle(x1,x2,mask)
2983 // where v1.size() == v2.size()
2984 // ==>
2985 // x=shuffle(v1,v2,newMask)
2986 // newMask[i] = (mask[i] < x1.size())
2987 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
2988 //
2989 // Here we are deliberately conservative:
2990 // we avoid producing a shuffle mask that did not appear in the input
2991 // program, because codegen may not be smart enough to split a merged
2992 // shuffle back into two specific shuffles and may produce worse code. As
2993 // such, we only merge two shuffles if the result is either a splat or one
2994 // of the input shuffle masks. In that case, merging the shuffles just
2995 // removes one instruction, which we know is safe. This is good for things
2996 // like turning: (splat(splat)) -> splat, or
2997 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
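// Illustrative case-1 sketch (editor's example, not in the original source):
//   %x1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
//   %x  = shufflevector <4 x i32> %x1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
// folds to the splat already present in the input:
//   %x  = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>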
2998 ShuffleVectorInst *LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
2999 ShuffleVectorInst *RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3000 if (LHSShuffle)
3001 if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
3002 LHSShuffle = nullptr;
3003 if (RHSShuffle)
3004 if (!match(RHSShuffle->getOperand(1), m_Undef()))
3005 RHSShuffle = nullptr;
3006 if (!LHSShuffle && !RHSShuffle)
3007 return MadeChange ? &SVI : nullptr;
3008
3009 Value *LHSOp0 = nullptr;
3010 Value *LHSOp1 = nullptr;
3011 Value *RHSOp0 = nullptr;
3012 unsigned LHSOp0Width = 0;
3013 unsigned RHSOp0Width = 0;
3014 if (LHSShuffle) {
3015 LHSOp0 = LHSShuffle->getOperand(0);
3016 LHSOp1 = LHSShuffle->getOperand(1);
3017 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3018 }
3019 if (RHSShuffle) {
3020 RHSOp0 = RHSShuffle->getOperand(0);
3021 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3022 }
3023 Value *newLHS = LHS;
3024 Value *newRHS = RHS;
3025 if (LHSShuffle) {
3026 // case 1
3027 if (match(RHS, m_Undef())) {
3028 newLHS = LHSOp0;
3029 newRHS = LHSOp1;
3030 }
3031 // case 2 or 4
3032 else if (LHSOp0Width == LHSWidth) {
3033 newLHS = LHSOp0;
3034 }
3035 }
3036 // case 3 or 4
3037 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3038 newRHS = RHSOp0;
3039 }
3040 // case 4
3041 if (LHSOp0 == RHSOp0) {
3042 newLHS = LHSOp0;
3043 newRHS = nullptr;
3044 }
3045
3046 if (newLHS == LHS && newRHS == RHS)
3047 return MadeChange ? &SVI : nullptr;
3048
3049 ArrayRef<int> LHSMask;
3050 ArrayRef<int> RHSMask;
3051 if (newLHS != LHS)
3052 LHSMask = LHSShuffle->getShuffleMask();
3053 if (RHSShuffle && newRHS != RHS)
3054 RHSMask = RHSShuffle->getShuffleMask();
3055
3056 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3057 SmallVector<int, 16> newMask;
3058 bool isSplat = true;
3059 int SplatElt = -1;
3060 // Create a new mask for the new ShuffleVectorInst so that the new
3061 // ShuffleVectorInst is equivalent to the original one.
3062 for (unsigned i = 0; i < VWidth; ++i) {
3063 int eltMask;
3064 if (Mask[i] < 0) {
3065 // This element is an undef value.
3066 eltMask = -1;
3067 } else if (Mask[i] < (int)LHSWidth) {
3068 // This element is from the left-hand-side vector operand.
3069 //
3070 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3071 // new mask value for the element.
3072 if (newLHS != LHS) {
3073 eltMask = LHSMask[Mask[i]];
3074 // If the value selected is an undef value, explicitly specify it
3075 // with a -1 mask value.
3076 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
3077 eltMask = -1;
3078 } else
3079 eltMask = Mask[i];
3080 } else {
3081 // This element is from the right-hand-side vector operand.
3082 //
3083 // If the value selected is an undef value, explicitly specify it
3084 // with a -1 mask value. (case 1)
3085 if (match(RHS, m_Undef()))
3086 eltMask = -1;
3087 // If RHS is going to be replaced (case 3 or 4), calculate the
3088 // new mask value for the element.
3089 else if (newRHS != RHS) {
3090 eltMask = RHSMask[Mask[i]-LHSWidth];
3091 // If the value selected is an undef value, explicitly specify it
3092 // with a -1 mask value.
3093 if (eltMask >= (int)RHSOp0Width) {
3094 assert(match(RHSShuffle->getOperand(1), m_Undef()) &&
3095 "should have been check above");
3096 eltMask = -1;
3097 }
3098 } else
3099 eltMask = Mask[i]-LHSWidth;
3100
3101 // If LHS's width is changed, shift the mask value accordingly.
3102 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3103 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3104 // If newRHS == newLHS, we want to remap any references from newRHS to
3105 // newLHS so that we can properly identify splats that would otherwise be
3106 // hidden by references split across the two vectors.
3107 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3108 eltMask += newLHSWidth;
3109 }
3110
3111 // Check if this could still be a splat.
3112 if (eltMask >= 0) {
3113 if (SplatElt >= 0 && SplatElt != eltMask)
3114 isSplat = false;
3115 SplatElt = eltMask;
3116 }
3117
3118 newMask.push_back(eltMask);
3119 }
3120
3121 // If the result mask is equal to one of the original shuffle masks,
3122 // or is a splat, do the replacement.
3123 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3124 if (!newRHS)
3125 newRHS = UndefValue::get(newLHS->getType());
3126 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3127 }
3128
3129 return MadeChange ? &SVI : nullptr;
3130}