//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
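// Illustrative sketch (hypothetical IR, not part of the original file): with a
// constant extract index, a one-use binop whose other operand is itself cheap
// (here, a constant) qualifies, so
//   %b = add <4 x i32> %x, <i32 7, i32 7, i32 7, i32 7>
//   %e = extractelement <4 x i32> %b, i64 0
// can later be rewritten by the caller as
//   %x0 = extractelement <4 x i32> %x, i64 0
//   %e  = add i32 %x0, 7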
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // Index needs to be lower than the minimum size of the vector, because
    // for scalable vectors, the vector size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpInst::Predicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
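// Illustrative sketch (hypothetical IR, not part of the original file): a loop
// that updates a vector PHI but only ever reads one lane,
//   %vphi  = phi <4 x i32> [ %init, %entry ], [ %next, %loop ]
//   %next  = add <4 x i32> %vphi, <i32 1, i32 1, i32 1, i32 1>
//   %e     = extractelement <4 x i32> %vphi, i64 0
// can iterate on the single demanded lane instead:
//   %sphi  = phi i32 [ %init0, %entry ], [ %snext, %loop ] ; %init0 extracted
//   %snext = add i32 %sphi, 1                              ; from %init, lane 0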
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize.
  // Otherwise, return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0),
          B0->getIterator());
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if (!ShiftAmountC ||
        (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
         Ext.getVectorOperand()->hasOneUse())) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
    // of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove insertelement, if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved to SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //              Vector Byte Elt Index:    0  1  2  3  4  5  6  7
    //                                       +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:       |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:               |                 |S2|S3|
    //                                       +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, we must right-shift little-endian. Big-endian is just a
    // truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find elements of V demanded by UserInstr.
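// Illustrative sketch (hypothetical IR, not part of the original file): if the
// user is
//   %e = extractelement <4 x i32> %v, i64 2
// the returned demanded-elements mask is 0b0100; a shufflevector user
// contributes exactly the lanes its mask actually reads from %v.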
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
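// For example (hypothetical): two extractelement users at constant indices 0
// and 3 of a <4 x i32> value union to the mask 0b1001; any non-instruction
// user conservatively makes the result all-ones.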
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
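// For example (hypothetical IR): the i32 index in
//   %e = extractelement <4 x float> %v, i32 1
// is rewritten to i64 so equivalent extracts CSE:
//   %e = extractelement <4 x float> %v, i64 1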
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getType()->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement will for instance try to do out of bounds accesses
  // because of the values of %c1 and/or %c2, the sequence could be optimized
  // early. This is currently not possible because constant folding will reach
  // an unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

418
419 // If extracting a specified index from the vector, see if we can recursively
420 // find a previously computed scalar that was inserted into the vector.
421 auto *IndexC = dyn_cast<ConstantInt>(Index);
422 if (IndexC) {
423 // Canonicalize type of constant indices to i64 to simplify CSE
424 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
425 return replaceOperand(EI, 1, NewIdx);
426
428 unsigned NumElts = EC.getKnownMinValue();
429
430 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
431 Intrinsic::ID IID = II->getIntrinsicID();
432 // Index needs to be lower than the minimum size of the vector, because
433 // for scalable vector, the vector size is known at run time.
434 if (IID == Intrinsic::experimental_stepvector &&
435 IndexC->getValue().ult(NumElts)) {
436 Type *Ty = EI.getType();
437 unsigned BitWidth = Ty->getIntegerBitWidth();
438 Value *Idx;
439 // Return index when its value does not exceed the allowed limit
440 // for the element type of the vector, otherwise return undefined.
441 if (IndexC->getValue().getActiveBits() <= BitWidth)
442 Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
443 else
444 Idx = PoisonValue::get(Ty);
445 return replaceInstUsesWith(EI, Idx);
446 }
447 }
448
449 // InstSimplify should handle cases where the index is invalid.
450 // For fixed-length vector, it's invalid to extract out-of-range element.
451 if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
452 return nullptr;
453
454 if (Instruction *I = foldBitcastExtElt(EI))
455 return I;
456
457 // If there's a vector PHI feeding a scalar use through this extractelement
458 // instruction, try to scalarize the PHI.
459 if (auto *Phi = dyn_cast<PHINode>(SrcVec))
460 if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
461 return ScalarPHI;
462 }

  // TODO: Come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpInst::Predicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return CmpInst::Create(cast<CmpInst>(SrcVec)->getOpcode(), Pred, E0, E1);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal by value. If both are constants, they must not be the same
      // value, so extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out where
      // it came from and extract from the appropriate input element instead.
      // Restrict the following transformation to fixed-length vectors.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int64Ty = Type::getInt64Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int64Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable types, where the number of elements is unknown at
    // compile-time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt UndefElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, UndefElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt UndefElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
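// Illustrative sketch (hypothetical IR, not part of the original file):
//   %e = extractelement <4 x i32> %rhs, i64 1
//   %v = insertelement <4 x i32> %lhs, i32 %e, i64 3
// collects with mask <0, 1, 2, 5>: lanes 0-2 come from %lhs and lane 3 is
// element 1 of %rhs (offset by the 4 LHS elements).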
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Undef())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
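// Illustrative sketch (hypothetical IR, not part of the original file): with a
// <4 x i32> destination and a <2 x i32> source,
//   %e = extractelement <2 x i32> %src, i64 0
//   %i = insertelement <4 x i32> %dst, i32 %e, i64 1
// the source is first widened so a single shuffle can do the work:
//   %wide = shufflevector <2 x i32> %src, <2 x i32> poison,
//                         <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>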
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of insertelement,
/// extractelement pairs. If PermittedRHS is set, then we must either use it or
/// not rely on the second vector source. Return a std::pair containing the
/// left and right vectors of the proposed shuffle (or 0), and set the Mask
/// parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
using ShuffleOps = std::pair<Value *, Value *>;
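// Illustrative sketch (hypothetical IR, not part of the original file): the
// insert/extract chain
//   %e0 = extractelement <4 x i32> %a, i64 0
//   %v0 = insertelement <4 x i32> poison, i32 %e0, i64 0
//   %e1 = extractelement <4 x i32> %b, i64 2
//   %v1 = insertelement <4 x i32> %v0, i32 %e1, i64 1
// collects into the pair (%a, %b) with mask <0, 6, poison, poison>.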
773
775 Value *PermittedRHS,
776 InstCombinerImpl &IC, bool &Rerun) {
777 assert(V->getType()->isVectorTy() && "Invalid shuffle!");
778 unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
779
780 if (match(V, m_Undef())) {
781 Mask.assign(NumElts, -1);
782 return std::make_pair(
783 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr);
784 }
785
786 if (isa<ConstantAggregateZero>(V)) {
787 Mask.assign(NumElts, 0);
788 return std::make_pair(V, nullptr);
789 }
790
791 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
792 // If this is an insert of an extract from some other vector, include it.
793 Value *VecOp = IEI->getOperand(0);
794 Value *ScalarOp = IEI->getOperand(1);
795 Value *IdxOp = IEI->getOperand(2);
796
797 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
798 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
799 unsigned ExtractedIdx =
800 cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
801 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
802
803 // Either the extracted from or inserted into vector must be RHSVec,
804 // otherwise we'd end up with a shuffle of three inputs.
805 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
806 Value *RHS = EI->getOperand(0);
807 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
808 assert(LR.second == nullptr || LR.second == RHS);
809
810 if (LR.first->getType() != RHS->getType()) {
811 // Although we are giving up for now, see if we can create extracts
812 // that match the inserts for another round of combining.
813 if (replaceExtractElements(IEI, EI, IC))
814 Rerun = true;
815
816 // We tried our best, but we can't find anything compatible with RHS
817 // further up the chain. Return a trivial shuffle.
818 for (unsigned i = 0; i < NumElts; ++i)
819 Mask[i] = i;
820 return std::make_pair(V, nullptr);
821 }
822
823 unsigned NumLHSElts =
824 cast<FixedVectorType>(RHS->getType())->getNumElements();
825 Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
826 return std::make_pair(LR.first, RHS);
827 }
828
829 if (VecOp == PermittedRHS) {
830 // We've gone as far as we can: anything on the other side of the
831 // extractelement will already have been converted into a shuffle.
832 unsigned NumLHSElts =
833 cast<FixedVectorType>(EI->getOperand(0)->getType())
834 ->getNumElements();
835 for (unsigned i = 0; i != NumElts; ++i)
836 Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
837 return std::make_pair(EI->getOperand(0), PermittedRHS);
838 }
839
840 // If this insertelement is a chain that comes from exactly these two
841 // vectors, return the vector and the effective shuffle.
842 if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
843 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
844 Mask))
845 return std::make_pair(EI->getOperand(0), PermittedRHS);
846 }
847 }
848 }
849
850 // Otherwise, we can't do anything fancy. Return an identity vector.
851 for (unsigned i = 0; i != NumElts; ++i)
852 Mask.push_back(i);
853 return std::make_pair(V, nullptr);
854}
855
/// Look for a chain of insertvalue's that fully define an aggregate, and trace
/// back the values inserted, to see if they were all extractvalue'd from
/// the same source aggregate at the exact same element indexes.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
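// Illustrative sketch (hypothetical IR, not part of the original file):
//   %x = extractvalue { i8, i32 } %agg, 0
//   %y = extractvalue { i8, i32 } %agg, 1
//   %t = insertvalue { i8, i32 } undef, i8 %x, 0
//   %r = insertvalue { i8, i32 } %t, i32 %y, 1
// reconstructs %agg element-for-element, so %r is simply replaced by %agg.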
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang's C++ exception struct (which is hardcoded as {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either we've
  // reconstructed the full initializer or can't visit any more `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
       ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find a defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find a defining `extractvalue` instruction, but there was
    /// a mismatch: either the source type from which the extraction was didn't
    /// match the aggregate type into which the insertion was,
    /// or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // Given the value \p Elt that was being inserted into element \p EltIdx of an
  // aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB)
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find an appropriate source aggregate for each of the
  // elements, and see if it's the same aggregate for each element. If so,
  // return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element.");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, passthrough whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for aggregate, where we'll insert the final PHI that will be
  // used instead of OrigIVI. Basic block of OrigIVI is *not* the right choice.
  // We should look in which blocks each of the AggElts is being defined;
  // they all should be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate,
  // from which all the elements were originally extracted from?
  // Note that we want the map to have a stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) != AggregateDescription::Found)
      return nullptr; // Give up.
    IV.first->second = *SourceAggregate;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0,    i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

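// A shuffle is equivalent to a vector select when every mask element keeps its
// lane: element i is -1 (undef), i (from the first operand), or i + VecSize
// (from the second operand). For example (hypothetical masks on 4-element
// operands): <0, 5, 2, 7> qualifies, while <1, 0, 2, 3> does not, because
// lanes 0 and 1 cross.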
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type: the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type: the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a nop,
  // and will cause an inf-loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check that none of the intermediate steps have any additional uses,
    // except for the root insertelement instruction, which can be re-used, if
    // it inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle has a scalable type: the number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle has a scalable type: the number of elements
  // in the shuffle mask is unknown at compile-time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;

  Value *X, *Y;
  Constant *ScalarC;
  ConstantInt *IdxC1, *IdxC2;
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
  }

  return nullptr;
}

/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
1399 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1400 // Bail out if the parent has more than one use. In that case, we'd be
1401 // replacing the insertelt with a shuffle, and that's not a clear win.
1402 if (!Inst || !Inst->hasOneUse())
1403 return nullptr;
1404 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1405 // The shuffle must have a constant vector operand. The insertelt must have
1406 // a constant scalar being inserted at a constant position in the vector.
1407 Constant *ShufConstVec, *InsEltScalar;
1408 uint64_t InsEltIndex;
1409 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1410 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1411 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1412 return nullptr;
1413
1414 // Adding an element to an arbitrary shuffle could be expensive, but a
1415 // shuffle that selects elements from vectors without crossing lanes is
1416 // assumed cheap.
1417 // If we're just adding a constant into that shuffle, it will still be
1418 // cheap.
1419 if (!isShuffleEquivalentToSelect(*Shuf))
1420 return nullptr;
1421
1422 // From the above 'select' check, we know that the mask has the same number
1423 // of elements as the vector input operands. We also know that each constant
1424 // input element is used in its lane and can not be used more than once by
1425 // the shuffle. Therefore, replace the constant in the shuffle's constant
1426 // vector with the insertelt constant. Replace the constant in the shuffle's
1427 // mask vector with the insertelt index plus the length of the vector
1428 // (because the constant vector operand of a shuffle is always the 2nd
1429 // operand).
1430 ArrayRef<int> Mask = Shuf->getShuffleMask();
1431 unsigned NumElts = Mask.size();
1432 SmallVector<Constant *, 16> NewShufElts(NumElts);
1433 SmallVector<int, 16> NewMaskElts(NumElts);
1434 for (unsigned I = 0; I != NumElts; ++I) {
1435 if (I == InsEltIndex) {
1436 NewShufElts[I] = InsEltScalar;
1437 NewMaskElts[I] = InsEltIndex + NumElts;
1438 } else {
1439 // Copy over the existing values.
1440 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1441 NewMaskElts[I] = Mask[I];
1442 }
1443
1444 // Bail if we failed to find an element.
1445 if (!NewShufElts[I])
1446 return nullptr;
1447 }
1448
1449 // Create new operands for a shuffle that includes the constant of the
1450 // original insertelt. The old shuffle will be dead now.
1451 return new ShuffleVectorInst(Shuf->getOperand(0),
1452 ConstantVector::get(NewShufElts), NewMaskElts);
1453 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1454 // Transform sequences of insertelement ops with constant data/indexes into
1455 // a single shuffle op.
1456 // Cannot handle scalable types; the number of elements needed to create a
1457 // shuffle mask is not a compile-time constant.
1458 if (isa<ScalableVectorType>(InsElt.getType()))
1459 return nullptr;
1460 unsigned NumElts =
1461 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1462
1463 uint64_t InsertIdx[2];
1464 Constant *Val[2];
1465 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1466 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1467 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1468 !match(IEI->getOperand(1), m_Constant(Val[1])))
1469 return nullptr;
1470 SmallVector<Constant *, 16> Values(NumElts);
1471 SmallVector<int, 16> Mask(NumElts);
1472 auto ValI = std::begin(Val);
1473 // Generate new constant vector and mask.
1474 // We have 2 values/masks from the insertelements instructions. Insert them
1475 // into new value/mask vectors.
1476 for (uint64_t I : InsertIdx) {
1477 if (!Values[I]) {
1478 Values[I] = *ValI;
1479 Mask[I] = NumElts + I;
1480 }
1481 ++ValI;
1482 }
1483 // Remaining values are filled with 'poison' values.
1484 for (unsigned I = 0; I < NumElts; ++I) {
1485 if (!Values[I]) {
1486 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1487 Mask[I] = I;
1488 }
1489 }
1490 // Create new operands for a shuffle that includes the constant of the
1491 // original insertelt.
1492 return new ShuffleVectorInst(IEI->getOperand(0),
1493 ConstantVector::get(Values), Mask);
1494 }
1495 return nullptr;
1496}
1497
1498/// If both the base vector and the inserted element are extended from the same
1499/// type, do the insert element in the narrow source type followed by extend.
1500/// TODO: This can be extended to include other cast opcodes, but particularly
1501/// if we create a wider insertelement, make sure codegen is not harmed.
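///
/// For example (illustrative):
///   inselt (fpext <2 x float> %v to <2 x double>), (fpext float %s), Idx
///   --> fpext (inselt <2 x float> %v, float %s, Idx) to <2 x double>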
1502static Instruction *narrowInsElt(InsertElementInst &InsElt,
1503 InstCombiner::BuilderTy &Builder) {
1504 // We are creating a vector extend. If the original vector extend has another
1505 // use, that would mean we end up with 2 vector extends, so avoid that.
1506 // TODO: We could ease the use-clause to "if at least one op has one use"
1507 // (assuming that the source types match - see next TODO comment).
1508 Value *Vec = InsElt.getOperand(0);
1509 if (!Vec->hasOneUse())
1510 return nullptr;
1511
1512 Value *Scalar = InsElt.getOperand(1);
1513 Value *X, *Y;
1514 CastInst::CastOps CastOpcode;
1515 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1516 CastOpcode = Instruction::FPExt;
1517 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1518 CastOpcode = Instruction::SExt;
1519 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1520 CastOpcode = Instruction::ZExt;
1521 else
1522 return nullptr;
1523
1524 // TODO: We can allow mismatched types by creating an intermediate cast.
1525 if (X->getType()->getScalarType() != Y->getType())
1526 return nullptr;
1527
1528 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1529 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1530 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1531}
1532
1533/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1534/// try to convert to a single insert with appropriate bitcasts.
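///
/// For example (illustrative, little endian, an i32 split into 2 x i16):
///   %lo = trunc i32 %x to i16
///   %hi = trunc (lshr i32 %x, 16) to i16
///   inselt (inselt undef, i16 %lo, 0), i16 %hi, 1
///   --> bitcast (inselt <1 x i32> undef, i32 %x, 0) to <2 x i16>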
1535static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1536 bool IsBigEndian,
1537 InstCombiner::BuilderTy &Builder) {
1538 Value *VecOp = InsElt.getOperand(0);
1539 Value *ScalarOp = InsElt.getOperand(1);
1540 Value *IndexOp = InsElt.getOperand(2);
1541
1542 // Pattern depends on endian because we expect lower index is inserted first.
1543 // Big endian:
1544 // inselt (inselt BaseVec, (trunc (lshr X, BW/2), Index0), (trunc X), Index1
1545 // Little endian:
1546 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1547 // Note: It is not safe to do this transform with an arbitrary base vector
1548 // because the bitcast of that vector to fewer/larger elements could
1549 // allow poison to spill into an element that was not poison before.
1550 // TODO: Detect smaller fractions of the scalar.
1551 // TODO: One-use checks are conservative.
1552 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1553 Value *Scalar0, *BaseVec;
1554 uint64_t Index0, Index1;
1555 if (!VTy || (VTy->getNumElements() & 1) ||
1556 !match(IndexOp, m_ConstantInt(Index1)) ||
1557 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1558 m_ConstantInt(Index0))) ||
1559 !match(BaseVec, m_Undef()))
1560 return nullptr;
1561
1562 // The first insert must be to the index one less than this one, and
1563 // the first insert must be to an even index.
1564 if (Index0 + 1 != Index1 || Index0 & 1)
1565 return nullptr;
1566
1567 // For big endian, the high half of the value should be inserted first.
1568 // For little endian, the low half of the value should be inserted first.
1569 Value *X;
1570 uint64_t ShAmt;
1571 if (IsBigEndian) {
1572 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1573 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1574 return nullptr;
1575 } else {
1576 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1577 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1578 return nullptr;
1579 }
1580
1581 Type *SrcTy = X->getType();
1582 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1583 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1584 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1585 return nullptr;
1586
1587 // Bitcast the base vector to a vector type with the source element type.
1588 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1589 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1590
1591 // Scale the insert index for a vector with half as many elements.
1592 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1593 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1594 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1595 return new BitCastInst(NewInsert, VTy);
1596}
1597
1598Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1599 Value *VecOp = IE.getOperand(0);
1600 Value *ScalarOp = IE.getOperand(1);
1601 Value *IdxOp = IE.getOperand(2);
1602
1603 if (auto *V = simplifyInsertElementInst(
1604 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1605 return replaceInstUsesWith(IE, V);
1606
1607 // Canonicalize type of constant indices to i64 to simplify CSE
1608 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1609 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1610 return replaceOperand(IE, 2, NewIdx);
1611
1612 Value *BaseVec, *OtherScalar;
1613 uint64_t OtherIndexVal;
1614 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1615 m_Value(OtherScalar),
1616 m_ConstantInt(OtherIndexVal)))) &&
1617 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1618 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1619 return InsertElementInst::Create(NewIns, OtherScalar,
1620 Builder.getInt64(OtherIndexVal));
1621 }
1622 }
1623
1624 // If the scalar is bitcast and inserted into undef, do the insert in the
1625 // source type followed by bitcast.
1626 // TODO: Generalize for insert into any constant, not just undef?
1627 Value *ScalarSrc;
1628 if (match(VecOp, m_Undef()) &&
1629 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1630 (ScalarSrc->getType()->isIntegerTy() ||
1631 ScalarSrc->getType()->isFloatingPointTy())) {
1632 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1633 // bitcast (inselt undef, ScalarSrc, IdxOp)
1634 Type *ScalarTy = ScalarSrc->getType();
1635 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1636 UndefValue *NewUndef = UndefValue::get(VecTy);
1637 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1638 return new BitCastInst(NewInsElt, IE.getType());
1639 }
1640
1641 // If the vector and scalar are both bitcast from the same element type, do
1642 // the insert in that source type followed by bitcast.
1643 Value *VecSrc;
1644 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1645 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1646 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1647 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1648 cast<VectorType>(VecSrc->getType())->getElementType() ==
1649 ScalarSrc->getType()) {
1650 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1651 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1652 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1653 return new BitCastInst(NewInsElt, IE.getType());
1654 }
1655
1656 // If the inserted element was extracted from some other fixed-length vector
1657 // and both indexes are valid constants, try to turn this into a shuffle.
1658 // Cannot handle scalable vector types; the number of elements needed to
1659 // create a shuffle mask is not a compile-time constant.
1660 uint64_t InsertedIdx, ExtractedIdx;
1661 Value *ExtVecOp;
1662 if (isa<FixedVectorType>(IE.getType()) &&
1663 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1664 match(ScalarOp,
1665 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1666 isa<FixedVectorType>(ExtVecOp->getType()) &&
1667 ExtractedIdx <
1668 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1669 // TODO: Looking at the user(s) to determine if this insert is a
1670 // fold-to-shuffle opportunity does not match the usual instcombine
1671 // constraints. We should decide if the transform is worthy based only
1672 // on this instruction and its operands, but that may not work currently.
1673 //
1674 // Here, we are trying to avoid creating shuffles before reaching
1675 // the end of a chain of extract-insert pairs. This is complicated because
1676 // we do not generally form arbitrary shuffle masks in instcombine
1677 // (because those may codegen poorly), but collectShuffleElements() does
1678 // exactly that.
1679 //
1680 // The rules for determining what is an acceptable target-independent
1681 // shuffle mask are fuzzy because they evolve based on the backend's
1682 // capabilities and real-world impact.
1683 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1684 if (!Insert.hasOneUse())
1685 return true;
1686 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1687 if (!InsertUser)
1688 return true;
1689 return false;
1690 };
1691
1692 // Try to form a shuffle from a chain of extract-insert ops.
1693 if (isShuffleRootCandidate(IE)) {
1694 bool Rerun = true;
1695 while (Rerun) {
1696 Rerun = false;
1697
1698 SmallVector<int, 16> Mask;
1699 ShuffleOps LR =
1700 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1701
1702 // The proposed shuffle may be trivial, in which case we shouldn't
1703 // perform the combine.
1704 if (LR.first != &IE && LR.second != &IE) {
1705 // We now have a shuffle of LHS, RHS, Mask.
1706 if (LR.second == nullptr)
1707 LR.second = PoisonValue::get(LR.first->getType());
1708 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1709 }
1710 }
1711 }
1712 }
1713
1714 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1715 unsigned VWidth = VecTy->getNumElements();
1716 APInt UndefElts(VWidth, 0);
1717 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1718 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
1719 if (V != &IE)
1720 return replaceInstUsesWith(IE, V);
1721 return &IE;
1722 }
1723 }
1724
1725 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1726 return Shuf;
1727
1728 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1729 return NewInsElt;
1730
1731 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1732 return Broadcast;
1733
1734 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1735 return Splat;
1736
1737 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1738 return IdentityShuf;
1739
1740 if (Instruction *Ext = narrowInsElt(IE, Builder))
1741 return Ext;
1742
1743 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1744 return Ext;
1745
1746 return nullptr;
1747}
1748
1749/// Return true if we can evaluate the specified expression tree if the vector
1750/// elements were shuffled in a different order.
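///
/// For example (illustrative): a one-use 'add %x, <i32 1, i32 2>' can be
/// evaluated under mask <1,0> as 'add (shuf %x, <1,0>), <i32 2, i32 1>', so
/// this returns true for that expression tree.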
1751static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1752 unsigned Depth = 5) {
1753 // We can always reorder the elements of a constant.
1754 if (isa<Constant>(V))
1755 return true;
1756
1757 // We won't reorder vector arguments. No IPO here.
1758 Instruction *I = dyn_cast<Instruction>(V);
1759 if (!I) return false;
1760
1761 // Two users may expect different orders of the elements. Don't try it.
1762 if (!I->hasOneUse())
1763 return false;
1764
1765 if (Depth == 0) return false;
1766
1767 switch (I->getOpcode()) {
1768 case Instruction::UDiv:
1769 case Instruction::SDiv:
1770 case Instruction::URem:
1771 case Instruction::SRem:
1772 // Propagating an undefined shuffle mask element to integer div/rem is not
1773 // allowed because those opcodes can create immediate undefined behavior
1774 // from an undefined element in an operand.
1775 if (llvm::is_contained(Mask, -1))
1776 return false;
1777 [[fallthrough]];
1778 case Instruction::Add:
1779 case Instruction::FAdd:
1780 case Instruction::Sub:
1781 case Instruction::FSub:
1782 case Instruction::Mul:
1783 case Instruction::FMul:
1784 case Instruction::FDiv:
1785 case Instruction::FRem:
1786 case Instruction::Shl:
1787 case Instruction::LShr:
1788 case Instruction::AShr:
1789 case Instruction::And:
1790 case Instruction::Or:
1791 case Instruction::Xor:
1792 case Instruction::ICmp:
1793 case Instruction::FCmp:
1794 case Instruction::Trunc:
1795 case Instruction::ZExt:
1796 case Instruction::SExt:
1797 case Instruction::FPToUI:
1798 case Instruction::FPToSI:
1799 case Instruction::UIToFP:
1800 case Instruction::SIToFP:
1801 case Instruction::FPTrunc:
1802 case Instruction::FPExt:
1803 case Instruction::GetElementPtr: {
1804 // Bail out if we would create longer vector ops. We could allow creating
1805 // longer vector ops, but that may result in more expensive codegen.
1806 Type *ITy = I->getType();
1807 if (ITy->isVectorTy() &&
1808 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1809 return false;
1810 for (Value *Operand : I->operands()) {
1811 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1812 return false;
1813 }
1814 return true;
1815 }
1816 case Instruction::InsertElement: {
1817 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1818 if (!CI) return false;
1819 int ElementNumber = CI->getLimitedValue();
1820
1821 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1822 // can't put an element into multiple indices.
1823 bool SeenOnce = false;
1824 for (int I : Mask) {
1825 if (I == ElementNumber) {
1826 if (SeenOnce)
1827 return false;
1828 SeenOnce = true;
1829 }
1830 }
1831 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1832 }
1833 }
1834 return false;
1835}
1836
1837/// Rebuild a new instruction just like 'I' but with the new operands given.
1838/// In the event of a type mismatch, the types of the new operands take precedence.
1839static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1840 IRBuilderBase &Builder) {
1841 Builder.SetInsertPoint(I);
1842 switch (I->getOpcode()) {
1843 case Instruction::Add:
1844 case Instruction::FAdd:
1845 case Instruction::Sub:
1846 case Instruction::FSub:
1847 case Instruction::Mul:
1848 case Instruction::FMul:
1849 case Instruction::UDiv:
1850 case Instruction::SDiv:
1851 case Instruction::FDiv:
1852 case Instruction::URem:
1853 case Instruction::SRem:
1854 case Instruction::FRem:
1855 case Instruction::Shl:
1856 case Instruction::LShr:
1857 case Instruction::AShr:
1858 case Instruction::And:
1859 case Instruction::Or:
1860 case Instruction::Xor: {
1861 BinaryOperator *BO = cast<BinaryOperator>(I);
1862 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1863 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1864 NewOps[0], NewOps[1]);
1865 if (auto *NewI = dyn_cast<Instruction>(New)) {
1866 if (isa<OverflowingBinaryOperator>(BO)) {
1867 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1868 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1869 }
1870 if (isa<PossiblyExactOperator>(BO)) {
1871 NewI->setIsExact(BO->isExact());
1872 }
1873 if (isa<FPMathOperator>(BO))
1874 NewI->copyFastMathFlags(I);
1875 }
1876 return New;
1877 }
1878 case Instruction::ICmp:
1879 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1880 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1881 NewOps[1]);
1882 case Instruction::FCmp:
1883 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1884 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1885 NewOps[1]);
1886 case Instruction::Trunc:
1887 case Instruction::ZExt:
1888 case Instruction::SExt:
1889 case Instruction::FPToUI:
1890 case Instruction::FPToSI:
1891 case Instruction::UIToFP:
1892 case Instruction::SIToFP:
1893 case Instruction::FPTrunc:
1894 case Instruction::FPExt: {
1895 // It's possible that the mask has a different number of elements from
1896 // the original cast. We recompute the destination type to match the mask.
1897 Type *DestTy = VectorType::get(
1898 I->getType()->getScalarType(),
1899 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1900 assert(NewOps.size() == 1 && "cast with #ops != 1");
1901 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
1902 DestTy);
1903 }
1904 case Instruction::GetElementPtr: {
1905 Value *Ptr = NewOps[0];
1906 ArrayRef<Value*> Idx = NewOps.slice(1);
1907 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
1908 Ptr, Idx, "",
1909 cast<GEPOperator>(I)->isInBounds());
1910 }
1911 }
1912 llvm_unreachable("failed to rebuild vector instructions");
1913}
1914
1915static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
1916 IRBuilderBase &Builder) {
1917 // Mask.size() does not need to be equal to the number of vector elements.
1918
1919 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1920 Type *EltTy = V->getType()->getScalarType();
1921 if (match(V, m_Undef()))
1922 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1923
1924 if (isa<ConstantAggregateZero>(V))
1925 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1926
1927 if (Constant *C = dyn_cast<Constant>(V))
1928 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1929 Mask);
1930
1931 Instruction *I = cast<Instruction>(V);
1932 switch (I->getOpcode()) {
1933 case Instruction::Add:
1934 case Instruction::FAdd:
1935 case Instruction::Sub:
1936 case Instruction::FSub:
1937 case Instruction::Mul:
1938 case Instruction::FMul:
1939 case Instruction::UDiv:
1940 case Instruction::SDiv:
1941 case Instruction::FDiv:
1942 case Instruction::URem:
1943 case Instruction::SRem:
1944 case Instruction::FRem:
1945 case Instruction::Shl:
1946 case Instruction::LShr:
1947 case Instruction::AShr:
1948 case Instruction::And:
1949 case Instruction::Or:
1950 case Instruction::Xor:
1951 case Instruction::ICmp:
1952 case Instruction::FCmp:
1953 case Instruction::Trunc:
1954 case Instruction::ZExt:
1955 case Instruction::SExt:
1956 case Instruction::FPToUI:
1957 case Instruction::FPToSI:
1958 case Instruction::UIToFP:
1959 case Instruction::SIToFP:
1960 case Instruction::FPTrunc:
1961 case Instruction::FPExt:
1962 case Instruction::Select:
1963 case Instruction::GetElementPtr: {
1964 SmallVector<Value*, 8> NewOps;
1965 bool NeedsRebuild =
1966 (Mask.size() !=
1967 cast<FixedVectorType>(I->getType())->getNumElements());
1968 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1969 Value *V;
1970 // Recursively call evaluateInDifferentElementOrder on vector arguments
1971 // as well. E.g. GetElementPtr may have scalar operands even if the
1972 // return value is a vector, so we need to examine the operand type.
1973 if (I->getOperand(i)->getType()->isVectorTy())
1974 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
1975 else
1976 V = I->getOperand(i);
1977 NewOps.push_back(V);
1978 NeedsRebuild |= (V != I->getOperand(i));
1979 }
1980 if (NeedsRebuild)
1981 return buildNew(I, NewOps, Builder);
1982 return I;
1983 }
1984 case Instruction::InsertElement: {
1985 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1986
1987 // The insertelement was inserting at Element. Figure out which element
1988 // that becomes after shuffling. The answer is guaranteed to be unique
1989 // by canEvaluateShuffled.
1990 bool Found = false;
1991 int Index = 0;
1992 for (int e = Mask.size(); Index != e; ++Index) {
1993 if (Mask[Index] == Element) {
1994 Found = true;
1995 break;
1996 }
1997 }
1998
1999 // If the element is not in Mask, there is no need to handle operand 1 (the
2000 // element to be inserted). Just evaluate the values in operand 0 according to Mask.
2001 if (!Found)
2002 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2003
2004 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2005 Builder);
2006 Builder.SetInsertPoint(I);
2007 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2008 }
2009 }
2010 llvm_unreachable("failed to reorder elements of vector instruction!");
2011}
2012
2013// Returns true if the shuffle is extracting a contiguous range of values from
2014// LHS, for example:
2015// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2016// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2017// Shuffles to: |EE|FF|GG|HH|
2018// +--+--+--+--+
2019static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2020 ArrayRef<int> Mask) {
2021 unsigned LHSElems =
2022 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2023 unsigned MaskElems = Mask.size();
2024 unsigned BegIdx = Mask.front();
2025 unsigned EndIdx = Mask.back();
2026 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2027 return false;
2028 for (unsigned I = 0; I != MaskElems; ++I)
2029 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2030 return false;
2031 return true;
2032}
2033
2034/// These are the ingredients in an alternate form binary operator as described
2035/// below.
2036struct BinopElts {
2037 BinaryOperator::BinaryOps Opcode;
2038 Value *Op0;
2039 Value *Op1;
2040 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2041 Value *V0 = nullptr, Value *V1 = nullptr) :
2042 Opcode(Opc), Op0(V0), Op1(V1) {}
2043 operator bool() const { return Opcode != 0; }
2044};
2045
2046/// Binops may be transformed into binops with different opcodes and operands.
2047/// Reverse the usual canonicalization to enable folds with the non-canonical
2048/// form of the binop. If a transform is possible, return the elements of the
2049/// new binop. If not, return invalid elements.
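///
/// For example (illustrative): 'shl X, <1,2>' can be viewed as 'mul X, <2,4>',
/// and 'or X, 1' can be viewed as 'add X, 1' when the low bit of X is known
/// to be zero.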
2050static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2051 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2052 Type *Ty = BO->getType();
2053 switch (BO->getOpcode()) {
2054 case Instruction::Shl: {
2055 // shl X, C --> mul X, (1 << C)
2056 Constant *C;
2057 if (match(BO1, m_Constant(C))) {
2058 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
2059 return {Instruction::Mul, BO0, ShlOne};
2060 }
2061 break;
2062 }
2063 case Instruction::Or: {
2064 // or X, C --> add X, C (when X and C have no common bits set)
2065 const APInt *C;
2066 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
2067 return {Instruction::Add, BO0, BO1};
2068 break;
2069 }
2070 case Instruction::Sub:
2071 // sub 0, X --> mul X, -1
2072 if (match(BO0, m_ZeroInt()))
2073 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2074 break;
2075 default:
2076 break;
2077 }
2078 return {};
2079}
2080
2081/// A select shuffle of a select shuffle with a shared operand can be reduced
2082/// to a single select shuffle. This is an obvious improvement in IR, and the
2083/// backend is expected to lower select shuffles efficiently.
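///
/// For example (illustrative):
///   shuf X, (shuf X, Y, <0,5,2,7>), <0,5,6,3> --> shuf X, Y, <0,5,2,3>
/// Lane 2 of the outer mask selects lane 2 of the inner shuffle, which is
/// X's element 2, so the combined mask reads it from X directly.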
2084static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2085 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2086
2087 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2088 SmallVector<int, 16> Mask;
2089 Shuf.getShuffleMask(Mask);
2090 unsigned NumElts = Mask.size();
2091
2092 // Canonicalize a select shuffle with common operand as Op1.
2093 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2094 if (ShufOp && ShufOp->isSelect() &&
2095 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2096 std::swap(Op0, Op1);
2097 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2098 }
2099
2100 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2101 if (!ShufOp || !ShufOp->isSelect() ||
2102 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2103 return nullptr;
2104
2105 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2106 SmallVector<int, 16> Mask1;
2107 ShufOp->getShuffleMask(Mask1);
2108 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2109
2110 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2111 if (Y == Op0) {
2112 std::swap(X, Y);
2113 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2114 }
2115
2116 // If the mask chooses from X (operand 0), it stays the same.
2117 // If the mask chooses from the earlier shuffle, the other mask value is
2118 // transferred to the combined select shuffle:
2119 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2120 SmallVector<int, 16> NewMask(NumElts);
2121 for (unsigned i = 0; i != NumElts; ++i)
2122 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2123
2124 // A select mask with undef elements might look like an identity mask.
2125 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2126 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2127 "Unexpected shuffle mask");
2128 return new ShuffleVectorInst(X, Y, NewMask);
2129}
2130
2131static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
2132 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2133
2134 // Are we shuffling together some value and that same value after it has been
2135 // modified by a binop with a constant?
2136 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2137 Constant *C;
2138 bool Op0IsBinop;
2139 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2140 Op0IsBinop = true;
2141 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2142 Op0IsBinop = false;
2143 else
2144 return nullptr;
2145
2146 // The identity constant for a binop leaves a variable operand unchanged. For
2147 // a vector, this is a splat of something like 0, -1, or 1.
2148 // If there's no identity constant for this binop, we're done.
2149 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2150 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2151 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2152 if (!IdC)
2153 return nullptr;
2154
2155 // Shuffle identity constants into the lanes that return the original value.
2156 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2157 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2158 // The existing binop constant vector remains in the same operand position.
2159 ArrayRef<int> Mask = Shuf.getShuffleMask();
2160 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2161 ConstantExpr::getShuffleVector(IdC, C, Mask);
2162
2163 bool MightCreatePoisonOrUB =
2164 is_contained(Mask, PoisonMaskElem) &&
2165 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2166 if (MightCreatePoisonOrUB)
2167 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2168
2169 // shuf (bop X, C), X, M --> bop X, C'
2170 // shuf X, (bop X, C), M --> bop X, C'
2171 Value *X = Op0IsBinop ? Op1 : Op0;
2172 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2173 NewBO->copyIRFlags(BO);
2174
2175 // An undef shuffle mask element may propagate as an undef constant element in
2176 // the new binop. That would produce poison where the original code might not.
2177 // If we already made a safe constant, then there's no danger.
2178 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2179 NewBO->dropPoisonGeneratingFlags();
2180 return NewBO;
2181}
2182
2183/// If we have an insert of a scalar to a non-zero element of an undefined
2184/// vector and then shuffle that value, that's the same as inserting to the zero
2185/// element and shuffling. Splatting from the zero element is recognized as the
2186/// canonical form of splat.
2187static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2188 InstCombiner::BuilderTy &Builder) {
2189 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2190 ArrayRef<int> Mask = Shuf.getShuffleMask();
2191 Value *X;
2192 uint64_t IndexC;
2193
2194 // Match a shuffle that is a splat to a non-zero element.
2195 if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
2196 m_ConstantInt(IndexC)))) ||
2197 !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2198 return nullptr;
2199
2200 // Insert into element 0 of a poison vector.
2201 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2202 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2203
2204 // Splat from element 0. Any mask element that is undefined remains undefined.
2205 // For example:
2206 // shuf (inselt undef, X, 2), _, <2,2,undef>
2207 // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
2208 unsigned NumMaskElts =
2209 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2210 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2211 for (unsigned i = 0; i != NumMaskElts; ++i)
2212 if (Mask[i] == PoisonMaskElem)
2213 NewMask[i] = Mask[i];
2214
2215 return new ShuffleVectorInst(NewIns, NewMask);
2216}
2217
2218/// Try to fold shuffles that are the equivalent of a vector select.
2219Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2220 if (!Shuf.isSelect())
2221 return nullptr;
2222
2223 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2224 // Commuting undef to operand 0 conflicts with another canonicalization.
2225 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2226 if (!match(Shuf.getOperand(1), m_Undef()) &&
2227 Shuf.getMaskValue(0) >= (int)NumElts) {
2228 // TODO: Can we assert that both operands of a shuffle-select are not undef
2229 // (otherwise, it would have been folded by instsimplify)?
2230 Shuf.commute();
2231 return &Shuf;
2232 }
2233
2234 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2235 return I;
2236
2237 if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
2238 return I;
2239
2240 BinaryOperator *B0, *B1;
2241 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2242 !match(Shuf.getOperand(1), m_BinOp(B1)))
2243 return nullptr;
2244
2245 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2246 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2247 // with a multiply, we will exit because C0/C1 will not be set.
2248 Value *X, *Y;
2249 Constant *C0 = nullptr, *C1 = nullptr;
2250 bool ConstantsAreOp1;
2251 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2252 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2253 ConstantsAreOp1 = false;
2254 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2255 m_Neg(m_Value(X)))) &&
2256 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2257 m_Neg(m_Value(Y)))))
2258 ConstantsAreOp1 = true;
2259 else
2260 return nullptr;
2261
2262 // We need matching binops to fold the lanes together.
2263 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2264 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2265 bool DropNSW = false;
2266 if (ConstantsAreOp1 && Opc0 != Opc1) {
2267 // TODO: We drop "nsw" if shift is converted into multiply because it may
2268 // not be correct when the shift amount is BitWidth - 1. We could examine
2269 // each vector element to determine if it is safe to keep that flag.
2270 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2271 DropNSW = true;
2272 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2273 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2274 Opc0 = AltB0.Opcode;
2275 C0 = cast<Constant>(AltB0.Op1);
2276 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2277 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2278 Opc1 = AltB1.Opcode;
2279 C1 = cast<Constant>(AltB1.Op1);
2280 }
2281 }
2282
2283 if (Opc0 != Opc1 || !C0 || !C1)
2284 return nullptr;
2285
2286 // The opcodes must be the same. Use a new name to make that clear.
2287 BinaryOperator::BinaryOps BOpc = Opc0;
2288
2289 // Select the constant elements needed for the single binop.
2290 ArrayRef<int> Mask = Shuf.getShuffleMask();
2291 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2292
2293 // We are moving a binop after a shuffle. When a shuffle has an undefined
2294 // mask element, the result is undefined, but it is not poison or undefined
2295 // behavior. That is not necessarily true for div/rem/shift.
2296 bool MightCreatePoisonOrUB =
2297 is_contained(Mask, PoisonMaskElem) &&
2298 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2299 if (MightCreatePoisonOrUB)
2300 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2301 ConstantsAreOp1);
2302
2303 Value *V;
2304 if (X == Y) {
2305 // Remove a binop and the shuffle by rearranging the constant:
2306 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2307 // shuffle (op C0, V), (op C1, V), M --> op C', V
2308 V = X;
2309 } else {
2310 // If there are 2 different variable operands, we must create a new shuffle
2311 // (select) first, so check uses to ensure that we don't end up with more
2312 // instructions than we started with.
2313 if (!B0->hasOneUse() && !B1->hasOneUse())
2314 return nullptr;
2315
2316 // If we use the original shuffle mask and op1 is *variable*, we would be
2317 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2318 // poison. We do not have to guard against UB when *constants* are op1
2319 // because safe constants guarantee that we do not overflow sdiv/srem (and
2320 // there's no danger for other opcodes).
2321 // TODO: To allow this case, create a new shuffle mask with no undefs.
2322 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2323 return nullptr;
2324
2325 // Note: In general, we do not create new shuffles in InstCombine because we
2326 // do not know if a target can lower an arbitrary shuffle optimally. In this
2327 // case, the shuffle uses the existing mask, so there is no additional risk.
2328
2329 // Select the variable vectors first, then perform the binop:
2330 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2331 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2332 V = Builder.CreateShuffleVector(X, Y, Mask);
2333 }
2334
2335 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2336 Builder.CreateBinOp(BOpc, NewC, V);
2337
2338 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2339 // 1. If we changed an opcode, poison conditions might have changed.
2340 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2341 // where the original code did not. But if we already made a safe constant,
2342 // then there's no danger.
2343 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2344 NewI->copyIRFlags(B0);
2345 NewI->andIRFlags(B1);
2346 if (DropNSW)
2347 NewI->setHasNoSignedWrap(false);
2348 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2349 NewI->dropPoisonGeneratingFlags();
2350 }
2351 return replaceInstUsesWith(Shuf, NewBO);
2352}
2353
2354/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2355/// Example (little endian):
2356/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
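/// On a big-endian target, the low bits of each wide element sit at the higher
/// lane indices, so the equivalent example would use mask <1, 3, 5, 7>.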
2357static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2358 bool IsBigEndian) {
2359 // This must be a bitcasted shuffle of 1 vector integer operand.
2360 Type *DestType = Shuf.getType();
2361 Value *X;
2362 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2363 !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2364 return nullptr;
2365
2366 // The source type must have the same number of elements as the shuffle,
2367 // and the source element type must be larger than the shuffle element type.
2368 Type *SrcType = X->getType();
2369 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2370 cast<FixedVectorType>(SrcType)->getNumElements() !=
2371 cast<FixedVectorType>(DestType)->getNumElements() ||
2372 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2373 return nullptr;
2374
2375 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2376 "Expected a shuffle that decreases length");
2377
2378 // Last, check that the mask chooses the correct low bits for each narrow
2379 // element in the result.
2380 uint64_t TruncRatio =
2381 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2382 ArrayRef<int> Mask = Shuf.getShuffleMask();
2383 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2384 if (Mask[i] == PoisonMaskElem)
2385 continue;
2386 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2387 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2388 if (Mask[i] != (int)LSBIndex)
2389 return nullptr;
2390 }
2391
2392 return new TruncInst(X, DestType);
2393}
2394
2395/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2396/// narrowing (concatenating with undef and extracting back to the original
2397/// length). This allows replacing the wide select with a narrow select.
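///
/// For example (illustrative): a <2 x i1> condition that was widened to
/// <4 x i1> (padded with undefs) to select between <4 x i32> values, where the
/// select result is then narrowed back to <2 x i32>, becomes a select between
/// two narrowed <2 x i32> values under the original <2 x i1> condition.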
2398static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2399 InstCombiner::BuilderTy &Builder) {
2400 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2401 // of the 1st vector operand of a shuffle.
2402 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2403 return nullptr;
2404
2405 // The vector being shuffled must be a vector select that we can eliminate.
2406 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2407 Value *Cond, *X, *Y;
2408 if (!match(Shuf.getOperand(0),
2409 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2410 return nullptr;
2411
2412 // We need a narrow condition value. It must be extended with undef elements
2413 // and have the same number of elements as this shuffle.
2414 unsigned NarrowNumElts =
2415 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2416 Value *NarrowCond;
2417 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2418 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2419 NarrowNumElts ||
2420 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2421 return nullptr;
2422
2423 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask) -->
2424 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2425 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2426 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2427 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2428}
2429
2430/// Canonicalize FP negate/abs after shuffle.
2431static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2432 InstCombiner::BuilderTy &Builder) {
2433 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2434 Value *X;
2435 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2436 return nullptr;
2437
2438 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2439
2440 // Match 1-input (unary) shuffle.
2441 // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2442 if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
2443 Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2444 if (IsFNeg)
2445 return UnaryOperator::CreateFNegFMF(NewShuf, S0);
2446
2447 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2448 Intrinsic::fabs, Shuf.getType());
2449 CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
2450 NewF->setFastMathFlags(S0->getFastMathFlags());
2451 return NewF;
2452 }
2453
2454 // Match 2-input (binary) shuffle.
2455 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2456 Value *Y;
2457 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2458 S0->getOpcode() != S1->getOpcode() ||
2459 (!S0->hasOneUse() && !S1->hasOneUse()))
2460 return nullptr;
2461
2462 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2463 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2464 Instruction *NewF;
2465 if (IsFNeg) {
2466 NewF = UnaryOperator::CreateFNeg(NewShuf);
2467 } else {
2468 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2469 Intrinsic::fabs, Shuf.getType());
2470 NewF = CallInst::Create(FAbs, {NewShuf});
2471 }
2472 NewF->copyIRFlags(S0);
2473 NewF->andIRFlags(S1);
2474 return NewF;
2475}
2476
2477/// Canonicalize casts after shuffle.
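///
/// For example (illustrative):
///   shuf (sitofp <4 x i32> %x to <4 x float>), (sitofp <4 x i32> %y), Mask
///   --> sitofp (shuf %x, %y, Mask) to <4 x float>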
2478static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2479 InstCombiner::BuilderTy &Builder) {
2480 // Do we have 2 matching cast operands?
2481 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2482 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2483 if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2484 Cast0->getSrcTy() != Cast1->getSrcTy())
2485 return nullptr;
2486
2487 // TODO: Allow other opcodes? That would require easing the type restrictions
2488 // below here.
2489 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2490 switch (CastOpcode) {
2491 case Instruction::FPToSI:
2492 case Instruction::FPToUI:
2493 case Instruction::SIToFP:
2494 case Instruction::UIToFP:
2495 break;
2496 default:
2497 return nullptr;
2498 }
2499
2500 VectorType *ShufTy = Shuf.getType();
2501 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2502 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2503
2504 // TODO: Allow length-increasing shuffles?
2505 if (ShufTy->getElementCount().getKnownMinValue() >
2506 ShufOpTy->getElementCount().getKnownMinValue())
2507 return nullptr;
2508
2509 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2510 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2511 "Expected fixed vector operands for casts and binary shuffle");
2512 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2513 return nullptr;
2514
2515 // At least one of the operands must have only one use (the shuffle).
2516 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2517 return nullptr;
2518
2519 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2520 Value *X = Cast0->getOperand(0);
2521 Value *Y = Cast1->getOperand(0);
2522 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2523 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2524}
2525
2526/// Try to fold an extract subvector operation.
2527static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2528 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2529 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2530 return nullptr;
2531
2532 // Check if we are extracting all bits of an inserted scalar:
2533 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2534 Value *X;
2535 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2536 X->getType()->getPrimitiveSizeInBits() ==
2537 Shuf.getType()->getPrimitiveSizeInBits())
2538 return new BitCastInst(X, Shuf.getType());
2539
2540 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2541 Value *Y;
2542 ArrayRef<int> Mask;
2543 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2544 return nullptr;
2545
2546 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2547 // then combining may result in worse codegen.
2548 if (!Op0->hasOneUse())
2549 return nullptr;
2550
2551 // We are extracting a subvector from a shuffle. Remove excess elements from
2552 // the 1st shuffle mask to eliminate the extract.
2553 //
2554 // This transform is conservatively limited to identity extracts because we do
2555 // not allow arbitrary shuffle mask creation as a target-independent transform
2556 // (because we can't guarantee that will lower efficiently).
2557 //
2558 // If the extracting shuffle has an undef mask element, it transfers to the
2559 // new shuffle mask. Otherwise, copy the original mask element. Example:
2560 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2561 // shuf X, Y, <C0, undef, C2, undef>
2562 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2563 SmallVector<int, 16> NewMask(NumElts);
2564 assert(NumElts < Mask.size() &&
2565 "Identity with extract must have less elements than its inputs");
2566
2567 for (unsigned i = 0; i != NumElts; ++i) {
2568 int ExtractMaskElt = Shuf.getMaskValue(i);
2569 int MaskElt = Mask[i];
2570 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2571 }
2572 return new ShuffleVectorInst(X, Y, NewMask);
2573}
2574
2575/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2576/// operand with the operand of an insertelement.
2577static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2578 InstCombinerImpl &IC) {
2579 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2580 SmallVector<int, 16> Mask;
2581 Shuf.getShuffleMask(Mask);
2582
2583 int NumElts = Mask.size();
2584 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2585
2586 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2587 // not be able to handle it there if the insertelement has >1 use.
2588 // If the shuffle has an insertelement operand but does not choose the
2589 // inserted scalar element from that value, then we can replace that shuffle
2590 // operand with the source vector of the insertelement.
2591 Value *X;
2592 uint64_t IdxC;
2593 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2594 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2595 if (!is_contained(Mask, (int)IdxC))
2596 return IC.replaceOperand(Shuf, 0, X);
2597 }
2598 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2599 // Offset the index constant by the vector width because we are checking for
2600 // accesses to the 2nd vector input of the shuffle.
2601 IdxC += InpNumElts;
2602 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2603 if (!is_contained(Mask, (int)IdxC))
2604 return IC.replaceOperand(Shuf, 1, X);
2605 }
2606 // For the rest of the transform, the shuffle must not change vector sizes.
2607 // TODO: This restriction could be removed if the insert has only one use
2608 // (because the transform would require a new length-changing shuffle).
2609 if (NumElts != InpNumElts)
2610 return nullptr;
2611
2612 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2613 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2614 // We need an insertelement with a constant index.
2615 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2616 m_ConstantInt(IndexC))))
2617 return false;
2618
2619 // Test the shuffle mask to see if it splices the inserted scalar into the
2620 // operand 1 vector of the shuffle.
2621 int NewInsIndex = -1;
2622 for (int i = 0; i != NumElts; ++i) {
2623 // Ignore undef mask elements.
2624 if (Mask[i] == -1)
2625 continue;
2626
2627 // The shuffle takes elements of operand 1 without lane changes.
2628 if (Mask[i] == NumElts + i)
2629 continue;
2630
2631 // The shuffle must choose the inserted scalar exactly once.
2632 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2633 return false;
2634
2635 // The shuffle is placing the inserted scalar into element i.
2636 NewInsIndex = i;
2637 }
2638
2639 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2640
2641 // Index is updated to the potentially translated insertion lane.
2642 IndexC = ConstantInt::get(IndexC->getType(), NewInsIndex);
2643 return true;
2644 };
2645
2646 // If the shuffle is unnecessary, insert the scalar operand directly into
2647 // operand 1 of the shuffle. Example:
2648 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2649 Value *Scalar;
2650 ConstantInt *IndexC;
2651 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2652 return InsertElementInst::Create(V1, Scalar, IndexC);
2653
2654 // Try again after commuting shuffle. Example:
2655 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2656 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2657 std::swap(V0, V1);
2658 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2659 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2660 return InsertElementInst::Create(V1, Scalar, IndexC);
2661
2662 return nullptr;
2663}
2664
2665static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2666 // Match the operands as identity with padding (also known as concatenation
2667 // with undef) shuffles of the same source type. The backend is expected to
2668 // recreate these concatenations from a shuffle of narrow operands.
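 // For example (illustrative), with X and Y of type <2 x i32> widened to
 // <4 x i32>:
 //   shuf (shuf X, undef, <0,1,u,u>), (shuf Y, undef, <0,1,u,u>), <0,1,4,5>
 //   --> shuf X, Y, <0,1,2,3>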
2669 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2670 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2671 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2672 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2673 return nullptr;
2674
2675 // We limit this transform to power-of-2 types because we expect that the
2676 // backend can convert the simplified IR patterns to identical nodes as the
2677 // original IR.
2678 // TODO: If we can verify the same behavior for arbitrary types, the
2679 // power-of-2 checks can be removed.
2680 Value *X = Shuffle0->getOperand(0);
2681 Value *Y = Shuffle1->getOperand(0);
2682 if (X->getType() != Y->getType() ||
2683 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2684 !isPowerOf2_32(
2685 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2686 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2687 match(X, m_Undef()) || match(Y, m_Undef()))
2688 return nullptr;
2689 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2690 match(Shuffle1->getOperand(1), m_Undef()) &&
2691 "Unexpected operand for identity shuffle");
2692
2693 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2694 // operands directly by adjusting the shuffle mask to account for the narrower
2695 // types:
2696 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2697 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2698 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2699 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2700
2701 ArrayRef<int> Mask = Shuf.getShuffleMask();
2702 SmallVector<int, 16> NewMask(Mask.size(), -1);
2703 for (int i = 0, e = Mask.size(); i != e; ++i) {
2704 if (Mask[i] == -1)
2705 continue;
2706
2707 // If this shuffle is choosing an undef element from 1 of the sources, that
2708 // element is undef.
2709 if (Mask[i] < WideElts) {
2710 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2711 continue;
2712 } else {
2713 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2714 continue;
2715 }
2716
2717 // If this shuffle is choosing from the 1st narrow op, the mask element is
2718 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2719 // element is offset down to adjust for the narrow vector widths.
2720 if (Mask[i] < WideElts) {
2721 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2722 NewMask[i] = Mask[i];
2723 } else {
2724 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2725 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2726 }
2727 }
2728 return new ShuffleVectorInst(X, Y, NewMask);
2729}
2730
2731// Splatting the first element of the result of a BinOp, where any of the
2732// BinOp's operands is the result of a first-element splat, can be simplified to
2733// splatting the first element of the BinOp's result.
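// For example (illustrative):
//   %sx = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> zeroinitializer
//   %b  = add <4 x i32> %sx, %y
//   %s  = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
// -->
//   %b2 = add <4 x i32> %x, %y
//   %s  = shufflevector <4 x i32> %b2, <4 x i32> poison, <4 x i32> zeroinitializer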
2734Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2735 if (!match(SVI.getOperand(1), m_Undef()) ||
2736 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2737 !SVI.getOperand(0)->hasOneUse())
2738 return nullptr;
2739
2740 Value *Op0 = SVI.getOperand(0);
2741 Value *X, *Y;
2742 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2743 m_Value(Y))) &&
2744 !match(Op0, m_BinOp(m_Value(X),
2745 m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2746 return nullptr;
2747 if (X->getType() != Y->getType())
2748 return nullptr;
2749
2750 auto *BinOp = cast<BinaryOperator>(Op0);
2751 if (!isSafeToSpeculativelyExecute(BinOp))
2752 return nullptr;
2753
2754 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2755 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2756 NewBOI->copyIRFlags(BinOp);
2757
2758 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2759}
2760
2761Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2762 Value *LHS = SVI.getOperand(0);
2763 Value *RHS = SVI.getOperand(1);
2764 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2765 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2766 SVI.getType(), ShufQuery))
2767 return replaceInstUsesWith(SVI, V);
2768
2769 if (Instruction *I = simplifyBinOpSplats(SVI))
2770 return I;
2771
2772 if (isa<ScalableVectorType>(LHS->getType()))
2773 return nullptr;
2774
2775 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2776 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2777
2778 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2779 //
2780 // if X and Y are of the same (vector) type, and the element size is not
2781 // changed by the bitcasts, we can distribute the bitcasts through the
2782 // shuffle, hopefully reducing the number of instructions. We make sure that
2783 // at least one bitcast only has one use, so we don't *increase* the number of
2784 // instructions here.
2785 Value *X, *Y;
2786 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2787 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2788 X->getType()->getScalarSizeInBits() ==
2789 SVI.getType()->getScalarSizeInBits() &&
2790 (LHS->hasOneUse() || RHS->hasOneUse())) {
2791 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2792 SVI.getName() + ".uncasted");
2793 return new BitCastInst(V, SVI.getType());
2794 }
2795
2796 ArrayRef<int> Mask = SVI.getShuffleMask();
2797
2798 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2799 // simulated shuffle can simplify, then this shuffle is unnecessary:
2800 // shuf (bitcast X), undef, Mask --> bitcast X'
2801 // TODO: This could be extended to allow length-changing shuffles.
2802 // The transform might also be obsoleted if we allowed canonicalization
2803 // of bitcasted shuffles.
2804 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2805 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2806 // Try to create a scaled mask constant.
2807 auto *XType = cast<FixedVectorType>(X->getType());
2808 unsigned XNumElts = XType->getNumElements();
2809 SmallVector<int, 16> ScaledMask;
2810 if (XNumElts >= VWidth) {
2811 assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2812 narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2813 } else {
2814 assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2815 if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2816 ScaledMask.clear();
2817 }
2818 if (!ScaledMask.empty()) {
2819 // If the shuffled source vector simplifies, cast that value to this
2820 // shuffle's type.
2821 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2822 ScaledMask, XType, ShufQuery))
2823 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2824 }
2825 }
2826
2827 // shuffle x, x, mask --> shuffle x, undef, mask'
2828 if (LHS == RHS) {
2829 assert(!match(RHS, m_Undef()) &&
2830 "Shuffle with 2 undef ops not simplified?");
2831 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2832 }
2833
2834 // shuffle undef, x, mask --> shuffle x, undef, mask'
2835 if (match(LHS, m_Undef())) {
2836 SVI.commute();
2837 return &SVI;
2838 }
2839
2840 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2841 return I;
2842
2843 if (Instruction *I = foldSelectShuffle(SVI))
2844 return I;
2845
2846 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2847 return I;
2848
2849 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2850 return I;
2851
2852 if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
2853 return I;
2854
2855 if (Instruction *I = foldCastShuffle(SVI, Builder))
2856 return I;
2857
2858 APInt UndefElts(VWidth, 0);
2859 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2860 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
2861 if (V != &SVI)
2862 return replaceInstUsesWith(SVI, V);
2863 return &SVI;
2864 }
2865
2866 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2867 return I;
2868
2869 // These transforms have the potential to lose undef knowledge, so they are
2870 // intentionally placed after SimplifyDemandedVectorElts().
2871 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2872 return I;
2873 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2874 return I;
2875
2876 if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2877 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
2878 return replaceInstUsesWith(SVI, V);
2879 }
2880
2881 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2882 // a non-vector type. We can instead bitcast the original vector followed by
2883 // an extract of the desired element:
2884 //
2885 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2886 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2887 // %1 = bitcast <4 x i8> %sroa to i32
2888 // Becomes:
2889 // %bc = bitcast <16 x i8> %in to <4 x i32>
2890 // %ext = extractelement <4 x i32> %bc, i32 0
2891 //
2892 // If the shuffle is extracting a contiguous range of values from the input
2893 // vector then each use which is a bitcast of the extracted size can be
2894 // replaced. This will work if the vector types are compatible, and the begin
2895 // index is aligned to a value in the casted vector type. If the begin index
2896 // isn't aligned then we can shuffle the original vector (keeping the same
2897 // vector type) before extracting.
2898 //
2899 // This code will bail out if the target type is fundamentally incompatible
2900 // with vectors of the source type.
2901 //
2902 // Example of <16 x i8>, target type i32:
2903 // Index range [4,8): v-----------v Will work.
2904 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2905 // <16 x i8>: | | | | | | | | | | | | | | | | |
2906 // <4 x i32>: | | | | |
2907 // +-----------+-----------+-----------+-----------+
2908 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2909 // Target type i40: ^--------------^ Won't work, bail.
2910 bool MadeChange = false;
2911 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2912 Value *V = LHS;
2913 unsigned MaskElems = Mask.size();
2914 auto *SrcTy = cast<FixedVectorType>(V->getType());
2915 unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
2916 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2917 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2918 unsigned SrcNumElems = SrcTy->getNumElements();
2919     SmallVector<BitCastInst *, 8> BCs;
2920     DenseMap<Type *, Value *> NewBCs;
2921     for (User *U : SVI.users())
2922 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2923 if (!BC->use_empty())
2924 // Only visit bitcasts that weren't previously handled.
2925 BCs.push_back(BC);
2926 for (BitCastInst *BC : BCs) {
2927 unsigned BegIdx = Mask.front();
2928 Type *TgtTy = BC->getDestTy();
2929 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2930 if (!TgtElemBitWidth)
2931 continue;
2932 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2933 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2934 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2935 if (!VecBitWidthsEqual)
2936 continue;
2937       if (!VectorType::isValidElementType(TgtTy))
2938         continue;
2939 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2940 if (!BegIsAligned) {
2941         // Shuffle the input so [0,NumElems) contains the output, and
2942         // [NumElems,SrcNumElems) is undef.
2943 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2944 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2945 ShuffleMask[I] = Idx;
2946 V = Builder.CreateShuffleVector(V, ShuffleMask,
2947 SVI.getName() + ".extract");
2948 BegIdx = 0;
2949 }
2950 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2951 assert(SrcElemsPerTgtElem);
2952 BegIdx /= SrcElemsPerTgtElem;
2953 bool BCAlreadyExists = NewBCs.contains(CastSrcTy);
2954 auto *NewBC =
2955 BCAlreadyExists
2956 ? NewBCs[CastSrcTy]
2957 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2958 if (!BCAlreadyExists)
2959 NewBCs[CastSrcTy] = NewBC;
2960 auto *Ext = Builder.CreateExtractElement(NewBC, BegIdx,
2961 SVI.getName() + ".extract");
2962 // The shufflevector isn't being replaced: the bitcast that used it
2963 // is. InstCombine will visit the newly-created instructions.
2964 replaceInstUsesWith(*BC, Ext);
2965 MadeChange = true;
2966 }
2967 }
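  // For example (illustrative): extracting bytes [6,10) of a <16 x i8> vector
  // for an i32-sized bitcast is not 32-bit aligned, so the input is first
  // shuffled to move elements 6..9 into lanes 0..3, and only then bitcast to
  // <4 x i32> so that element 0 can be extracted.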
2968
2969 // If the LHS is a shufflevector itself, see if we can combine it with this
2970 // one without producing an unusual shuffle.
2971 // Cases that might be simplified:
2972 // 1.
2973 // x1=shuffle(v1,v2,mask1)
2974 // x=shuffle(x1,undef,mask)
2975 // ==>
2976 // x=shuffle(v1,undef,newMask)
2977 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
2978 // 2.
2979 // x1=shuffle(v1,undef,mask1)
2980 // x=shuffle(x1,x2,mask)
2981 // where v1.size() == mask1.size()
2982 // ==>
2983 // x=shuffle(v1,x2,newMask)
2984 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
2985 // 3.
2986 // x2=shuffle(v2,undef,mask2)
2987 // x=shuffle(x1,x2,mask)
2988 // where v2.size() == mask2.size()
2989 // ==>
2990 // x=shuffle(x1,v2,newMask)
2991 // newMask[i] = (mask[i] < x1.size())
2992 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
2993 // 4.
2994 // x1=shuffle(v1,undef,mask1)
2995 // x2=shuffle(v2,undef,mask2)
2996 // x=shuffle(x1,x2,mask)
2997 // where v1.size() == v2.size()
2998 // ==>
2999 // x=shuffle(v1,v2,newMask)
3000 // newMask[i] = (mask[i] < x1.size())
3001 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3002 //
3003 // Here we are really conservative:
3004 // we are absolutely afraid of producing a shuffle mask not in the input
3005 // program, because the code gen may not be smart enough to turn a merged
3006 // shuffle into two specific shuffles: it may produce worse code. As such,
3007 // we only merge two shuffles if the result is either a splat or one of the
3008 // input shuffle masks. In this case, merging the shuffles just removes
3009 // one instruction, which we know is safe. This is good for things like
3010 // turning: (splat(splat)) -> splat, or
3011 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
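  // For example (illustrative), case 1 where the merged mask is a splat:
  //   %x1 = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  //   %x  = shufflevector <4 x i32> %x1, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  // folds to
  //   %x  = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  // because lane 2 of %x1 is lane 1 of %v1 (newMask[i] = mask1[mask[i]]).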
3012 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3013 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3014 if (LHSShuffle)
3015 if (!match(LHSShuffle->getOperand(1), m_Undef()) && !match(RHS, m_Undef()))
3016 LHSShuffle = nullptr;
3017 if (RHSShuffle)
3018 if (!match(RHSShuffle->getOperand(1), m_Undef()))
3019 RHSShuffle = nullptr;
3020 if (!LHSShuffle && !RHSShuffle)
3021 return MadeChange ? &SVI : nullptr;
3022
3023 Value* LHSOp0 = nullptr;
3024 Value* LHSOp1 = nullptr;
3025 Value* RHSOp0 = nullptr;
3026 unsigned LHSOp0Width = 0;
3027 unsigned RHSOp0Width = 0;
3028 if (LHSShuffle) {
3029 LHSOp0 = LHSShuffle->getOperand(0);
3030 LHSOp1 = LHSShuffle->getOperand(1);
3031 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3032 }
3033 if (RHSShuffle) {
3034 RHSOp0 = RHSShuffle->getOperand(0);
3035 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3036 }
3037 Value* newLHS = LHS;
3038 Value* newRHS = RHS;
3039 if (LHSShuffle) {
3040 // case 1
3041 if (match(RHS, m_Undef())) {
3042 newLHS = LHSOp0;
3043 newRHS = LHSOp1;
3044 }
3045 // case 2 or 4
3046 else if (LHSOp0Width == LHSWidth) {
3047 newLHS = LHSOp0;
3048 }
3049 }
3050 // case 3 or 4
3051 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3052 newRHS = RHSOp0;
3053 }
3054 // case 4
3055 if (LHSOp0 == RHSOp0) {
3056 newLHS = LHSOp0;
3057 newRHS = nullptr;
3058 }
3059
3060 if (newLHS == LHS && newRHS == RHS)
3061 return MadeChange ? &SVI : nullptr;
3062
3063 ArrayRef<int> LHSMask;
3064 ArrayRef<int> RHSMask;
3065 if (newLHS != LHS)
3066 LHSMask = LHSShuffle->getShuffleMask();
3067 if (RHSShuffle && newRHS != RHS)
3068 RHSMask = RHSShuffle->getShuffleMask();
3069
3070 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3071 SmallVector<int, 16> newMask;
3072 bool isSplat = true;
3073 int SplatElt = -1;
3074 // Create a new mask for the new ShuffleVectorInst so that the new
3075 // ShuffleVectorInst is equivalent to the original one.
3076 for (unsigned i = 0; i < VWidth; ++i) {
3077 int eltMask;
3078 if (Mask[i] < 0) {
3079 // This element is a poison value.
3080 eltMask = -1;
3081 } else if (Mask[i] < (int)LHSWidth) {
3082       // This element is from the left hand side vector operand.
3083 //
3084 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3085 // new mask value for the element.
3086 if (newLHS != LHS) {
3087 eltMask = LHSMask[Mask[i]];
3088         // If the value selected is a poison value, explicitly specify it
3089 // with a -1 mask value.
3090 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3091 eltMask = -1;
3092 } else
3093 eltMask = Mask[i];
3094 } else {
3095       // This element is from the right hand side vector operand.
3096 //
3097 // If the value selected is a poison value, explicitly specify it
3098 // with a -1 mask value. (case 1)
3099 if (match(RHS, m_Poison()))
3100 eltMask = -1;
3101 // If RHS is going to be replaced (case 3 or 4), calculate the
3102 // new mask value for the element.
3103 else if (newRHS != RHS) {
3104 eltMask = RHSMask[Mask[i]-LHSWidth];
3105         // If the value selected is a poison value, explicitly specify it
3106 // with a -1 mask value.
3107 if (eltMask >= (int)RHSOp0Width) {
3108 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3109                "should have been checked above");
3110 eltMask = -1;
3111 }
3112 } else
3113 eltMask = Mask[i]-LHSWidth;
3114
3115 // If LHS's width is changed, shift the mask value accordingly.
3116 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3117 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3118 // If newRHS == newLHS, we want to remap any references from newRHS to
3119 // newLHS so that we can properly identify splats that may occur due to
3120 // obfuscation across the two vectors.
3121 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3122 eltMask += newLHSWidth;
3123 }
3124
3125 // Check if this could still be a splat.
3126 if (eltMask >= 0) {
3127 if (SplatElt >= 0 && SplatElt != eltMask)
3128 isSplat = false;
3129 SplatElt = eltMask;
3130 }
3131
3132 newMask.push_back(eltMask);
3133 }
3134
3135 // If the result mask is equal to one of the original shuffle masks,
3136 // or is a splat, do the replacement.
3137 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3138 if (!newRHS)
3139 newRHS = PoisonValue::get(newLHS->getType());
3140 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3141 }
3142
3143 return MadeChange ? &SVI : nullptr;
3144}