//===- InstCombineVectorOps.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements instcombine for ExtractElement, InsertElement and
// ShuffleVector.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

#define DEBUG_TYPE "instcombine"

using namespace llvm;
using namespace PatternMatch;

STATISTIC(NumAggregateReconstructionsSimplified,
          "Number of aggregate reconstructions turned into reuse of the "
          "original aggregate");

/// Return true if the value is cheaper to scalarize than it is to leave as a
/// vector operation. If the extract index \p EI is a constant integer then
/// some operations may be cheap to scalarize.
///
/// FIXME: It's possible to create more instructions than previously existed.
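/// For example (illustrative), extracting a constant-index lane from a
/// one-use vector load is considered cheap: the wide load can be replaced by
/// a scalar load of just that lane.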
static bool cheapToScalarize(Value *V, Value *EI) {
  ConstantInt *CEI = dyn_cast<ConstantInt>(EI);

  // If we can pick a scalar constant value out of a vector, that is free.
  if (auto *C = dyn_cast<Constant>(V))
    return CEI || C->getSplatValue();

  if (CEI && match(V, m_Intrinsic<Intrinsic::experimental_stepvector>())) {
    ElementCount EC = cast<VectorType>(V->getType())->getElementCount();
    // The index needs to be lower than the minimum known size of the vector,
    // because for a scalable vector the full size is only known at run time.
    return CEI->getValue().ult(EC.getKnownMinValue());
  }

  // An insertelement to the same constant index as our extract will simplify
  // to the scalar inserted element. An insertelement to a different constant
  // index is irrelevant to our extract.
  if (match(V, m_InsertElt(m_Value(), m_Value(), m_ConstantInt())))
    return CEI;

  if (match(V, m_OneUse(m_Load(m_Value()))))
    return true;

  if (match(V, m_OneUse(m_UnOp())))
    return true;

  Value *V0, *V1;
  if (match(V, m_OneUse(m_BinOp(m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  CmpInst::Predicate UnusedPred;
  if (match(V, m_OneUse(m_Cmp(UnusedPred, m_Value(V0), m_Value(V1)))))
    if (cheapToScalarize(V0, EI) || cheapToScalarize(V1, EI))
      return true;

  return false;
}

// If we have a PHI node with a vector type that is only used to feed
// itself and be an operand of extractelement at a constant location,
// try to replace the PHI of the vector type with a PHI of a scalar type.
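// For example (illustrative):
//   %v = phi <4 x i32> [ %init, %entry ], [ %v.next, %loop ]
//   %v.next = add <4 x i32> %v, <i32 1, i32 1, i32 1, i32 1>
//   %s = extractelement <4 x i32> %v, i64 0
// The vector phi becomes a scalar i32 phi, the add is scalarized, and %init
// is extracted once in the predecessor block.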
Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
                                            PHINode *PN) {
  SmallVector<Instruction *, 2> Extracts;
  // The users we want the PHI to have are:
  // 1) The EI ExtractElement (we already know this)
  // 2) Possibly more ExtractElements with the same index.
  // 3) Another operand, which will feed back into the PHI.
  Instruction *PHIUser = nullptr;
  for (auto *U : PN->users()) {
    if (ExtractElementInst *EU = dyn_cast<ExtractElementInst>(U)) {
      if (EI.getIndexOperand() == EU->getIndexOperand())
        Extracts.push_back(EU);
      else
        return nullptr;
    } else if (!PHIUser) {
      PHIUser = cast<Instruction>(U);
    } else {
      return nullptr;
    }
  }

  if (!PHIUser)
    return nullptr;

  // Verify that this PHI user has one use, which is the PHI itself,
  // and that it is a binary operation which is cheap to scalarize;
  // otherwise return nullptr.
  if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) ||
      !(isa<BinaryOperator>(PHIUser)) ||
      !cheapToScalarize(PHIUser, EI.getIndexOperand()))
    return nullptr;

  // Create a scalar PHI node that will replace the vector PHI node
  // just before the current PHI node.
  PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
      PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""),
      PN->getIterator()));
  // Scalarize each PHI operand.
  for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
    Value *PHIInVal = PN->getIncomingValue(i);
    BasicBlock *inBB = PN->getIncomingBlock(i);
    Value *Elt = EI.getIndexOperand();
    // If the operand is the PHI induction variable:
    if (PHIInVal == PHIUser) {
      // Scalarize the binary operation. Its first operand is the
      // scalar PHI, and the second operand is extracted from the other
      // vector operand.
      BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
      unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
      Value *Op = InsertNewInstWith(
          ExtractElementInst::Create(B0->getOperand(opId), Elt,
                                     B0->getOperand(opId)->getName() + ".Elt"),
          B0->getIterator());
      Value *newPHIUser = InsertNewInstWith(
          BinaryOperator::CreateWithCopiedFlags(B0->getOpcode(),
                                                scalarPHI, Op, B0),
          B0->getIterator());
      scalarPHI->addIncoming(newPHIUser, inBB);
    } else {
      // Scalarize PHI input:
      Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
      // Insert the new instruction into the predecessor basic block.
      Instruction *pos = dyn_cast<Instruction>(PHIInVal);
      BasicBlock::iterator InsertPos;
      if (pos && !isa<PHINode>(pos)) {
        InsertPos = ++pos->getIterator();
      } else {
        InsertPos = inBB->getFirstInsertionPt();
      }

      InsertNewInstWith(newEI, InsertPos);

      scalarPHI->addIncoming(newEI, inBB);
    }
  }

  for (auto *E : Extracts) {
    replaceInstUsesWith(*E, scalarPHI);
    // Add old extract to worklist for DCE.
    addToWorklist(E);
  }

  return &EI;
}

Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
  Value *X;
  uint64_t ExtIndexC;
  if (!match(Ext.getVectorOperand(), m_BitCast(m_Value(X))) ||
      !match(Ext.getIndexOperand(), m_ConstantInt(ExtIndexC)))
    return nullptr;

  ElementCount NumElts =
      cast<VectorType>(Ext.getVectorOperandType())->getElementCount();
  Type *DestTy = Ext.getType();
  unsigned DestWidth = DestTy->getPrimitiveSizeInBits();
  bool IsBigEndian = DL.isBigEndian();

  // If we are casting an integer to vector and extracting a portion, that is
  // a shift-right and truncate.
  if (X->getType()->isIntegerTy()) {
    assert(isa<FixedVectorType>(Ext.getVectorOperand()->getType()) &&
           "Expected fixed vector type for bitcast from scalar integer");

    // Big endian requires adjusting the extract index since MSB is at index 0.
    // LittleEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 X to i8
    // BigEndian: extelt (bitcast i32 X to v4i8), 0 -> trunc i32 (X >> 24) to i8
    if (IsBigEndian)
      ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
    unsigned ShiftAmountC = ExtIndexC * DestWidth;
    if (!ShiftAmountC ||
        (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
         Ext.getVectorOperand()->hasOneUse())) {
      if (ShiftAmountC)
        X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
      if (DestTy->isFloatingPointTy()) {
        Type *DstIntTy = IntegerType::getIntNTy(X->getContext(), DestWidth);
        Value *Trunc = Builder.CreateTrunc(X, DstIntTy);
        return new BitCastInst(Trunc, DestTy);
      }
      return new TruncInst(X, DestTy);
    }
  }

  if (!X->getType()->isVectorTy())
    return nullptr;

  // If this extractelement is using a bitcast from a vector of the same number
  // of elements, see if we can find the source element from the source vector:
  // extelt (bitcast VecX), IndexC --> bitcast X[IndexC]
  auto *SrcTy = cast<VectorType>(X->getType());
  ElementCount NumSrcElts = SrcTy->getElementCount();
  if (NumSrcElts == NumElts)
    if (Value *Elt = findScalarElement(X, ExtIndexC))
      return new BitCastInst(Elt, DestTy);

  assert(NumSrcElts.isScalable() == NumElts.isScalable() &&
         "Src and Dst must be the same sort of vector type");

  // If the source elements are wider than the destination, try to shift and
  // truncate a subset of scalar bits of an insert op.
  if (NumSrcElts.getKnownMinValue() < NumElts.getKnownMinValue()) {
    Value *Scalar;
    Value *Vec;
    uint64_t InsIndexC;
    if (!match(X, m_InsertElt(m_Value(Vec), m_Value(Scalar),
                              m_ConstantInt(InsIndexC))))
      return nullptr;

    // The extract must be from the subset of vector elements that we inserted
    // into. Example: if we inserted element 1 of a <2 x i64> and we are
    // extracting an i16 (narrowing ratio = 4), then this extract must be from 1
    // of elements 4-7 of the bitcasted vector.
    unsigned NarrowingRatio =
        NumElts.getKnownMinValue() / NumSrcElts.getKnownMinValue();

    if (ExtIndexC / NarrowingRatio != InsIndexC) {
      // Remove insertelement, if we don't use the inserted element.
      // extractelement (bitcast (insertelement (Vec, b)), a) ->
      // extractelement (bitcast (Vec), a)
      // FIXME: this should be moved to SimplifyDemandedVectorElts,
      // once scalable vectors are supported.
      if (X->hasOneUse() && Ext.getVectorOperand()->hasOneUse()) {
        Value *NewBC = Builder.CreateBitCast(Vec, Ext.getVectorOperandType());
        return ExtractElementInst::Create(NewBC, Ext.getIndexOperand());
      }
      return nullptr;
    }

    // We are extracting part of the original scalar. How that scalar is
    // inserted into the vector depends on the endian-ness. Example:
    //           Vector Byte Elt Index:   0  1  2  3  4  5  6  7
    //                                   +--+--+--+--+--+--+--+--+
    // inselt <2 x i32> V, <i32> S, 1:   |V0|V1|V2|V3|S0|S1|S2|S3|
    // extelt <4 x i16> V', 3:           |                 |S2|S3|
    //                                   +--+--+--+--+--+--+--+--+
    // If this is little-endian, S2|S3 are the MSB of the 32-bit 'S' value.
    // If this is big-endian, S2|S3 are the LSB of the 32-bit 'S' value.
    // In this example, little-endian requires a right-shift; big-endian is
    // just a truncate.
    unsigned Chunk = ExtIndexC % NarrowingRatio;
    if (IsBigEndian)
      Chunk = NarrowingRatio - 1 - Chunk;

    // Bail out if this is an FP vector to FP vector sequence. That would take
    // more instructions than we started with unless there is no shift, and it
    // may not be handled as well in the backend.
    bool NeedSrcBitcast = SrcTy->getScalarType()->isFloatingPointTy();
    bool NeedDestBitcast = DestTy->isFloatingPointTy();
    if (NeedSrcBitcast && NeedDestBitcast)
      return nullptr;

    unsigned SrcWidth = SrcTy->getScalarSizeInBits();
    unsigned ShAmt = Chunk * DestWidth;

    // TODO: This limitation is more strict than necessary. We could sum the
    // number of new instructions and subtract the number eliminated to know if
    // we can proceed.
    if (!X->hasOneUse() || !Ext.getVectorOperand()->hasOneUse())
      if (NeedSrcBitcast || NeedDestBitcast)
        return nullptr;

    if (NeedSrcBitcast) {
      Type *SrcIntTy = IntegerType::getIntNTy(Scalar->getContext(), SrcWidth);
      Scalar = Builder.CreateBitCast(Scalar, SrcIntTy);
    }

    if (ShAmt) {
      // Bail out if we could end with more instructions than we started with.
      if (!Ext.getVectorOperand()->hasOneUse())
        return nullptr;
      Scalar = Builder.CreateLShr(Scalar, ShAmt);
    }

    if (NeedDestBitcast) {
      Type *DestIntTy = IntegerType::getIntNTy(Scalar->getContext(), DestWidth);
      return new BitCastInst(Builder.CreateTrunc(Scalar, DestIntTy), DestTy);
    }
    return new TruncInst(Scalar, DestTy);
  }

  return nullptr;
}

/// Find elements of V demanded by UserInstr.
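/// For example, an extractelement at constant index 2 demands only element 2,
/// and a shufflevector demands exactly the lanes its mask selects from V.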
static APInt findDemandedEltsBySingleUser(Value *V, Instruction *UserInstr) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  // Conservatively assume that all elements are needed.
  APInt UsedElts(APInt::getAllOnes(VWidth));

  switch (UserInstr->getOpcode()) {
  case Instruction::ExtractElement: {
    ExtractElementInst *EEI = cast<ExtractElementInst>(UserInstr);
    assert(EEI->getVectorOperand() == V);
    ConstantInt *EEIIndexC = dyn_cast<ConstantInt>(EEI->getIndexOperand());
    if (EEIIndexC && EEIIndexC->getValue().ult(VWidth)) {
      UsedElts = APInt::getOneBitSet(VWidth, EEIIndexC->getZExtValue());
    }
    break;
  }
  case Instruction::ShuffleVector: {
    ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(UserInstr);
    unsigned MaskNumElts =
        cast<FixedVectorType>(UserInstr->getType())->getNumElements();

    UsedElts = APInt(VWidth, 0);
    for (unsigned i = 0; i < MaskNumElts; i++) {
      unsigned MaskVal = Shuffle->getMaskValue(i);
      if (MaskVal == -1u || MaskVal >= 2 * VWidth)
        continue;
      if (Shuffle->getOperand(0) == V && (MaskVal < VWidth))
        UsedElts.setBit(MaskVal);
      if (Shuffle->getOperand(1) == V &&
          ((MaskVal >= VWidth) && (MaskVal < 2 * VWidth)))
        UsedElts.setBit(MaskVal - VWidth);
    }
    break;
  }
  default:
    break;
  }
  return UsedElts;
}

/// Find union of elements of V demanded by all its users.
/// If it is known by querying findDemandedEltsBySingleUser that
/// no user demands an element of V, then the corresponding bit
/// remains unset in the returned value.
static APInt findDemandedEltsByAllUsers(Value *V) {
  unsigned VWidth = cast<FixedVectorType>(V->getType())->getNumElements();

  APInt UnionUsedElts(VWidth, 0);
  for (const Use &U : V->uses()) {
    if (Instruction *I = dyn_cast<Instruction>(U.getUser())) {
      UnionUsedElts |= findDemandedEltsBySingleUser(V, I);
    } else {
      UnionUsedElts = APInt::getAllOnes(VWidth);
      break;
    }

    if (UnionUsedElts.isAllOnes())
      break;
  }

  return UnionUsedElts;
}

/// Given a constant index for an extractelement or insertelement instruction,
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
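/// For example (illustrative), the i16 index in
///   extractelement <4 x i32> %v, i16 2
/// is rewritten as the i64 index 2.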
static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
  const unsigned IndexBW = IndexC->getBitWidth();
  if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
    return nullptr;
  return ConstantInt::get(IndexC->getContext(),
                          IndexC->getValue().zextOrTrunc(64));
}

Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
  Value *SrcVec = EI.getVectorOperand();
  Value *Index = EI.getIndexOperand();
  if (Value *V = simplifyExtractElementInst(SrcVec, Index,
                                            SQ.getWithInstruction(&EI)))
    return replaceInstUsesWith(EI, V);

  // extractelt (select %x, %vec1, %vec2), %const ->
  // select %x, %vec1[%const], %vec2[%const]
  // TODO: Support constant folding of multiple select operands:
  // extractelt (select %x, %vec1, %vec2), (select %x, %c1, %c2)
  // If the extractelement will for instance try to do out of bounds accesses
  // because of the values of %c1 and/or %c2, the sequence could be optimized
  // early. This is currently not possible because constant folding will reach
  // an unreachable assertion if it doesn't find a constant operand.
  if (SelectInst *SI = dyn_cast<SelectInst>(EI.getVectorOperand()))
    if (SI->getCondition()->getType()->isIntegerTy() &&
        isa<Constant>(EI.getIndexOperand()))
      if (Instruction *R = FoldOpIntoSelect(EI, SI))
        return R;

  // If extracting a specified index from the vector, see if we can recursively
  // find a previously computed scalar that was inserted into the vector.
  auto *IndexC = dyn_cast<ConstantInt>(Index);
  if (IndexC) {
    // Canonicalize the type of constant indices to i64 to simplify CSE.
    if (auto *NewIdx = getPreferredVectorIndex(IndexC))
      return replaceOperand(EI, 1, NewIdx);

    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(SrcVec)) {
      Intrinsic::ID IID = II->getIntrinsicID();
      // The index needs to be lower than the minimum known size of the vector,
      // because for a scalable vector the full size is only known at run time.
      if (IID == Intrinsic::experimental_stepvector &&
          IndexC->getValue().ult(NumElts)) {
        Type *Ty = EI.getType();
        unsigned BitWidth = Ty->getIntegerBitWidth();
        Value *Idx;
        // Return the index when its value does not exceed the allowed limit
        // for the element type of the vector; otherwise return poison.
        if (IndexC->getValue().getActiveBits() <= BitWidth)
          Idx = ConstantInt::get(Ty, IndexC->getValue().zextOrTrunc(BitWidth));
        else
          Idx = PoisonValue::get(Ty);
        return replaceInstUsesWith(EI, Idx);
      }
    }

    // InstSimplify should handle cases where the index is invalid.
    // For a fixed-length vector, it's invalid to extract an out-of-range
    // element.
    if (!EC.isScalable() && IndexC->getValue().uge(NumElts))
      return nullptr;

    if (Instruction *I = foldBitcastExtElt(EI))
      return I;

    // If there's a vector PHI feeding a scalar use through this extractelement
    // instruction, try to scalarize the PHI.
    if (auto *Phi = dyn_cast<PHINode>(SrcVec))
      if (Instruction *ScalarPHI = scalarizePHI(EI, Phi))
        return ScalarPHI;
  }

  // TODO: Come up with an n-ary matcher that subsumes both unary and
  // binary matchers.
  UnaryOperator *UO;
  if (match(SrcVec, m_UnOp(UO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (unop X), Index --> unop (extelt X, Index)
    Value *X = UO->getOperand(0);
    Value *E = Builder.CreateExtractElement(X, Index);
    return UnaryOperator::CreateWithCopiedFlags(UO->getOpcode(), E, UO);
  }

  BinaryOperator *BO;
  if (match(SrcVec, m_BinOp(BO)) && cheapToScalarize(SrcVec, Index)) {
    // extelt (binop X, Y), Index --> binop (extelt X, Index), (extelt Y, Index)
    Value *X = BO->getOperand(0), *Y = BO->getOperand(1);
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    return BinaryOperator::CreateWithCopiedFlags(BO->getOpcode(), E0, E1, BO);
  }

  Value *X, *Y;
  CmpInst::Predicate Pred;
  if (match(SrcVec, m_Cmp(Pred, m_Value(X), m_Value(Y))) &&
      cheapToScalarize(SrcVec, Index)) {
    // extelt (cmp X, Y), Index --> cmp (extelt X, Index), (extelt Y, Index)
    Value *E0 = Builder.CreateExtractElement(X, Index);
    Value *E1 = Builder.CreateExtractElement(Y, Index);
    CmpInst *SrcCmpInst = cast<CmpInst>(SrcVec);
    return CmpInst::CreateWithCopiedFlags(SrcCmpInst->getOpcode(), Pred, E0, E1,
                                          SrcCmpInst);
  }

  if (auto *I = dyn_cast<Instruction>(SrcVec)) {
    if (auto *IE = dyn_cast<InsertElementInst>(I)) {
      // InstSimplify already handled the case where the indices are constants
      // and equal in value. So if both are constants here, they must differ,
      // and we can extract from the pre-inserted value instead.
      if (isa<Constant>(IE->getOperand(2)) && IndexC)
        return replaceOperand(EI, 0, IE->getOperand(0));
    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
      auto *VecType = cast<VectorType>(GEP->getType());
      ElementCount EC = VecType->getElementCount();
      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
        // Find out why we have a vector result - these are a few examples:
        //  1. We have a scalar pointer and a vector of indices, or
        //  2. We have a vector of pointers and a scalar index, or
        //  3. We have a vector of pointers and a vector of indices, etc.
        // Here we only consider combining when there is exactly one vector
        // operand, since the optimization is less obviously a win due to
        // needing more than one extractelement.
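        // For example (illustrative):
        //   extractelement (gep %base, <2 x i64> %idxs), i64 1
        //   --> gep %base, (extractelement <2 x i64> %idxs, i64 1)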

        unsigned VectorOps =
            llvm::count_if(GEP->operands(), [](const Value *V) {
              return isa<VectorType>(V->getType());
            });
        if (VectorOps == 1) {
          Value *NewPtr = GEP->getPointerOperand();
          if (isa<VectorType>(NewPtr->getType()))
            NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);

          SmallVector<Value *> NewOps;
          for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
            Value *Op = GEP->getOperand(I);
            if (isa<VectorType>(Op->getType()))
              NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
            else
              NewOps.push_back(Op);
          }

          GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
              GEP->getSourceElementType(), NewPtr, NewOps);
          NewGEP->setIsInBounds(GEP->isInBounds());
          return NewGEP;
        }
      }
    } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
      // If this is extracting an element from a shufflevector, figure out where
      // it came from and extract from the appropriate input element instead.
      // Restrict the following transformation to fixed-length vectors.
      if (isa<FixedVectorType>(SVI->getType()) && isa<ConstantInt>(Index)) {
        int SrcIdx =
            SVI->getMaskValue(cast<ConstantInt>(Index)->getZExtValue());
        Value *Src;
        unsigned LHSWidth = cast<FixedVectorType>(SVI->getOperand(0)->getType())
                                ->getNumElements();

        if (SrcIdx < 0)
          return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
        if (SrcIdx < (int)LHSWidth)
          Src = SVI->getOperand(0);
        else {
          SrcIdx -= LHSWidth;
          Src = SVI->getOperand(1);
        }
        Type *Int64Ty = Type::getInt64Ty(EI.getContext());
        return ExtractElementInst::Create(
            Src, ConstantInt::get(Int64Ty, SrcIdx, false));
      }
    } else if (auto *CI = dyn_cast<CastInst>(I)) {
      // Canonicalize extractelement(cast) -> cast(extractelement).
      // Bitcasts can change the number of vector elements, and they cost
      // nothing.
      if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
        Value *EE = Builder.CreateExtractElement(CI->getOperand(0), Index);
        return CastInst::Create(CI->getOpcode(), EE, EI.getType());
      }
    }
  }

  // Run demanded elements after other transforms as this can drop flags on
  // binops. If there are two paths to the same final result, we prefer the
  // one which doesn't force us to drop flags.
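  // For example (illustrative), simplifying the demanded elements of a vector
  // add may require dropping its nsw flag, so this runs only when no other
  // fold fires.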
  if (IndexC) {
    ElementCount EC = EI.getVectorOperandType()->getElementCount();
    unsigned NumElts = EC.getKnownMinValue();
    // This instruction only demands the single element from the input vector.
    // Skip for scalable types; the number of elements is unknown at
    // compile time.
    if (!EC.isScalable() && NumElts != 1) {
      // If the input vector has a single use, simplify it based on this use
      // property.
      if (SrcVec->hasOneUse()) {
        APInt PoisonElts(NumElts, 0);
        APInt DemandedElts(NumElts, 0);
        DemandedElts.setBit(IndexC->getZExtValue());
        if (Value *V =
                SimplifyDemandedVectorElts(SrcVec, DemandedElts, PoisonElts))
          return replaceOperand(EI, 0, V);
      } else {
        // If the input vector has multiple uses, simplify it based on a union
        // of all elements used.
        APInt DemandedElts = findDemandedEltsByAllUsers(SrcVec);
        if (!DemandedElts.isAllOnes()) {
          APInt PoisonElts(NumElts, 0);
          if (Value *V = SimplifyDemandedVectorElts(
                  SrcVec, DemandedElts, PoisonElts, 0 /* Depth */,
                  true /* AllowMultipleUsers */)) {
            if (V != SrcVec) {
              Worklist.addValue(SrcVec);
              SrcVec->replaceAllUsesWith(V);
              return &EI;
            }
          }
        }
      }
    }
  }
  return nullptr;
}

/// If V is a shuffle of values that ONLY returns elements from either LHS or
/// RHS, return the shuffle mask and true. Otherwise, return false.
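/// For example (illustrative), with <4 x i32> LHS = %a and RHS = %b, inserting
/// (extractelement %b, 0) into lane 3 of %a produces the mask <0, 1, 2, 4>.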
static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
                                         SmallVectorImpl<int> &Mask) {
  assert(LHS->getType() == RHS->getType() &&
         "Invalid CollectSingleShuffleElements");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Undef())) {
    Mask.assign(NumElts, -1);
    return true;
  }

  if (V == LHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i);
    return true;
  }

  if (V == RHS) {
    for (unsigned i = 0; i != NumElts; ++i)
      Mask.push_back(i + NumElts);
    return true;
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp    = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp    = IEI->getOperand(2);

    if (!isa<ConstantInt>(IdxOp))
      return false;
    unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

    if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
      // We can handle this if the vector we are inserting into is
      // transitively ok.
      if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
        // If so, update the mask to reflect the inserted poison.
        Mask[InsertedIdx] = -1;
        return true;
      }
    } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
      if (isa<ConstantInt>(EI->getOperand(1))) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned NumLHSElts =
            cast<FixedVectorType>(LHS->getType())->getNumElements();

        // This must be extracting from either LHS or RHS.
        if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
          // We can handle this if the vector we are inserting into is
          // transitively ok.
          if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
            // If so, update the mask to reflect the inserted value.
            if (EI->getOperand(0) == LHS) {
              Mask[InsertedIdx % NumElts] = ExtractedIdx;
            } else {
              assert(EI->getOperand(0) == RHS);
              Mask[InsertedIdx % NumElts] = ExtractedIdx + NumLHSElts;
            }
            return true;
          }
        }
      }
    }
  }

  return false;
}

/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
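/// For example (illustrative), extracts from a <4 x i32> that feed inserts
/// into an <8 x i32> can instead extract from one widening shuffle of the
/// <4 x i32> source (padded out to 8 lanes with poison).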
static bool replaceExtractElements(InsertElementInst *InsElt,
                                   ExtractElementInst *ExtElt,
                                   InstCombinerImpl &IC) {
  auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
  auto *ExtVecType = cast<FixedVectorType>(ExtElt->getVectorOperandType());
  unsigned NumInsElts = InsVecType->getNumElements();
  unsigned NumExtElts = ExtVecType->getNumElements();

  // The inserted-to vector must be wider than the extracted-from vector.
  if (InsVecType->getElementType() != ExtVecType->getElementType() ||
      NumExtElts >= NumInsElts)
    return false;

  // Create a shuffle mask to widen the extracted-from vector using poison
  // values. The mask selects all of the values of the original vector followed
  // by as many poison values as needed to create a vector of the same length
  // as the inserted-to vector.
  SmallVector<int, 16> ExtendMask;
  for (unsigned i = 0; i < NumExtElts; ++i)
    ExtendMask.push_back(i);
  for (unsigned i = NumExtElts; i < NumInsElts; ++i)
    ExtendMask.push_back(-1);

  Value *ExtVecOp = ExtElt->getVectorOperand();
  auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
  BasicBlock *InsertionBlock = (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
                                   ? ExtVecOpInst->getParent()
                                   : ExtElt->getParent();

  // TODO: This restriction matches the basic block check below when creating
  // new extractelement instructions. If that limitation is removed, this one
  // could also be removed. But for now, we just bail out to ensure that we
  // will replace the extractelement instruction that is feeding our
  // insertelement instruction. This allows the insertelement to then be
  // replaced by a shufflevector. If the insertelement is not replaced, we can
  // induce infinite looping because there's an optimization for extractelement
  // that will delete our widening shuffle. This would trigger another attempt
  // here to create that shuffle, and we spin forever.
  if (InsertionBlock != InsElt->getParent())
    return false;

  // TODO: This restriction matches the check in visitInsertElementInst() and
  // prevents an infinite loop caused by not turning the extract/insert pair
  // into a shuffle. We really should not need either check, but we're lacking
  // folds for shufflevectors because we're afraid to generate shuffle masks
  // that the backend can't handle.
  if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
    return false;

  auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);

  // Insert the new shuffle after the vector operand of the extract is defined
  // (as long as it's not a PHI) or at the start of the basic block of the
  // extract, so any subsequent extracts in the same basic block can use it.
  // TODO: Insert before the earliest ExtractElementInst that is replaced.
  if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
    WideVec->insertAfter(ExtVecOpInst);
  else
    IC.InsertNewInstWith(WideVec, ExtElt->getParent()->getFirstInsertionPt());

  // Replace extracts from the original narrow vector with extracts from the
  // new wide vector.
  for (User *U : ExtVecOp->users()) {
    ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
    if (!OldExt || OldExt->getParent() != WideVec->getParent())
      continue;
    auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
    IC.InsertNewInstWith(NewExt, OldExt->getIterator());
    IC.replaceInstUsesWith(*OldExt, NewExt);
    // Add the old extracts to the worklist for DCE. We can't remove the
    // extracts directly, because they may still be used by the calling code.
    IC.addToWorklist(OldExt);
  }

  return true;
}

/// We are building a shuffle to create V, which is a sequence of
/// insertelement/extractelement pairs. If PermittedRHS is set, then we must
/// either use it or not rely on the second vector source. Return a std::pair
/// containing the left and right vectors of the proposed shuffle (or nullptr),
/// and set the Mask parameter as required.
///
/// Note: we intentionally don't try to fold earlier shuffles since they have
/// often been chosen carefully to be efficiently implementable on the target.
using ShuffleOps = std::pair<Value *, Value *>;

static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
                                         Value *PermittedRHS,
                                         InstCombinerImpl &IC, bool &Rerun) {
  assert(V->getType()->isVectorTy() && "Invalid shuffle!");
  unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();

  if (match(V, m_Poison())) {
    Mask.assign(NumElts, -1);
    return std::make_pair(
        PermittedRHS ? PoisonValue::get(PermittedRHS->getType()) : V, nullptr);
  }

  if (isa<ConstantAggregateZero>(V)) {
    Mask.assign(NumElts, 0);
    return std::make_pair(V, nullptr);
  }

  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
    // If this is an insert of an extract from some other vector, include it.
    Value *VecOp    = IEI->getOperand(0);
    Value *ScalarOp = IEI->getOperand(1);
    Value *IdxOp    = IEI->getOperand(2);

    if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
      if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) {
        unsigned ExtractedIdx =
            cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
        unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();

        // Either the extracted from or inserted into vector must be RHSVec,
        // otherwise we'd end up with a shuffle of three inputs.
        if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
          Value *RHS = EI->getOperand(0);
          ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
          assert(LR.second == nullptr || LR.second == RHS);

          if (LR.first->getType() != RHS->getType()) {
            // Although we are giving up for now, see if we can create extracts
            // that match the inserts for another round of combining.
            if (replaceExtractElements(IEI, EI, IC))
              Rerun = true;

            // We tried our best, but we can't find anything compatible with
            // RHS further up the chain. Return a trivial shuffle.
            for (unsigned i = 0; i < NumElts; ++i)
              Mask[i] = i;
            return std::make_pair(V, nullptr);
          }

          unsigned NumLHSElts =
              cast<FixedVectorType>(RHS->getType())->getNumElements();
          Mask[InsertedIdx % NumElts] = NumLHSElts + ExtractedIdx;
          return std::make_pair(LR.first, RHS);
        }

        if (VecOp == PermittedRHS) {
          // We've gone as far as we can: anything on the other side of the
          // extractelement will already have been converted into a shuffle.
          unsigned NumLHSElts =
              cast<FixedVectorType>(EI->getOperand(0)->getType())
                  ->getNumElements();
          for (unsigned i = 0; i != NumElts; ++i)
            Mask.push_back(i == InsertedIdx ? ExtractedIdx : NumLHSElts + i);
          return std::make_pair(EI->getOperand(0), PermittedRHS);
        }

        // If this insertelement is a chain that comes from exactly these two
        // vectors, return the vector and the effective shuffle.
        if (EI->getOperand(0)->getType() == PermittedRHS->getType() &&
            collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS,
                                         Mask))
          return std::make_pair(EI->getOperand(0), PermittedRHS);
      }
    }
  }

  // Otherwise, we can't do anything fancy. Return an identity vector.
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(i);
  return std::make_pair(V, nullptr);
}

/// Look for a chain of insertvalue's that fully defines an aggregate, and
/// trace back the values inserted; see if they were all extractvalue'd from
/// the same source aggregate at the exact same element indexes.
/// If they were, just reuse the source aggregate.
/// This potentially deals with PHI indirections.
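/// For example (illustrative):
///   %e0 = extractvalue { i8, i32 } %agg, 0
///   %e1 = extractvalue { i8, i32 } %agg, 1
///   %i0 = insertvalue { i8, i32 } undef, i8 %e0, 0
///   %i1 = insertvalue { i8, i32 } %i0, i32 %e1, 1
/// Here %i1 can simply be replaced by %agg.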
Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
    InsertValueInst &OrigIVI) {
  Type *AggTy = OrigIVI.getType();
  unsigned NumAggElts;
  switch (AggTy->getTypeID()) {
  case Type::StructTyID:
    NumAggElts = AggTy->getStructNumElements();
    break;
  case Type::ArrayTyID:
    NumAggElts = AggTy->getArrayNumElements();
    break;
  default:
    llvm_unreachable("Unhandled aggregate type?");
  }

  // Arbitrary aggregate size cut-off. Motivation for limit of 2 is to be able
  // to handle clang C++ exception struct (which is hardcoded as {i8*, i32}).
  // FIXME: any interesting patterns to be caught with larger limit?
  assert(NumAggElts > 0 && "Aggregate should have elements.");
  if (NumAggElts > 2)
    return nullptr;

  static constexpr auto NotFound = std::nullopt;
  static constexpr auto FoundMismatch = nullptr;

  // Try to find a value of each element of an aggregate.
  // FIXME: deal with more complex, not one-dimensional, aggregate types
  SmallVector<std::optional<Instruction *>, 2> AggElts(NumAggElts, NotFound);

  // Do we know values for each element of the aggregate?
  auto KnowAllElts = [&AggElts]() {
    return !llvm::is_contained(AggElts, NotFound);
  };

  int Depth = 0;

  // Arbitrary `insertvalue` visitation depth limit. Let's be okay with
  // every element being overwritten twice, which should never happen.
  static const int DepthLimit = 2 * NumAggElts;

  // Recurse up the chain of `insertvalue` aggregate operands until either
  // we've reconstructed the full initializer or can't visit any more
  // `insertvalue`'s.
  for (InsertValueInst *CurrIVI = &OrigIVI;
       Depth < DepthLimit && CurrIVI && !KnowAllElts();
       CurrIVI = dyn_cast<InsertValueInst>(CurrIVI->getAggregateOperand()),
                 ++Depth) {
    auto *InsertedValue =
        dyn_cast<Instruction>(CurrIVI->getInsertedValueOperand());
    if (!InsertedValue)
      return nullptr; // Inserted value must be produced by an instruction.

    ArrayRef<unsigned int> Indices = CurrIVI->getIndices();

    // Don't bother with more than single-level aggregates.
    if (Indices.size() != 1)
      return nullptr; // FIXME: deal with more complex aggregates?

    // Now, we may have already previously recorded the value for this element
    // of an aggregate. If we did, that means the CurrIVI will later be
    // overwritten with the already-recorded value. But if not, let's record it!
    std::optional<Instruction *> &Elt = AggElts[Indices.front()];
    Elt = Elt.value_or(InsertedValue);

    // FIXME: should we handle chain-terminating undef base operand?
  }

  // Was that sufficient to deduce the full initializer for the aggregate?
  if (!KnowAllElts())
    return nullptr; // Give up then.

  // We now want to find the source[s] of the aggregate elements we've found.
  // And with "source" we mean the original aggregate[s] from which
  // the inserted elements were extracted. This may require PHI translation.

  enum class AggregateDescription {
    /// When analyzing the value that was inserted into an aggregate, we did
    /// not manage to find a defining `extractvalue` instruction to analyze.
    NotFound,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find defining `extractvalue` instruction[s], and everything
    /// matched perfectly - aggregate type, element insertion/extraction index.
    Found,
    /// When analyzing the value that was inserted into an aggregate, we did
    /// manage to find a defining `extractvalue` instruction, but there was
    /// a mismatch: either the type of the aggregate from which the extraction
    /// was made didn't match the aggregate type into which the insertion was
    /// made, or the extraction/insertion channels mismatched,
    /// or different elements had different source aggregates.
    FoundMismatch
  };
  auto Describe = [](std::optional<Value *> SourceAggregate) {
    if (SourceAggregate == NotFound)
      return AggregateDescription::NotFound;
    if (*SourceAggregate == FoundMismatch)
      return AggregateDescription::FoundMismatch;
    return AggregateDescription::Found;
  };

  // Given the value \p Elt that was being inserted into element \p EltIdx of
  // an aggregate AggTy, see if \p Elt was originally defined by an
  // appropriate extractvalue (same element index, same aggregate type).
  // If found, return the source aggregate from which the extraction was.
  // If \p PredBB is provided, does PHI translation of an \p Elt first.
  auto FindSourceAggregate =
      [&](Instruction *Elt, unsigned EltIdx, std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    // For now(?), only deal with, at most, a single level of PHI indirection.
    if (UseBB && PredBB)
      Elt = dyn_cast<Instruction>(Elt->DoPHITranslation(*UseBB, *PredBB));
    // FIXME: deal with multiple levels of PHI indirection?

    // Did we find an extraction?
    auto *EVI = dyn_cast_or_null<ExtractValueInst>(Elt);
    if (!EVI)
      return NotFound;

    Value *SourceAggregate = EVI->getAggregateOperand();

    // Is the extraction from the same type into which the insertion was?
    if (SourceAggregate->getType() != AggTy)
      return FoundMismatch;
    // And the element index doesn't change between extraction and insertion?
    if (EVI->getNumIndices() != 1 || EltIdx != EVI->getIndices().front())
      return FoundMismatch;

    return SourceAggregate; // AggregateDescription::Found
  };

  // Given elements AggElts that were constructing an aggregate OrigIVI,
  // see if we can find an appropriate source aggregate for each of the
  // elements, and check that it's the same aggregate for each element.
  // If so, return it.
  auto FindCommonSourceAggregate =
      [&](std::optional<BasicBlock *> UseBB,
          std::optional<BasicBlock *> PredBB) -> std::optional<Value *> {
    std::optional<Value *> SourceAggregate;

    for (auto I : enumerate(AggElts)) {
      assert(Describe(SourceAggregate) != AggregateDescription::FoundMismatch &&
             "We don't store nullptr in SourceAggregate!");
      assert((Describe(SourceAggregate) == AggregateDescription::Found) ==
                 (I.index() != 0) &&
             "SourceAggregate should be valid after the first element.");

      // For this element, is there a plausible source aggregate?
      // FIXME: we could special-case undef element, IFF we know that in the
      // source aggregate said element isn't poison.
      std::optional<Value *> SourceAggregateForElement =
          FindSourceAggregate(*I.value(), I.index(), UseBB, PredBB);

      // Okay, what have we found? Does that correlate with previous findings?

      // Regardless of whether or not we have previously found a source
      // aggregate for previous elements (if any), if we didn't find one for
      // this element, pass through whatever we have just found.
      if (Describe(SourceAggregateForElement) != AggregateDescription::Found)
        return SourceAggregateForElement;

      // Okay, we have found a source aggregate for this element.
      // Let's see what we already know from previous elements, if any.
      switch (Describe(SourceAggregate)) {
      case AggregateDescription::NotFound:
        // This is apparently the first element that we have examined.
        SourceAggregate = SourceAggregateForElement; // Record the aggregate!
        continue; // Great, now look at next element.
      case AggregateDescription::Found:
        // We have previously already successfully examined other elements.
        // Is this the same source aggregate we've found for other elements?
        if (*SourceAggregateForElement != *SourceAggregate)
          return FoundMismatch;
        continue; // Still the same aggregate, look at next element.
      case AggregateDescription::FoundMismatch:
        llvm_unreachable("Can't happen. We would have early-exited then.");
      };
    }

    assert(Describe(SourceAggregate) == AggregateDescription::Found &&
           "Must be a valid Value");
    return *SourceAggregate;
  };

  std::optional<Value *> SourceAggregate;

  // Can we find the source aggregate without looking at predecessors?
  SourceAggregate = FindCommonSourceAggregate(/*UseBB=*/std::nullopt,
                                              /*PredBB=*/std::nullopt);
  if (Describe(SourceAggregate) != AggregateDescription::NotFound) {
    if (Describe(SourceAggregate) == AggregateDescription::FoundMismatch)
      return nullptr; // Conflicting source aggregates!
    ++NumAggregateReconstructionsSimplified;
    return replaceInstUsesWith(OrigIVI, *SourceAggregate);
  }

  // Okay, apparently we need to look at predecessors.

  // We should be smart about picking the "use" basic block, which will be the
  // merge point for the aggregate, where we'll insert the final PHI that will
  // be used instead of OrigIVI. The basic block of OrigIVI is *not* the right
  // choice. We should look in which blocks each of the AggElts is being
  // defined; they all should be defined in the same basic block.
  BasicBlock *UseBB = nullptr;

  for (const std::optional<Instruction *> &I : AggElts) {
    BasicBlock *BB = (*I)->getParent();
    // If it's the first instruction we've encountered, record the basic block.
    if (!UseBB) {
      UseBB = BB;
      continue;
    }
    // Otherwise, this must be the same basic block we've seen previously.
    if (UseBB != BB)
      return nullptr;
  }

  // If *all* of the elements are basic-block-independent, meaning they are
  // either function arguments, or constant expressions, then if we didn't
  // handle them without predecessor-aware handling, we won't handle them now.
  if (!UseBB)
    return nullptr;

  // If we didn't manage to find the source aggregate without looking at
  // predecessors, and there are no predecessors to look at, then we're done.
  if (pred_empty(UseBB))
    return nullptr;

  // Arbitrary predecessor count limit.
  static const int PredCountLimit = 64;

  // Cache the (non-uniqified!) list of predecessors in a vector,
  // checking the limit at the same time for efficiency.
  SmallVector<BasicBlock *, 4> Preds; // May have duplicates!
  for (BasicBlock *Pred : predecessors(UseBB)) {
    // Don't bother if there are too many predecessors.
    if (Preds.size() >= PredCountLimit) // FIXME: only count duplicates once?
      return nullptr;
    Preds.emplace_back(Pred);
  }

  // For each predecessor, what is the source aggregate
  // from which all the elements were originally extracted?
  // Note that we want the map to have a stable iteration order!
  SmallDenseMap<BasicBlock *, Value *, 4> SourceAggregates;
  for (BasicBlock *Pred : Preds) {
    std::pair<decltype(SourceAggregates)::iterator, bool> IV =
        SourceAggregates.insert({Pred, nullptr});
    // Did we already evaluate this predecessor?
    if (!IV.second)
      continue;

    // Let's hope that when coming from predecessor Pred, all elements of the
    // aggregate produced by OrigIVI must have been originally extracted from
    // the same aggregate. Is that so? Can we find said original aggregate?
    SourceAggregate = FindCommonSourceAggregate(UseBB, Pred);
    if (Describe(SourceAggregate) != AggregateDescription::Found)
      return nullptr; // Give up.
    IV.first->second = *SourceAggregate;
  }

  // All good! Now we just need to thread the source aggregates here.
  // Note that we have to insert the new PHI here, ourselves, because we can't
  // rely on InstCombinerImpl::run() inserting it into the right basic block.
  // Note that the same block can be a predecessor more than once,
  // and we need to preserve that invariant for the PHI node.
  BuilderTy::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(UseBB, UseBB->getFirstNonPHIIt());
  auto *PHI =
      Builder.CreatePHI(AggTy, Preds.size(), OrigIVI.getName() + ".merged");
  for (BasicBlock *Pred : Preds)
    PHI->addIncoming(SourceAggregates[Pred], Pred);

  ++NumAggregateReconstructionsSimplified;
  return replaceInstUsesWith(OrigIVI, PHI);
}

/// Try to find redundant insertvalue instructions, like the following ones:
///  %0 = insertvalue { i8, i32 } undef, i8 %x, 0
///  %1 = insertvalue { i8, i32 } %0, i8 %y, 0
/// Here the second instruction inserts values at the same indices as the
/// first one, making the first one redundant.
/// It should be transformed to:
///  %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
  if (Value *V = simplifyInsertValueInst(
          I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
          SQ.getWithInstruction(&I)))
    return replaceInstUsesWith(I, V);

  bool IsRedundant = false;
  ArrayRef<unsigned int> FirstIndices = I.getIndices();

  // If there is a chain of insertvalue instructions (each of them except the
  // last one has only one use and it's another insertvalue insn from this
  // chain), check if any of the 'children' uses the same indices as the first
  // instruction. In this case, the first one is redundant.
  Value *V = &I;
  unsigned Depth = 0;
  while (V->hasOneUse() && Depth < 10) {
    User *U = V->user_back();
    auto UserInsInst = dyn_cast<InsertValueInst>(U);
    if (!UserInsInst || U->getOperand(0) != V)
      break;
    if (UserInsInst->getIndices() == FirstIndices) {
      IsRedundant = true;
      break;
    }
    V = UserInsInst;
    Depth++;
  }

  if (IsRedundant)
    return replaceInstUsesWith(I, I.getOperand(0));

  if (Instruction *NewI = foldAggregateConstructionIntoAggregateReuse(I))
    return NewI;

  return nullptr;
}

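/// Return true if the shuffle acts like a vector select: the mask has the
/// same length as the operands and every lane either keeps its position (from
/// either source) or is undefined. For example (illustrative), with
/// <4 x i32> operands, the mask <0, 5, 2, 7> is select-equivalent.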
static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
  // Cannot analyze a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(Shuf.getOperand(0)->getType()))
    return false;

  int MaskSize = Shuf.getShuffleMask().size();
  int VecSize =
      cast<FixedVectorType>(Shuf.getOperand(0)->getType())->getNumElements();

  // A vector select does not change the size of the operands.
  if (MaskSize != VecSize)
    return false;

  // Each mask element must be undefined or choose a vector element from one of
  // the source operands without crossing vector lanes.
  for (int i = 0; i != MaskSize; ++i) {
    int Elt = Shuf.getMaskValue(i);
    if (Elt != -1 && Elt != i && Elt != i + VecSize)
      return false;
  }

  return true;
}

/// Turn a chain of inserts that splats a value into an insert + shuffle:
/// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
/// shufflevector(insertelt(X, %k, 0), poison, zero)
static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
  // We are interested in the last insert in a chain. So if this insert has a
  // single user and that user is an insert, bail.
  if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
    return nullptr;

  VectorType *VecTy = InsElt.getType();
  // Cannot handle a scalable type; the number of elements is not a
  // compile-time constant.
  if (isa<ScalableVectorType>(VecTy))
    return nullptr;
  unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();

  // Do not try to do this for a one-element vector, since that's a no-op,
  // and will cause an infinite loop.
  if (NumElements == 1)
    return nullptr;

  Value *SplatVal = InsElt.getOperand(1);
  InsertElementInst *CurrIE = &InsElt;
  SmallBitVector ElementPresent(NumElements, false);
  InsertElementInst *FirstIE = nullptr;

  // Walk the chain backwards, keeping track of which indices we inserted into,
  // until we hit something that isn't an insert of the splatted value.
  while (CurrIE) {
    auto *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
    if (!Idx || CurrIE->getOperand(1) != SplatVal)
      return nullptr;

    auto *NextIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
    // Check that none of the intermediate steps have any additional uses,
    // except for the root insertelement instruction, which can be re-used, if
    // it inserts at position 0.
    if (CurrIE != &InsElt &&
        (!CurrIE->hasOneUse() && (NextIE != nullptr || !Idx->isZero())))
      return nullptr;

    ElementPresent[Idx->getZExtValue()] = true;
    FirstIE = CurrIE;
    CurrIE = NextIE;
  }

  // If this is just a single insertelement (not a sequence), we are done.
  if (FirstIE == &InsElt)
    return nullptr;

  // If we are not inserting into a poison vector, make sure we've seen an
  // insert into every element.
  // TODO: If the base vector is not undef, it might be better to create a splat
  //       and then a select-shuffle (blend) with the base vector.
  if (!match(FirstIE->getOperand(0), m_Poison()))
    if (!ElementPresent.all())
      return nullptr;

  // Create the insert + shuffle.
  Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
  PoisonValue *PoisonVec = PoisonValue::get(VecTy);
  Constant *Zero = ConstantInt::get(Int64Ty, 0);
  if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
    FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "",
                                        InsElt.getIterator());

  // Splat from element 0, but replace absent elements with poison in the mask.
  SmallVector<int, 16> Mask(NumElements, 0);
  for (unsigned i = 0; i != NumElements; ++i)
    if (!ElementPresent[i])
      Mask[i] = -1;

  return new ShuffleVectorInst(FirstIE, Mask);
}

/// Try to fold an insert element into an existing splat shuffle by changing
/// the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoSplat(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is a canonical splat shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !Shuf->isZeroEltSplat())
    return nullptr;

  // Bail out early if the shuffle has scalable type. The number of elements in
  // the shuffle mask is unknown at compile time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if the splat shuffle's input is the same as this insert's scalar op.
  Value *X = InsElt.getOperand(1);
  Value *Op0 = Shuf->getOperand(0);
  if (!match(Op0, m_InsertElt(m_Undef(), m_Specific(X), m_ZeroInt())))
    return nullptr;

  // Replace the shuffle mask element at the index of this insert with a zero.
  // For example:
  // inselt (shuf (inselt undef, X, 0), _, <0,undef,0,undef>), X, 1
  //   --> shuf (inselt undef, X, 0), poison, <0,0,0,undef>
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  for (unsigned i = 0; i != NumMaskElts; ++i)
    NewMask[i] = i == IdxC ? 0 : Shuf->getMaskValue(i);

  return new ShuffleVectorInst(Op0, NewMask);
}

/// Try to fold an extract+insert element into an existing identity shuffle by
/// changing the shuffle's mask to include the index of this insert element.
static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
  // Check if the vector operand of this insert is an identity shuffle.
  auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0));
  if (!Shuf || !match(Shuf->getOperand(1), m_Undef()) ||
      !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding()))
    return nullptr;

  // Bail out early if the shuffle has scalable type. The number of elements in
  // the shuffle mask is unknown at compile time.
  if (isa<ScalableVectorType>(Shuf->getType()))
    return nullptr;

  // Check for a constant insertion index.
  uint64_t IdxC;
  if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC)))
    return nullptr;

  // Check if this insert's scalar op is extracted from the identity shuffle's
  // input vector.
  Value *Scalar = InsElt.getOperand(1);
  Value *X = Shuf->getOperand(0);
  if (!match(Scalar, m_ExtractElt(m_Specific(X), m_SpecificInt(IdxC))))
    return nullptr;

  // Replace the shuffle mask element at the index of this extract+insert with
  // that same index value.
  // For example:
  // inselt (shuf X, IdMask), (extelt X, IdxC), IdxC --> shuf X, IdMask'
  unsigned NumMaskElts =
      cast<FixedVectorType>(Shuf->getType())->getNumElements();
  SmallVector<int, 16> NewMask(NumMaskElts);
  ArrayRef<int> OldMask = Shuf->getShuffleMask();
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    if (i != IdxC) {
      // All mask elements besides the inserted element remain the same.
      NewMask[i] = OldMask[i];
    } else if (OldMask[i] == (int)IdxC) {
      // If the mask element was already set, there's nothing to do
      // (demanded elements analysis may unset it later).
      return nullptr;
    } else {
      assert(OldMask[i] == PoisonMaskElem &&
             "Unexpected shuffle mask element for identity shuffle");
      NewMask[i] = IdxC;
    }
  }

  return new ShuffleVectorInst(X, Shuf->getOperand(1), NewMask);
}

/// If we have an insertelement instruction feeding into another insertelement
/// and the 2nd is inserting a constant into the vector, canonicalize that
/// constant insertion before the insertion of a variable:
///
/// insertelement (insertelement X, Y, IdxC1), ScalarC, IdxC2 -->
/// insertelement (insertelement X, ScalarC, IdxC2), Y, IdxC1
///
/// This has the potential of eliminating the 2nd insertelement instruction
/// via constant folding of the scalar constant into a vector constant.
static Instruction *hoistInsEltConst(InsertElementInst &InsElt2,
                                     InstCombiner::BuilderTy &Builder) {
  auto *InsElt1 = dyn_cast<InsertElementInst>(InsElt2.getOperand(0));
  if (!InsElt1 || !InsElt1->hasOneUse())
    return nullptr;

  Value *X, *Y;
  Constant *ScalarC;
  ConstantInt *IdxC1, *IdxC2;
  if (match(InsElt1->getOperand(0), m_Value(X)) &&
      match(InsElt1->getOperand(1), m_Value(Y)) && !isa<Constant>(Y) &&
      match(InsElt1->getOperand(2), m_ConstantInt(IdxC1)) &&
      match(InsElt2.getOperand(1), m_Constant(ScalarC)) &&
      match(InsElt2.getOperand(2), m_ConstantInt(IdxC2)) && IdxC1 != IdxC2) {
    Value *NewInsElt1 = Builder.CreateInsertElement(X, ScalarC, IdxC2);
    return InsertElementInst::Create(NewInsElt1, Y, IdxC1);
  }

  return nullptr;
}

/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
/// --> shufflevector X, CVec', Mask'
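/// For example (illustrative), with <4 x i32> operands:
///   %shuf = shufflevector <4 x i32> %X, <4 x i32> <i32 1, i32 2, i32 3, i32 4>,
///                         <4 x i32> <i32 0, i32 5, i32 2, i32 7>
///   %ins  = insertelement <4 x i32> %shuf, i32 42, i64 0
/// --> shufflevector %X, <i32 42, i32 2, i32 3, i32 4>, mask <4, 5, 2, 7>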
1402 auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
1403 // Bail out if the parent has more than one use. In that case, we'd be
1404 // replacing the insertelt with a shuffle, and that's not a clear win.
1405 if (!Inst || !Inst->hasOneUse())
1406 return nullptr;
1407 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
1408 // The shuffle must have a constant vector operand. The insertelt must have
1409 // a constant scalar being inserted at a constant position in the vector.
1410 Constant *ShufConstVec, *InsEltScalar;
1411 uint64_t InsEltIndex;
1412 if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
1413 !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
1414 !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
1415 return nullptr;
1416
1417 // Adding an element to an arbitrary shuffle could be expensive, but a
1418 // shuffle that selects elements from vectors without crossing lanes is
1419 // assumed cheap.
1420 // If we're just adding a constant into that shuffle, it will still be
1421 // cheap.
1422 if (!isShuffleEquivalentToSelect(*Shuf))
1423 return nullptr;
1424
1425 // From the above 'select' check, we know that the mask has the same number
1426 // of elements as the vector input operands. We also know that each constant
1427 // input element is used in its lane and can not be used more than once by
1428 // the shuffle. Therefore, replace the constant in the shuffle's constant
1429 // vector with the insertelt constant. Replace the constant in the shuffle's
1430 // mask vector with the insertelt index plus the length of the vector
1431 // (because the constant vector operand of a shuffle is always the 2nd
1432 // operand).
1433 ArrayRef<int> Mask = Shuf->getShuffleMask();
1434 unsigned NumElts = Mask.size();
1435 SmallVector<Constant *, 16> NewShufElts(NumElts);
1436 SmallVector<int, 16> NewMaskElts(NumElts);
1437 for (unsigned I = 0; I != NumElts; ++I) {
1438 if (I == InsEltIndex) {
1439 NewShufElts[I] = InsEltScalar;
1440 NewMaskElts[I] = InsEltIndex + NumElts;
1441 } else {
1442 // Copy over the existing values.
1443 NewShufElts[I] = ShufConstVec->getAggregateElement(I);
1444 NewMaskElts[I] = Mask[I];
1445 }
1446
1447 // Bail if we failed to find an element.
1448 if (!NewShufElts[I])
1449 return nullptr;
1450 }
1451
1452 // Create new operands for a shuffle that includes the constant of the
1453 // original insertelt. The old shuffle will be dead now.
1454 return new ShuffleVectorInst(Shuf->getOperand(0),
1455 ConstantVector::get(NewShufElts), NewMaskElts);
1456 } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
1457 // Transform sequences of insertelement ops with constant data/indexes into
1458 // a single shuffle op.
1459 // Cannot handle scalable vector types; the number of elements needed to
1460 // create the shuffle mask is not a compile-time constant.
1461 if (isa<ScalableVectorType>(InsElt.getType()))
1462 return nullptr;
1463 unsigned NumElts =
1464 cast<FixedVectorType>(InsElt.getType())->getNumElements();
1465
1466 uint64_t InsertIdx[2];
1467 Constant *Val[2];
1468 if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
1469 !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
1470 !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
1471 !match(IEI->getOperand(1), m_Constant(Val[1])))
1472 return nullptr;
1473 SmallVector<Constant *, 16> Values(NumElts);
1474 SmallVector<int, 16> Mask(NumElts);
1475 auto ValI = std::begin(Val);
1476 // Generate new constant vector and mask.
1477 // We have 2 values/masks from the insertelement instructions. Insert them
1478 // into new value/mask vectors.
1479 for (uint64_t I : InsertIdx) {
1480 if (!Values[I]) {
1481 Values[I] = *ValI;
1482 Mask[I] = NumElts + I;
1483 }
1484 ++ValI;
1485 }
1486 // Remaining values are filled with 'poison' values.
1487 for (unsigned I = 0; I < NumElts; ++I) {
1488 if (!Values[I]) {
1489 Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
1490 Mask[I] = I;
1491 }
1492 }
1493 // Create new operands for a shuffle that includes the constant of the
1494 // original insertelt.
1495 return new ShuffleVectorInst(IEI->getOperand(0),
1496 ConstantVector::get(Values), Mask);
1497 }
1498 return nullptr;
1499}
1500
1501/// If both the base vector and the inserted element are extended from the same
1502/// type, do the insert element in the narrow source type followed by extend.
1503/// TODO: This can be extended to include other cast opcodes, but particularly
1504/// if we create a wider insertelement, make sure codegen is not harmed.
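// Editorial sketch (not from the original source):
//   %v = fpext <2 x float> %x to <2 x double>
//   %s = fpext float %y to double
//   %r = insertelement <2 x double> %v, double %s, i64 0
// -->
//   %n = insertelement <2 x float> %x, float %y, i64 0
//   %r = fpext <2 x float> %n to <2 x double>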
1505static Instruction *narrowInsElt(InsertElementInst &InsElt,
1506 InstCombiner::BuilderTy &Builder) {
1507 // We are creating a vector extend. If the original vector extend has another
1508 // use, that would mean we end up with 2 vector extends, so avoid that.
1509 // TODO: We could ease the use-clause to "if at least one op has one use"
1510 // (assuming that the source types match - see next TODO comment).
1511 Value *Vec = InsElt.getOperand(0);
1512 if (!Vec->hasOneUse())
1513 return nullptr;
1514
1515 Value *Scalar = InsElt.getOperand(1);
1516 Value *X, *Y;
1517 CastInst::CastOps CastOpcode;
1518 if (match(Vec, m_FPExt(m_Value(X))) && match(Scalar, m_FPExt(m_Value(Y))))
1519 CastOpcode = Instruction::FPExt;
1520 else if (match(Vec, m_SExt(m_Value(X))) && match(Scalar, m_SExt(m_Value(Y))))
1521 CastOpcode = Instruction::SExt;
1522 else if (match(Vec, m_ZExt(m_Value(X))) && match(Scalar, m_ZExt(m_Value(Y))))
1523 CastOpcode = Instruction::ZExt;
1524 else
1525 return nullptr;
1526
1527 // TODO: We can allow mismatched types by creating an intermediate cast.
1528 if (X->getType()->getScalarType() != Y->getType())
1529 return nullptr;
1530
1531 // inselt (ext X), (ext Y), Index --> ext (inselt X, Y, Index)
1532 Value *NewInsElt = Builder.CreateInsertElement(X, Y, InsElt.getOperand(2));
1533 return CastInst::Create(CastOpcode, NewInsElt, InsElt.getType());
1534}
1535
1536/// If we are inserting 2 halves of a value into adjacent elements of a vector,
1537/// try to convert to a single insert with appropriate bitcasts.
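// Editorial sketch for little endian (not from the original source):
//   %lo = trunc i32 %x to i16
//   %sh = lshr i32 %x, 16
//   %hi = trunc i32 %sh to i16
//   %v0 = insertelement <4 x i16> undef, i16 %lo, i64 0
//   %v1 = insertelement <4 x i16> %v0, i16 %hi, i64 1
// -->
//   %bc = bitcast <4 x i16> undef to <2 x i32>
//   %in = insertelement <2 x i32> %bc, i32 %x, i64 0
//   %v1 = bitcast <2 x i32> %in to <4 x i16>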
1538static Instruction *foldTruncInsEltPair(InsertElementInst &InsElt,
1539 bool IsBigEndian,
1540 InstCombiner::BuilderTy &Builder) {
1541 Value *VecOp = InsElt.getOperand(0);
1542 Value *ScalarOp = InsElt.getOperand(1);
1543 Value *IndexOp = InsElt.getOperand(2);
1544
1545 // The pattern depends on endianness because we expect the lower index to be
1546 // inserted first. Big endian:
1547 // inselt (inselt BaseVec, (trunc (lshr X, BW/2)), Index0), (trunc X), Index1
1548 // Little endian:
1549 // inselt (inselt BaseVec, (trunc X), Index0), (trunc (lshr X, BW/2)), Index1
1550 // Note: It is not safe to do this transform with an arbitrary base vector
1551 // because the bitcast of that vector to fewer/larger elements could
1552 // allow poison to spill into an element that was not poison before.
1553 // TODO: Detect smaller fractions of the scalar.
1554 // TODO: One-use checks are conservative.
1555 auto *VTy = dyn_cast<FixedVectorType>(InsElt.getType());
1556 Value *Scalar0, *BaseVec;
1557 uint64_t Index0, Index1;
1558 if (!VTy || (VTy->getNumElements() & 1) ||
1559 !match(IndexOp, m_ConstantInt(Index1)) ||
1560 !match(VecOp, m_InsertElt(m_Value(BaseVec), m_Value(Scalar0),
1561 m_ConstantInt(Index0))) ||
1562 !match(BaseVec, m_Undef()))
1563 return nullptr;
1564
1565 // The first insert must be to the index one less than this one, and
1566 // the first insert must be to an even index.
1567 if (Index0 + 1 != Index1 || Index0 & 1)
1568 return nullptr;
1569
1570 // For big endian, the high half of the value should be inserted first.
1571 // For little endian, the low half of the value should be inserted first.
1572 Value *X;
1573 uint64_t ShAmt;
1574 if (IsBigEndian) {
1575 if (!match(ScalarOp, m_Trunc(m_Value(X))) ||
1576 !match(Scalar0, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1577 return nullptr;
1578 } else {
1579 if (!match(Scalar0, m_Trunc(m_Value(X))) ||
1580 !match(ScalarOp, m_Trunc(m_LShr(m_Specific(X), m_ConstantInt(ShAmt)))))
1581 return nullptr;
1582 }
1583
1584 Type *SrcTy = X->getType();
1585 unsigned ScalarWidth = SrcTy->getScalarSizeInBits();
1586 unsigned VecEltWidth = VTy->getScalarSizeInBits();
1587 if (ScalarWidth != VecEltWidth * 2 || ShAmt != VecEltWidth)
1588 return nullptr;
1589
1590 // Bitcast the base vector to a vector type with the source element type.
1591 Type *CastTy = FixedVectorType::get(SrcTy, VTy->getNumElements() / 2);
1592 Value *CastBaseVec = Builder.CreateBitCast(BaseVec, CastTy);
1593
1594 // Scale the insert index for a vector with half as many elements.
1595 // bitcast (inselt (bitcast BaseVec), X, NewIndex)
1596 uint64_t NewIndex = IsBigEndian ? Index1 / 2 : Index0 / 2;
1597 Value *NewInsert = Builder.CreateInsertElement(CastBaseVec, X, NewIndex);
1598 return new BitCastInst(NewInsert, VTy);
1599}
1600
1601Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
1602 Value *VecOp = IE.getOperand(0);
1603 Value *ScalarOp = IE.getOperand(1);
1604 Value *IdxOp = IE.getOperand(2);
1605
1606 if (auto *V = simplifyInsertElementInst(
1607 VecOp, ScalarOp, IdxOp, SQ.getWithInstruction(&IE)))
1608 return replaceInstUsesWith(IE, V);
1609
1610 // Canonicalize type of constant indices to i64 to simplify CSE
1611 if (auto *IndexC = dyn_cast<ConstantInt>(IdxOp)) {
1612 if (auto *NewIdx = getPreferredVectorIndex(IndexC))
1613 return replaceOperand(IE, 2, NewIdx);
1614
1615 Value *BaseVec, *OtherScalar;
1616 uint64_t OtherIndexVal;
1617 if (match(VecOp, m_OneUse(m_InsertElt(m_Value(BaseVec),
1618 m_Value(OtherScalar),
1619 m_ConstantInt(OtherIndexVal)))) &&
1620 !isa<Constant>(OtherScalar) && OtherIndexVal > IndexC->getZExtValue()) {
1621 Value *NewIns = Builder.CreateInsertElement(BaseVec, ScalarOp, IdxOp);
1622 return InsertElementInst::Create(NewIns, OtherScalar,
1623 Builder.getInt64(OtherIndexVal));
1624 }
1625 }
1626
1627 // If the scalar is bitcast and inserted into undef, do the insert in the
1628 // source type followed by bitcast.
1629 // TODO: Generalize for insert into any constant, not just undef?
1630 Value *ScalarSrc;
1631 if (match(VecOp, m_Undef()) &&
1632 match(ScalarOp, m_OneUse(m_BitCast(m_Value(ScalarSrc)))) &&
1633 (ScalarSrc->getType()->isIntegerTy() ||
1634 ScalarSrc->getType()->isFloatingPointTy())) {
1635 // inselt undef, (bitcast ScalarSrc), IdxOp -->
1636 // bitcast (inselt undef, ScalarSrc, IdxOp)
1637 Type *ScalarTy = ScalarSrc->getType();
1638 Type *VecTy = VectorType::get(ScalarTy, IE.getType()->getElementCount());
1639 Constant *NewUndef = isa<PoisonValue>(VecOp) ? PoisonValue::get(VecTy)
1640 : UndefValue::get(VecTy);
1641 Value *NewInsElt = Builder.CreateInsertElement(NewUndef, ScalarSrc, IdxOp);
1642 return new BitCastInst(NewInsElt, IE.getType());
1643 }
1644
1645 // If the vector and scalar are both bitcast from the same element type, do
1646 // the insert in that source type followed by bitcast.
1647 Value *VecSrc;
1648 if (match(VecOp, m_BitCast(m_Value(VecSrc))) &&
1649 match(ScalarOp, m_BitCast(m_Value(ScalarSrc))) &&
1650 (VecOp->hasOneUse() || ScalarOp->hasOneUse()) &&
1651 VecSrc->getType()->isVectorTy() && !ScalarSrc->getType()->isVectorTy() &&
1652 cast<VectorType>(VecSrc->getType())->getElementType() ==
1653 ScalarSrc->getType()) {
1654 // inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp -->
1655 // bitcast (inselt VecSrc, ScalarSrc, IdxOp)
1656 Value *NewInsElt = Builder.CreateInsertElement(VecSrc, ScalarSrc, IdxOp);
1657 return new BitCastInst(NewInsElt, IE.getType());
1658 }
1659
1660 // If the inserted element was extracted from some other fixed-length vector
1661 // and both indexes are valid constants, try to turn this into a shuffle.
1662 // Cannot handle scalable vector types; the number of elements needed to
1663 // create the shuffle mask is not a compile-time constant.
1664 uint64_t InsertedIdx, ExtractedIdx;
1665 Value *ExtVecOp;
1666 if (isa<FixedVectorType>(IE.getType()) &&
1667 match(IdxOp, m_ConstantInt(InsertedIdx)) &&
1668 match(ScalarOp,
1669 m_ExtractElt(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) &&
1670 isa<FixedVectorType>(ExtVecOp->getType()) &&
1671 ExtractedIdx <
1672 cast<FixedVectorType>(ExtVecOp->getType())->getNumElements()) {
1673 // TODO: Looking at the user(s) to determine if this insert is a
1674 // fold-to-shuffle opportunity does not match the usual instcombine
1675 // constraints. We should decide if the transform is worthy based only
1676 // on this instruction and its operands, but that may not work currently.
1677 //
1678 // Here, we are trying to avoid creating shuffles before reaching
1679 // the end of a chain of extract-insert pairs. This is complicated because
1680 // we do not generally form arbitrary shuffle masks in instcombine
1681 // (because those may codegen poorly), but collectShuffleElements() does
1682 // exactly that.
1683 //
1684 // The rules for determining what is an acceptable target-independent
1685 // shuffle mask are fuzzy because they evolve based on the backend's
1686 // capabilities and real-world impact.
1687 auto isShuffleRootCandidate = [](InsertElementInst &Insert) {
1688 if (!Insert.hasOneUse())
1689 return true;
1690 auto *InsertUser = dyn_cast<InsertElementInst>(Insert.user_back());
1691 if (!InsertUser)
1692 return true;
1693 return false;
1694 };
1695
1696 // Try to form a shuffle from a chain of extract-insert ops.
1697 if (isShuffleRootCandidate(IE)) {
1698 bool Rerun = true;
1699 while (Rerun) {
1700 Rerun = false;
1701
1702 SmallVector<int, 16> Mask;
1703 ShuffleOps LR =
1704 collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
1705
1706 // The proposed shuffle may be trivial, in which case we shouldn't
1707 // perform the combine.
1708 if (LR.first != &IE && LR.second != &IE) {
1709 // We now have a shuffle of LHS, RHS, Mask.
1710 if (LR.second == nullptr)
1711 LR.second = PoisonValue::get(LR.first->getType());
1712 return new ShuffleVectorInst(LR.first, LR.second, Mask);
1713 }
1714 }
1715 }
1716 }
1717
1718 if (auto VecTy = dyn_cast<FixedVectorType>(VecOp->getType())) {
1719 unsigned VWidth = VecTy->getNumElements();
1720 APInt PoisonElts(VWidth, 0);
1721 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1722 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask,
1723 PoisonElts)) {
1724 if (V != &IE)
1725 return replaceInstUsesWith(IE, V);
1726 return &IE;
1727 }
1728 }
1729
1730 if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
1731 return Shuf;
1732
1733 if (Instruction *NewInsElt = hoistInsEltConst(IE, Builder))
1734 return NewInsElt;
1735
1736 if (Instruction *Broadcast = foldInsSequenceIntoSplat(IE))
1737 return Broadcast;
1738
1739 if (Instruction *Splat = foldInsEltIntoSplat(IE))
1740 return Splat;
1741
1742 if (Instruction *IdentityShuf = foldInsEltIntoIdentityShuffle(IE))
1743 return IdentityShuf;
1744
1745 if (Instruction *Ext = narrowInsElt(IE, Builder))
1746 return Ext;
1747
1748 if (Instruction *Ext = foldTruncInsEltPair(IE, DL.isBigEndian(), Builder))
1749 return Ext;
1750
1751 return nullptr;
1752}
1753
1754/// Return true if we can evaluate the specified expression tree assuming the
1755/// vector elements were shuffled in a different order.
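// Editorial sketch (not from the original source): with mask <1, 0>,
//   %a = add <2 x i32> %x, <i32 1, i32 2>
//   %r = shufflevector <2 x i32> %a, <2 x i32> undef, <2 x i32> <1, 0>
// can be evaluated as
//   %s = shufflevector <2 x i32> %x, <2 x i32> undef, <2 x i32> <1, 0>
//   %r = add <2 x i32> %s, <i32 2, i32 1>
// because constants reorder for free and the add applies elementwise.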
1756static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
1757 unsigned Depth = 5) {
1758 // We can always reorder the elements of a constant.
1759 if (isa<Constant>(V))
1760 return true;
1761
1762 // We won't reorder vector arguments. No IPO here.
1763 Instruction *I = dyn_cast<Instruction>(V);
1764 if (!I) return false;
1765
1766 // Two users may expect different orders of the elements. Don't try it.
1767 if (!I->hasOneUse())
1768 return false;
1769
1770 if (Depth == 0) return false;
1771
1772 switch (I->getOpcode()) {
1773 case Instruction::UDiv:
1774 case Instruction::SDiv:
1775 case Instruction::URem:
1776 case Instruction::SRem:
1777 // Propagating an undefined shuffle mask element to integer div/rem is not
1778 // allowed because those opcodes can create immediate undefined behavior
1779 // from an undefined element in an operand.
1780 if (llvm::is_contained(Mask, -1))
1781 return false;
1782 [[fallthrough]];
1783 case Instruction::Add:
1784 case Instruction::FAdd:
1785 case Instruction::Sub:
1786 case Instruction::FSub:
1787 case Instruction::Mul:
1788 case Instruction::FMul:
1789 case Instruction::FDiv:
1790 case Instruction::FRem:
1791 case Instruction::Shl:
1792 case Instruction::LShr:
1793 case Instruction::AShr:
1794 case Instruction::And:
1795 case Instruction::Or:
1796 case Instruction::Xor:
1797 case Instruction::ICmp:
1798 case Instruction::FCmp:
1799 case Instruction::Trunc:
1800 case Instruction::ZExt:
1801 case Instruction::SExt:
1802 case Instruction::FPToUI:
1803 case Instruction::FPToSI:
1804 case Instruction::UIToFP:
1805 case Instruction::SIToFP:
1806 case Instruction::FPTrunc:
1807 case Instruction::FPExt:
1808 case Instruction::GetElementPtr: {
1809 // Bail out if we would create longer vector ops. We could allow creating
1810 // longer vector ops, but that may result in more expensive codegen.
1811 Type *ITy = I->getType();
1812 if (ITy->isVectorTy() &&
1813 Mask.size() > cast<FixedVectorType>(ITy)->getNumElements())
1814 return false;
1815 for (Value *Operand : I->operands()) {
1816 if (!canEvaluateShuffled(Operand, Mask, Depth - 1))
1817 return false;
1818 }
1819 return true;
1820 }
1821 case Instruction::InsertElement: {
1822 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
1823 if (!CI) return false;
1824 int ElementNumber = CI->getLimitedValue();
1825
1826 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
1827 // can't put an element into multiple indices.
1828 bool SeenOnce = false;
1829 for (int I : Mask) {
1830 if (I == ElementNumber) {
1831 if (SeenOnce)
1832 return false;
1833 SeenOnce = true;
1834 }
1835 }
1836 return canEvaluateShuffled(I->getOperand(0), Mask, Depth - 1);
1837 }
1838 }
1839 return false;
1840}
1841
1842/// Rebuild a new instruction just like 'I' but with the new operands given.
1843/// In the event of a type mismatch, the new operands' types take precedence.
1844static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
1845 IRBuilderBase &Builder) {
1846 Builder.SetInsertPoint(I);
1847 switch (I->getOpcode()) {
1848 case Instruction::Add:
1849 case Instruction::FAdd:
1850 case Instruction::Sub:
1851 case Instruction::FSub:
1852 case Instruction::Mul:
1853 case Instruction::FMul:
1854 case Instruction::UDiv:
1855 case Instruction::SDiv:
1856 case Instruction::FDiv:
1857 case Instruction::URem:
1858 case Instruction::SRem:
1859 case Instruction::FRem:
1860 case Instruction::Shl:
1861 case Instruction::LShr:
1862 case Instruction::AShr:
1863 case Instruction::And:
1864 case Instruction::Or:
1865 case Instruction::Xor: {
1866 BinaryOperator *BO = cast<BinaryOperator>(I);
1867 assert(NewOps.size() == 2 && "binary operator with #ops != 2");
1868 Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
1869 NewOps[0], NewOps[1]);
1870 if (auto *NewI = dyn_cast<Instruction>(New)) {
1871 if (isa<OverflowingBinaryOperator>(BO)) {
1872 NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
1873 NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
1874 }
1875 if (isa<PossiblyExactOperator>(BO)) {
1876 NewI->setIsExact(BO->isExact());
1877 }
1878 if (isa<FPMathOperator>(BO))
1879 NewI->copyFastMathFlags(I);
1880 }
1881 return New;
1882 }
1883 case Instruction::ICmp:
1884 assert(NewOps.size() == 2 && "icmp with #ops != 2");
1885 return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
1886 NewOps[1]);
1887 case Instruction::FCmp:
1888 assert(NewOps.size() == 2 && "fcmp with #ops != 2");
1889 return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
1890 NewOps[1]);
1891 case Instruction::Trunc:
1892 case Instruction::ZExt:
1893 case Instruction::SExt:
1894 case Instruction::FPToUI:
1895 case Instruction::FPToSI:
1896 case Instruction::UIToFP:
1897 case Instruction::SIToFP:
1898 case Instruction::FPTrunc:
1899 case Instruction::FPExt: {
1900 // It's possible that the mask has a different number of elements from
1901 // the original cast. We recompute the destination type to match the mask.
1902 Type *DestTy = VectorType::get(
1903 I->getType()->getScalarType(),
1904 cast<VectorType>(NewOps[0]->getType())->getElementCount());
1905 assert(NewOps.size() == 1 && "cast with #ops != 1");
1906 return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
1907 DestTy);
1908 }
1909 case Instruction::GetElementPtr: {
1910 Value *Ptr = NewOps[0];
1911 ArrayRef<Value*> Idx = NewOps.slice(1);
1912 return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
1913 Ptr, Idx, "",
1914 cast<GEPOperator>(I)->isInBounds());
1915 }
1916 }
1917 llvm_unreachable("failed to rebuild vector instructions");
1918}
1919
1920static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
1921 IRBuilderBase &Builder) {
1922 // Mask.size() does not need to be equal to the number of vector elements.
1923
1924 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
1925 Type *EltTy = V->getType()->getScalarType();
1926
1927 if (isa<PoisonValue>(V))
1928 return PoisonValue::get(FixedVectorType::get(EltTy, Mask.size()));
1929
1930 if (match(V, m_Undef()))
1931 return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
1932
1933 if (isa<ConstantAggregateZero>(V))
1934 return ConstantAggregateZero::get(FixedVectorType::get(EltTy, Mask.size()));
1935
1936 if (Constant *C = dyn_cast<Constant>(V))
1937 return ConstantExpr::getShuffleVector(C, PoisonValue::get(C->getType()),
1938 Mask);
1939
1940 Instruction *I = cast<Instruction>(V);
1941 switch (I->getOpcode()) {
1942 case Instruction::Add:
1943 case Instruction::FAdd:
1944 case Instruction::Sub:
1945 case Instruction::FSub:
1946 case Instruction::Mul:
1947 case Instruction::FMul:
1948 case Instruction::UDiv:
1949 case Instruction::SDiv:
1950 case Instruction::FDiv:
1951 case Instruction::URem:
1952 case Instruction::SRem:
1953 case Instruction::FRem:
1954 case Instruction::Shl:
1955 case Instruction::LShr:
1956 case Instruction::AShr:
1957 case Instruction::And:
1958 case Instruction::Or:
1959 case Instruction::Xor:
1960 case Instruction::ICmp:
1961 case Instruction::FCmp:
1962 case Instruction::Trunc:
1963 case Instruction::ZExt:
1964 case Instruction::SExt:
1965 case Instruction::FPToUI:
1966 case Instruction::FPToSI:
1967 case Instruction::UIToFP:
1968 case Instruction::SIToFP:
1969 case Instruction::FPTrunc:
1970 case Instruction::FPExt:
1971 case Instruction::Select:
1972 case Instruction::GetElementPtr: {
1973 SmallVector<Value*, 8> NewOps;
1974 bool NeedsRebuild =
1975 (Mask.size() !=
1976 cast<FixedVectorType>(I->getType())->getNumElements());
1977 for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
1978 Value *V;
1979 // Recursively call evaluateInDifferentElementOrder on vector arguments
1980 // as well. E.g. GetElementPtr may have scalar operands even if the
1981 // return value is a vector, so we need to examine the operand type.
1982 if (I->getOperand(i)->getType()->isVectorTy())
1983 V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
1984 else
1985 V = I->getOperand(i);
1986 NewOps.push_back(V);
1987 NeedsRebuild |= (V != I->getOperand(i));
1988 }
1989 if (NeedsRebuild)
1990 return buildNew(I, NewOps, Builder);
1991 return I;
1992 }
1993 case Instruction::InsertElement: {
1994 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
1995
1996 // The insertelement was inserting at Element. Figure out which element
1997 // that becomes after shuffling. The answer is guaranteed to be unique
1998 // by canEvaluateShuffled.
1999 bool Found = false;
2000 int Index = 0;
2001 for (int e = Mask.size(); Index != e; ++Index) {
2002 if (Mask[Index] == Element) {
2003 Found = true;
2004 break;
2005 }
2006 }
2007
2008 // If element is not in Mask, no need to handle the operand 1 (element to
2009 // be inserted). Just evaluate values in operand 0 according to Mask.
2010 if (!Found)
2011 return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
2012
2013 Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
2014 Builder);
2015 Builder.SetInsertPoint(I);
2016 return Builder.CreateInsertElement(V, I->getOperand(1), Index);
2017 }
2018 }
2019 llvm_unreachable("failed to reorder elements of vector instruction!");
2020}
2021
2022// Returns true if the shuffle is extracting a contiguous range of values from
2023// LHS, for example:
2024// +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2025// Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP|
2026// Shuffles to: |EE|FF|GG|HH|
2027// +--+--+--+--+
2028static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
2029 ArrayRef<int> Mask) {
2030 unsigned LHSElems =
2031 cast<FixedVectorType>(SVI.getOperand(0)->getType())->getNumElements();
2032 unsigned MaskElems = Mask.size();
2033 unsigned BegIdx = Mask.front();
2034 unsigned EndIdx = Mask.back();
2035 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1)
2036 return false;
2037 for (unsigned I = 0; I != MaskElems; ++I)
2038 if (static_cast<unsigned>(Mask[I]) != BegIdx + I)
2039 return false;
2040 return true;
2041}
2042
2043/// These are the ingredients in an alternate form binary operator as described
2044/// below.
2045struct BinopElts {
2046 BinaryOperator::BinaryOps Opcode;
2047 Value *Op0;
2048 Value *Op1;
2049 BinopElts(BinaryOperator::BinaryOps Opc = (BinaryOperator::BinaryOps)0,
2050 Value *V0 = nullptr, Value *V1 = nullptr) :
2051 Opcode(Opc), Op0(V0), Op1(V1) {}
2052 operator bool() const { return Opcode != 0; }
2053};
2054
2055/// Binops may be transformed into binops with different opcodes and operands.
2056/// Reverse the usual canonicalization to enable folds with the non-canonical
2057/// form of the binop. If a transform is possible, return the elements of the
2058/// new binop. If not, return invalid elements.
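// Editorial examples of the alternate forms recognized below (not from the
// original source):
//   shl X, 3  --> mul X, 8
//   or  X, C  --> add X, C    (when X and C have no common bits set)
//   sub 0, X  --> mul X, -1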
2059static BinopElts getAlternateBinop(BinaryOperator *BO, const DataLayout &DL) {
2060 Value *BO0 = BO->getOperand(0), *BO1 = BO->getOperand(1);
2061 Type *Ty = BO->getType();
2062 switch (BO->getOpcode()) {
2063 case Instruction::Shl: {
2064 // shl X, C --> mul X, (1 << C)
2065 Constant *C;
2066 if (match(BO1, m_Constant(C))) {
2067 Constant *ShlOne = ConstantExpr::getShl(ConstantInt::get(Ty, 1), C);
2068 return {Instruction::Mul, BO0, ShlOne};
2069 }
2070 break;
2071 }
2072 case Instruction::Or: {
2073 // or X, C --> add X, C (when X and C have no common bits set)
2074 const APInt *C;
2075 if (match(BO1, m_APInt(C)) && MaskedValueIsZero(BO0, *C, DL))
2076 return {Instruction::Add, BO0, BO1};
2077 break;
2078 }
2079 case Instruction::Sub:
2080 // sub 0, X --> mul X, -1
2081 if (match(BO0, m_ZeroInt()))
2082 return {Instruction::Mul, BO1, ConstantInt::getAllOnesValue(Ty)};
2083 break;
2084 default:
2085 break;
2086 }
2087 return {};
2088}
2089
2090/// A select shuffle of a select shuffle with a shared operand can be reduced
2091/// to a single select shuffle. This is an obvious improvement in IR, and the
2092/// backend is expected to lower select shuffles efficiently.
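// Editorial mask example (not from the original source), with 4 lanes:
//   %inner = shufflevector <4 x i32> %x, <4 x i32> %y, <0, 5, 2, 7>
//   %outer = shufflevector <4 x i32> %x, <4 x i32> %inner, <0, 5, 6, 3>
// Outer lanes 1 and 2 come from %inner, which selects %y lane 1 and %x
// lane 2, so the combined select shuffle is:
//   %outer = shufflevector <4 x i32> %x, <4 x i32> %y, <0, 5, 2, 3>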
2093static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
2094 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2095
2096 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2097 SmallVector<int, 16> Mask;
2098 Shuf.getShuffleMask(Mask);
2099 unsigned NumElts = Mask.size();
2100
2101 // Canonicalize a select shuffle with common operand as Op1.
2102 auto *ShufOp = dyn_cast<ShuffleVectorInst>(Op0);
2103 if (ShufOp && ShufOp->isSelect() &&
2104 (ShufOp->getOperand(0) == Op1 || ShufOp->getOperand(1) == Op1)) {
2105 std::swap(Op0, Op1);
2106 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2107 }
2108
2109 ShufOp = dyn_cast<ShuffleVectorInst>(Op1);
2110 if (!ShufOp || !ShufOp->isSelect() ||
2111 (ShufOp->getOperand(0) != Op0 && ShufOp->getOperand(1) != Op0))
2112 return nullptr;
2113
2114 Value *X = ShufOp->getOperand(0), *Y = ShufOp->getOperand(1);
2115 SmallVector<int, 16> Mask1;
2116 ShufOp->getShuffleMask(Mask1);
2117 assert(Mask1.size() == NumElts && "Vector size changed with select shuffle");
2118
2119 // Canonicalize common operand (Op0) as X (first operand of first shuffle).
2120 if (Y == Op0) {
2121 std::swap(X, Y);
2122 ShuffleVectorInst::commuteShuffleMask(Mask1, NumElts);
2123 }
2124
2125 // If the mask chooses from X (operand 0), it stays the same.
2126 // If the mask chooses from the earlier shuffle, the other mask value is
2127 // transferred to the combined select shuffle:
2128 // shuf X, (shuf X, Y, M1), M --> shuf X, Y, M'
2129 SmallVector<int, 16> NewMask(NumElts);
2130 for (unsigned i = 0; i != NumElts; ++i)
2131 NewMask[i] = Mask[i] < (signed)NumElts ? Mask[i] : Mask1[i];
2132
2133 // A select mask with undef elements might look like an identity mask.
2134 assert((ShuffleVectorInst::isSelectMask(NewMask, NumElts) ||
2135 ShuffleVectorInst::isIdentityMask(NewMask, NumElts)) &&
2136 "Unexpected shuffle mask");
2137 return new ShuffleVectorInst(X, Y, NewMask);
2138}
2139
2140static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
2141 const SimplifyQuery &SQ) {
2142 assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
2143
2144 // Are we shuffling together some value and that same value after it has been
2145 // modified by a binop with a constant?
2146 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2147 Constant *C;
2148 bool Op0IsBinop;
2149 if (match(Op0, m_BinOp(m_Specific(Op1), m_Constant(C))))
2150 Op0IsBinop = true;
2151 else if (match(Op1, m_BinOp(m_Specific(Op0), m_Constant(C))))
2152 Op0IsBinop = false;
2153 else
2154 return nullptr;
2155
2156 // The identity constant for a binop leaves a variable operand unchanged. For
2157 // a vector, this is a splat of something like 0, -1, or 1.
2158 // If there's no identity constant for this binop, we're done.
2159 auto *BO = cast<BinaryOperator>(Op0IsBinop ? Op0 : Op1);
2160 BinaryOperator::BinaryOps BOpcode = BO->getOpcode();
2161 Constant *IdC = ConstantExpr::getBinOpIdentity(BOpcode, Shuf.getType(), true);
2162 if (!IdC)
2163 return nullptr;
2164
2165 Value *X = Op0IsBinop ? Op1 : Op0;
2166
2167 // Do not fold if the non-binop operand (X) might contain NaN values,
2168 // because the floating-point operation in the transformed code may not
2169 // preserve the exact NaN bit-pattern in the lanes that previously passed
2170 // X through unchanged -- e.g. `fadd sNaN, 0.0 -> qNaN`.
2171 // The original program would have preserved the exact NaN bit-pattern in
2172 // those lanes, so the transform would be incorrect.
2173 // Hence, avoid the fold if X can have NaN elements.
2174 if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
2175 !isKnownNeverNaN(X, 0, SQ))
2176 return nullptr;
2177
2178 // Shuffle identity constants into the lanes that return the original value.
2179 // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
2180 // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
2181 // The existing binop constant vector remains in the same operand position.
2182 ArrayRef<int> Mask = Shuf.getShuffleMask();
2183 Constant *NewC = Op0IsBinop ? ConstantExpr::getShuffleVector(C, IdC, Mask) :
2184 ConstantExpr::getShuffleVector(IdC, C, Mask);
2185
2186 bool MightCreatePoisonOrUB =
2187 is_contained(Mask, PoisonMaskElem) &&
2188 (Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
2189 if (MightCreatePoisonOrUB)
2190 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
2191
2192 // shuf (bop X, C), X, M --> bop X, C'
2193 // shuf X, (bop X, C), M --> bop X, C'
2194 Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
2195 NewBO->copyIRFlags(BO);
2196
2197 // An undef shuffle mask element may propagate as an undef constant element in
2198 // the new binop. That would produce poison where the original code might not.
2199 // If we already made a safe constant, then there's no danger.
2200 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2201 NewBO->dropPoisonGeneratingFlags();
2202 return NewBO;
2203}
2204
2205/// If we have an insert of a scalar to a non-zero element of an undefined
2206/// vector and then shuffle that value, that's the same as inserting to the zero
2207/// element and shuffling. Splatting from the zero element is recognized as the
2208/// canonical form of splat.
2209static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
2210 InstCombiner::BuilderTy &Builder) {
2211 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2212 ArrayRef<int> Mask = Shuf.getShuffleMask();
2213 Value *X;
2214 uint64_t IndexC;
2215
2216 // Match a shuffle that is a splat to a non-zero element.
2217 if (!match(Op0, m_OneUse(m_InsertElt(m_Undef(), m_Value(X),
2218 m_ConstantInt(IndexC)))) ||
2219 !match(Op1, m_Undef()) || match(Mask, m_ZeroMask()) || IndexC == 0)
2220 return nullptr;
2221
2222 // Insert into element 0 of a poison vector.
2223 PoisonValue *PoisonVec = PoisonValue::get(Shuf.getType());
2224 Value *NewIns = Builder.CreateInsertElement(PoisonVec, X, (uint64_t)0);
2225
2226 // Splat from element 0. Any mask element that is undefined remains undefined.
2227 // For example:
2228 // shuf (inselt undef, X, 2), _, <2,2,undef>
2229 // --> shuf (inselt undef, X, 0), poison, <0,0,undef>
2230 unsigned NumMaskElts =
2231 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2232 SmallVector<int, 16> NewMask(NumMaskElts, 0);
2233 for (unsigned i = 0; i != NumMaskElts; ++i)
2234 if (Mask[i] == PoisonMaskElem)
2235 NewMask[i] = Mask[i];
2236
2237 return new ShuffleVectorInst(NewIns, NewMask);
2238}
2239
2240/// Try to fold shuffles that are the equivalent of a vector select.
2241Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
2242 if (!Shuf.isSelect())
2243 return nullptr;
2244
2245 // Canonicalize to choose from operand 0 first unless operand 1 is undefined.
2246 // Commuting undef to operand 0 conflicts with another canonicalization.
2247 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2248 if (!match(Shuf.getOperand(1), m_Undef()) &&
2249 Shuf.getMaskValue(0) >= (int)NumElts) {
2250 // TODO: Can we assert that both operands of a shuffle-select are not undef
2251 // (otherwise, it would have been folded by instsimplify)?
2252 Shuf.commute();
2253 return &Shuf;
2254 }
2255
2256 if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
2257 return I;
2258
2259 if (Instruction *I = foldSelectShuffleWith1Binop(
2260 Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
2261 return I;
2262
2263 BinaryOperator *B0, *B1;
2264 if (!match(Shuf.getOperand(0), m_BinOp(B0)) ||
2265 !match(Shuf.getOperand(1), m_BinOp(B1)))
2266 return nullptr;
2267
2268 // If one operand is "0 - X", allow that to be viewed as "X * -1"
2269 // (ConstantsAreOp1) by getAlternateBinop below. If the neg is not paired
2270 // with a multiply, we will exit because C0/C1 will not be set.
2271 Value *X, *Y;
2272 Constant *C0 = nullptr, *C1 = nullptr;
2273 bool ConstantsAreOp1;
2274 if (match(B0, m_BinOp(m_Constant(C0), m_Value(X))) &&
2275 match(B1, m_BinOp(m_Constant(C1), m_Value(Y))))
2276 ConstantsAreOp1 = false;
2277 else if (match(B0, m_CombineOr(m_BinOp(m_Value(X), m_Constant(C0)),
2278 m_Neg(m_Value(X)))) &&
2279 match(B1, m_CombineOr(m_BinOp(m_Value(Y), m_Constant(C1)),
2280 m_Neg(m_Value(Y)))))
2281 ConstantsAreOp1 = true;
2282 else
2283 return nullptr;
2284
2285 // We need matching binops to fold the lanes together.
2286 BinaryOperator::BinaryOps Opc0 = B0->getOpcode();
2287 BinaryOperator::BinaryOps Opc1 = B1->getOpcode();
2288 bool DropNSW = false;
2289 if (ConstantsAreOp1 && Opc0 != Opc1) {
2290 // TODO: We drop "nsw" if shift is converted into multiply because it may
2291 // not be correct when the shift amount is BitWidth - 1. We could examine
2292 // each vector element to determine if it is safe to keep that flag.
2293 if (Opc0 == Instruction::Shl || Opc1 == Instruction::Shl)
2294 DropNSW = true;
2295 if (BinopElts AltB0 = getAlternateBinop(B0, DL)) {
2296 assert(isa<Constant>(AltB0.Op1) && "Expecting constant with alt binop");
2297 Opc0 = AltB0.Opcode;
2298 C0 = cast<Constant>(AltB0.Op1);
2299 } else if (BinopElts AltB1 = getAlternateBinop(B1, DL)) {
2300 assert(isa<Constant>(AltB1.Op1) && "Expecting constant with alt binop");
2301 Opc1 = AltB1.Opcode;
2302 C1 = cast<Constant>(AltB1.Op1);
2303 }
2304 }
2305
2306 if (Opc0 != Opc1 || !C0 || !C1)
2307 return nullptr;
2308
2309 // The opcodes must be the same. Use a new name to make that clear.
2310 BinaryOperator::BinaryOps BOpc = Opc0;
2311
2312 // Select the constant elements needed for the single binop.
2313 ArrayRef<int> Mask = Shuf.getShuffleMask();
2314 Constant *NewC = ConstantExpr::getShuffleVector(C0, C1, Mask);
2315
2316 // We are moving a binop after a shuffle. When a shuffle has an undefined
2317 // mask element, the result is undefined, but it is not poison or undefined
2318 // behavior. That is not necessarily true for div/rem/shift.
2319 bool MightCreatePoisonOrUB =
2320 is_contained(Mask, PoisonMaskElem) &&
2321 (Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
2322 if (MightCreatePoisonOrUB)
2323 NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
2324 ConstantsAreOp1);
2325
2326 Value *V;
2327 if (X == Y) {
2328 // Remove a binop and the shuffle by rearranging the constant:
2329 // shuffle (op V, C0), (op V, C1), M --> op V, C'
2330 // shuffle (op C0, V), (op C1, V), M --> op C', V
2331 V = X;
2332 } else {
2333 // If there are 2 different variable operands, we must create a new shuffle
2334 // (select) first, so check uses to ensure that we don't end up with more
2335 // instructions than we started with.
2336 if (!B0->hasOneUse() && !B1->hasOneUse())
2337 return nullptr;
2338
2339 // If we use the original shuffle mask and op1 is *variable*, we would be
2340 // putting an undef into operand 1 of div/rem/shift. This is either UB or
2341 // poison. We do not have to guard against UB when *constants* are op1
2342 // because safe constants guarantee that we do not overflow sdiv/srem (and
2343 // there's no danger for other opcodes).
2344 // TODO: To allow this case, create a new shuffle mask with no undefs.
2345 if (MightCreatePoisonOrUB && !ConstantsAreOp1)
2346 return nullptr;
2347
2348 // Note: In general, we do not create new shuffles in InstCombine because we
2349 // do not know if a target can lower an arbitrary shuffle optimally. In this
2350 // case, the shuffle uses the existing mask, so there is no additional risk.
2351
2352 // Select the variable vectors first, then perform the binop:
2353 // shuffle (op X, C0), (op Y, C1), M --> op (shuffle X, Y, M), C'
2354 // shuffle (op C0, X), (op C1, Y), M --> op C', (shuffle X, Y, M)
2355 V = Builder.CreateShuffleVector(X, Y, Mask);
2356 }
2357
2358 Value *NewBO = ConstantsAreOp1 ? Builder.CreateBinOp(BOpc, V, NewC) :
2359 Builder.CreateBinOp(BOpc, NewC, V);
2360
2361 // Flags are intersected from the 2 source binops. But there are 2 exceptions:
2362 // 1. If we changed an opcode, poison conditions might have changed.
2363 // 2. If the shuffle had undef mask elements, the new binop might have undefs
2364 // where the original code did not. But if we already made a safe constant,
2365 // then there's no danger.
2366 if (auto *NewI = dyn_cast<Instruction>(NewBO)) {
2367 NewI->copyIRFlags(B0);
2368 NewI->andIRFlags(B1);
2369 if (DropNSW)
2370 NewI->setHasNoSignedWrap(false);
2371 if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
2372 NewI->dropPoisonGeneratingFlags();
2373 }
2374 return replaceInstUsesWith(Shuf, NewBO);
2375}
2376
2377/// Convert a narrowing shuffle of a bitcasted vector into a vector truncate.
2378/// Example (little endian):
2379/// shuf (bitcast <4 x i16> X to <8 x i8>), <0, 2, 4, 6> --> trunc X to <4 x i8>
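/// Editorial note: for big endian, the low bits of each wide element sit at
/// the higher narrow lane indexes, so the equivalent mask would be <1, 3, 5, 7>.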
2380static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
2381 bool IsBigEndian) {
2382 // This must be a bitcasted shuffle of 1 vector integer operand.
2383 Type *DestType = Shuf.getType();
2384 Value *X;
2385 if (!match(Shuf.getOperand(0), m_BitCast(m_Value(X))) ||
2386 !match(Shuf.getOperand(1), m_Undef()) || !DestType->isIntOrIntVectorTy())
2387 return nullptr;
2388
2389 // The source type must have the same number of elements as the shuffle,
2390 // and the source element type must be larger than the shuffle element type.
2391 Type *SrcType = X->getType();
2392 if (!SrcType->isVectorTy() || !SrcType->isIntOrIntVectorTy() ||
2393 cast<FixedVectorType>(SrcType)->getNumElements() !=
2394 cast<FixedVectorType>(DestType)->getNumElements() ||
2395 SrcType->getScalarSizeInBits() % DestType->getScalarSizeInBits() != 0)
2396 return nullptr;
2397
2398 assert(Shuf.changesLength() && !Shuf.increasesLength() &&
2399 "Expected a shuffle that decreases length");
2400
2401 // Last, check that the mask chooses the correct low bits for each narrow
2402 // element in the result.
2403 uint64_t TruncRatio =
2404 SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
2405 ArrayRef<int> Mask = Shuf.getShuffleMask();
2406 for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2407 if (Mask[i] == PoisonMaskElem)
2408 continue;
2409 uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
2410 assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
2411 if (Mask[i] != (int)LSBIndex)
2412 return nullptr;
2413 }
2414
2415 return new TruncInst(X, DestType);
2416}
2417
2418/// Match a shuffle-select-shuffle pattern where the shuffles are widening and
2419/// narrowing (concatenating with undef and extracting back to the original
2420/// length). This allows replacing the wide select with a narrow select.
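// Editorial sketch (not from the original source):
//   %wc = shufflevector <2 x i1> %c, <2 x i1> undef, <0, 1, undef, undef>
//   %sel = select <4 x i1> %wc, <4 x i32> %x, <4 x i32> %y
//   %r = shufflevector <4 x i32> %sel, <4 x i32> undef, <2 x i32> <0, 1>
// -->
//   %nx = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <0, 1>
//   %ny = shufflevector <4 x i32> %y, <4 x i32> undef, <2 x i32> <0, 1>
//   %r = select <2 x i1> %c, <2 x i32> %nx, <2 x i32> %ny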
2421static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
2422 InstCombiner::BuilderTy &Builder) {
2423 // This must be a narrowing identity shuffle. It extracts the 1st N elements
2424 // of the 1st vector operand of a shuffle.
2425 if (!match(Shuf.getOperand(1), m_Undef()) || !Shuf.isIdentityWithExtract())
2426 return nullptr;
2427
2428 // The vector being shuffled must be a vector select that we can eliminate.
2429 // TODO: The one-use requirement could be eased if X and/or Y are constants.
2430 Value *Cond, *X, *Y;
2431 if (!match(Shuf.getOperand(0),
2432 m_OneUse(m_Select(m_Value(Cond), m_Value(X), m_Value(Y)))))
2433 return nullptr;
2434
2435 // We need a narrow condition value. It must be extended with undef elements
2436 // and have the same number of elements as this shuffle.
2437 unsigned NarrowNumElts =
2438 cast<FixedVectorType>(Shuf.getType())->getNumElements();
2439 Value *NarrowCond;
2440 if (!match(Cond, m_OneUse(m_Shuffle(m_Value(NarrowCond), m_Undef()))) ||
2441 cast<FixedVectorType>(NarrowCond->getType())->getNumElements() !=
2442 NarrowNumElts ||
2443 !cast<ShuffleVectorInst>(Cond)->isIdentityWithPadding())
2444 return nullptr;
2445
2446 // shuf (sel (shuf NarrowCond, undef, WideMask), X, Y), undef, NarrowMask -->
2447 // sel NarrowCond, (shuf X, undef, NarrowMask), (shuf Y, undef, NarrowMask)
2448 Value *NarrowX = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2449 Value *NarrowY = Builder.CreateShuffleVector(Y, Shuf.getShuffleMask());
2450 return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
2451}
2452
2453/// Canonicalize FP negate/abs after shuffle.
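// Editorial sketch (not from the original source):
//   %nx = fneg <4 x float> %x
//   %ny = fneg <4 x float> %y
//   %r = shufflevector <4 x float> %nx, <4 x float> %ny, <0, 4, 1, 5>
// -->
//   %s = shufflevector <4 x float> %x, <4 x float> %y, <0, 4, 1, 5>
//   %r = fneg <4 x float> %s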
2454static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
2455 InstCombiner::BuilderTy &Builder) {
2456 auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
2457 Value *X;
2458 if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
2459 return nullptr;
2460
2461 bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
2462
2463 // Match 1-input (unary) shuffle.
2464 // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
2465 if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
2466 Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
2467 if (IsFNeg)
2468 return UnaryOperator::CreateFNegFMF(NewShuf, S0);
2469
2470 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2471 Intrinsic::fabs, Shuf.getType());
2472 CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
2473 NewF->setFastMathFlags(S0->getFastMathFlags());
2474 return NewF;
2475 }
2476
2477 // Match 2-input (binary) shuffle.
2478 auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
2479 Value *Y;
2480 if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
2481 S0->getOpcode() != S1->getOpcode() ||
2482 (!S0->hasOneUse() && !S1->hasOneUse()))
2483 return nullptr;
2484
2485 // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
2486 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2487 Instruction *NewF;
2488 if (IsFNeg) {
2489 NewF = UnaryOperator::CreateFNeg(NewShuf);
2490 } else {
2491 Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
2492 Intrinsic::fabs, Shuf.getType());
2493 NewF = CallInst::Create(FAbs, {NewShuf});
2494 }
2495 NewF->copyIRFlags(S0);
2496 NewF->andIRFlags(S1);
2497 return NewF;
2498}
2499
2500/// Canonicalize casts after shuffle.
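// Editorial sketch (not from the original source):
//   %a = sitofp <4 x i32> %x to <4 x float>
//   %b = sitofp <4 x i32> %y to <4 x float>
//   %r = shufflevector <4 x float> %a, <4 x float> %b, <0, 4, 1, 5>
// -->
//   %s = shufflevector <4 x i32> %x, <4 x i32> %y, <0, 4, 1, 5>
//   %r = sitofp <4 x i32> %s to <4 x float>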
2501static Instruction *foldCastShuffle(ShuffleVectorInst &Shuf,
2502 InstCombiner::BuilderTy &Builder) {
2503 // Do we have 2 matching cast operands?
2504 auto *Cast0 = dyn_cast<CastInst>(Shuf.getOperand(0));
2505 auto *Cast1 = dyn_cast<CastInst>(Shuf.getOperand(1));
2506 if (!Cast0 || !Cast1 || Cast0->getOpcode() != Cast1->getOpcode() ||
2507 Cast0->getSrcTy() != Cast1->getSrcTy())
2508 return nullptr;
2509
2510 // TODO: Allow other opcodes? That would require easing the type restrictions
2511 // below here.
2512 CastInst::CastOps CastOpcode = Cast0->getOpcode();
2513 switch (CastOpcode) {
2514 case Instruction::FPToSI:
2515 case Instruction::FPToUI:
2516 case Instruction::SIToFP:
2517 case Instruction::UIToFP:
2518 break;
2519 default:
2520 return nullptr;
2521 }
2522
2523 VectorType *ShufTy = Shuf.getType();
2524 VectorType *ShufOpTy = cast<VectorType>(Shuf.getOperand(0)->getType());
2525 VectorType *CastSrcTy = cast<VectorType>(Cast0->getSrcTy());
2526
2527 // TODO: Allow length-increasing shuffles?
2528 if (ShufTy->getElementCount().getKnownMinValue() >
2529 ShufOpTy->getElementCount().getKnownMinValue())
2530 return nullptr;
2531
2532 // TODO: Allow element-size-decreasing casts (ex: fptosi float to i8)?
2533 assert(isa<FixedVectorType>(CastSrcTy) && isa<FixedVectorType>(ShufOpTy) &&
2534 "Expected fixed vector operands for casts and binary shuffle");
2535 if (CastSrcTy->getPrimitiveSizeInBits() > ShufOpTy->getPrimitiveSizeInBits())
2536 return nullptr;
2537
2538 // At least one of the operands must have only one use (the shuffle).
2539 if (!Cast0->hasOneUse() && !Cast1->hasOneUse())
2540 return nullptr;
2541
2542 // shuffle (cast X), (cast Y), Mask --> cast (shuffle X, Y, Mask)
2543 Value *X = Cast0->getOperand(0);
2544 Value *Y = Cast1->getOperand(0);
2545 Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
2546 return CastInst::Create(CastOpcode, NewShuf, ShufTy);
2547}
2548
2549/// Try to fold an extract subvector operation.
2550static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
2551 Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1);
2552 if (!Shuf.isIdentityWithExtract() || !match(Op1, m_Undef()))
2553 return nullptr;
2554
2555 // Check if we are extracting all bits of an inserted scalar:
2556 // extract-subvec (bitcast (inselt ?, X, 0) --> bitcast X to subvec type
2557 Value *X;
2558 if (match(Op0, m_BitCast(m_InsertElt(m_Value(), m_Value(X), m_Zero()))) &&
2559 X->getType()->getPrimitiveSizeInBits() ==
2560 Shuf.getType()->getPrimitiveSizeInBits())
2561 return new BitCastInst(X, Shuf.getType());
2562
2563 // Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask.
2564 Value *Y;
2565 ArrayRef<int> Mask;
2566 if (!match(Op0, m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask))))
2567 return nullptr;
2568
2569 // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
2570 // then combining may result in worse codegen.
2571 if (!Op0->hasOneUse())
2572 return nullptr;
2573
2574 // We are extracting a subvector from a shuffle. Remove excess elements from
2575 // the 1st shuffle mask to eliminate the extract.
2576 //
2577 // This transform is conservatively limited to identity extracts because we do
2578 // not allow arbitrary shuffle mask creation as a target-independent transform
2579 // (because we can't guarantee that will lower efficiently).
2580 //
2581 // If the extracting shuffle has an undef mask element, it transfers to the
2582 // new shuffle mask. Otherwise, copy the original mask element. Example:
2583 // shuf (shuf X, Y, <C0, C1, C2, undef, C4>), undef, <0, undef, 2, 3> -->
2584 // shuf X, Y, <C0, undef, C2, undef>
2585 unsigned NumElts = cast<FixedVectorType>(Shuf.getType())->getNumElements();
2586 SmallVector<int, 16> NewMask(NumElts);
2587 assert(NumElts < Mask.size() &&
2588 "Identity with extract must have fewer elements than its inputs");
2589
2590 for (unsigned i = 0; i != NumElts; ++i) {
2591 int ExtractMaskElt = Shuf.getMaskValue(i);
2592 int MaskElt = Mask[i];
2593 NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
2594 }
2595 return new ShuffleVectorInst(X, Y, NewMask);
2596}
2597
2598/// Try to replace a shuffle with an insertelement or try to replace a shuffle
2599/// operand with the operand of an insertelement.
2600static Instruction *foldShuffleWithInsert(ShuffleVectorInst &Shuf,
2601 InstCombinerImpl &IC) {
2602 Value *V0 = Shuf.getOperand(0), *V1 = Shuf.getOperand(1);
2603 SmallVector<int, 16> Mask;
2604 Shuf.getShuffleMask(Mask);
2605
2606 int NumElts = Mask.size();
2607 int InpNumElts = cast<FixedVectorType>(V0->getType())->getNumElements();
2608
2609 // This is a specialization of a fold in SimplifyDemandedVectorElts. We may
2610 // not be able to handle it there if the insertelement has >1 use.
2611 // If the shuffle has an insertelement operand but does not choose the
2612 // inserted scalar element from that value, then we can replace that shuffle
2613 // operand with the source vector of the insertelement.
2614 Value *X;
2615 uint64_t IdxC;
2616 if (match(V0, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2617 // shuf (inselt X, ?, IdxC), ?, Mask --> shuf X, ?, Mask
2618 if (!is_contained(Mask, (int)IdxC))
2619 return IC.replaceOperand(Shuf, 0, X);
2620 }
2621 if (match(V1, m_InsertElt(m_Value(X), m_Value(), m_ConstantInt(IdxC)))) {
2622 // Offset the index constant by the vector width because we are checking for
2623 // accesses to the 2nd vector input of the shuffle.
2624 IdxC += InpNumElts;
2625 // shuf ?, (inselt X, ?, IdxC), Mask --> shuf ?, X, Mask
2626 if (!is_contained(Mask, (int)IdxC))
2627 return IC.replaceOperand(Shuf, 1, X);
2628 }
2629 // For the rest of the transform, the shuffle must not change vector sizes.
2630 // TODO: This restriction could be removed if the insert has only one use
2631 // (because the transform would require a new length-changing shuffle).
2632 if (NumElts != InpNumElts)
2633 return nullptr;
2634
2635 // shuffle (insert ?, Scalar, IndexC), V1, Mask --> insert V1, Scalar, IndexC'
2636 auto isShufflingScalarIntoOp1 = [&](Value *&Scalar, ConstantInt *&IndexC) {
2637 // We need an insertelement with a constant index.
2638 if (!match(V0, m_InsertElt(m_Value(), m_Value(Scalar),
2639 m_ConstantInt(IndexC))))
2640 return false;
2641
2642 // Test the shuffle mask to see if it splices the inserted scalar into the
2643 // operand 1 vector of the shuffle.
2644 int NewInsIndex = -1;
2645 for (int i = 0; i != NumElts; ++i) {
2646 // Ignore undef mask elements.
2647 if (Mask[i] == -1)
2648 continue;
2649
2650 // The shuffle takes elements of operand 1 without lane changes.
2651 if (Mask[i] == NumElts + i)
2652 continue;
2653
2654 // The shuffle must choose the inserted scalar exactly once.
2655 if (NewInsIndex != -1 || Mask[i] != IndexC->getSExtValue())
2656 return false;
2657
2658 // The shuffle is placing the inserted scalar into element i.
2659 NewInsIndex = i;
2660 }
2661
2662 assert(NewInsIndex != -1 && "Did not fold shuffle with unused operand?");
2663
2664 // Index is updated to the potentially translated insertion lane.
2665 IndexC = ConstantInt::get(IndexC->getIntegerType(), NewInsIndex);
2666 return true;
2667 };
2668
2669 // If the shuffle is unnecessary, insert the scalar operand directly into
2670 // operand 1 of the shuffle. Example:
2671 // shuffle (insert ?, S, 1), V1, <1, 5, 6, 7> --> insert V1, S, 0
2672 Value *Scalar;
2673 ConstantInt *IndexC;
2674 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2675 return InsertElementInst::Create(V1, Scalar, IndexC);
2676
2677 // Try again after commuting shuffle. Example:
2678 // shuffle V0, (insert ?, S, 0), <0, 1, 2, 4> -->
2679 // shuffle (insert ?, S, 0), V0, <4, 5, 6, 0> --> insert V0, S, 3
2680 std::swap(V0, V1);
2681 ShuffleVectorInst::commuteShuffleMask(Mask, NumElts);
2682 if (isShufflingScalarIntoOp1(Scalar, IndexC))
2683 return InsertElementInst::Create(V1, Scalar, IndexC);
2684
2685 return nullptr;
2686}
2687
2688static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
2689 // Match the operands as identity with padding (also known as concatenation
2690 // with undef) shuffles of the same source type. The backend is expected to
2691 // recreate these concatenations from a shuffle of narrow operands.
2692 auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
2693 auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
2694 if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
2695 !Shuffle1 || !Shuffle1->isIdentityWithPadding())
2696 return nullptr;
2697
2698 // We limit this transform to power-of-2 types because we expect that the
2699 // backend can convert the simplified IR patterns to identical nodes as the
2700 // original IR.
2701 // TODO: If we can verify the same behavior for arbitrary types, the
2702 // power-of-2 checks can be removed.
2703 Value *X = Shuffle0->getOperand(0);
2704 Value *Y = Shuffle1->getOperand(0);
2705 if (X->getType() != Y->getType() ||
2706 !isPowerOf2_32(cast<FixedVectorType>(Shuf.getType())->getNumElements()) ||
2707 !isPowerOf2_32(
2708 cast<FixedVectorType>(Shuffle0->getType())->getNumElements()) ||
2709 !isPowerOf2_32(cast<FixedVectorType>(X->getType())->getNumElements()) ||
2710 match(X, m_Undef()) || match(Y, m_Undef()))
2711 return nullptr;
2712 assert(match(Shuffle0->getOperand(1), m_Undef()) &&
2713 match(Shuffle1->getOperand(1), m_Undef()) &&
2714 "Unexpected operand for identity shuffle");
2715
2716 // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
2717 // operands directly by adjusting the shuffle mask to account for the narrower
2718 // types:
2719 // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
2720 int NarrowElts = cast<FixedVectorType>(X->getType())->getNumElements();
2721 int WideElts = cast<FixedVectorType>(Shuffle0->getType())->getNumElements();
2722 assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
2723
2724 ArrayRef<int> Mask = Shuf.getShuffleMask();
2725 SmallVector<int, 16> NewMask(Mask.size(), -1);
2726 for (int i = 0, e = Mask.size(); i != e; ++i) {
2727 if (Mask[i] == -1)
2728 continue;
2729
2730 // If this shuffle is choosing an undef element from 1 of the sources, that
2731 // element is undef.
2732 if (Mask[i] < WideElts) {
2733 if (Shuffle0->getMaskValue(Mask[i]) == -1)
2734 continue;
2735 } else {
2736 if (Shuffle1->getMaskValue(Mask[i] - WideElts) == -1)
2737 continue;
2738 }
2739
2740 // If this shuffle is choosing from the 1st narrow op, the mask element is
2741 // the same. If this shuffle is choosing from the 2nd narrow op, the mask
2742 // element is offset down to adjust for the narrow vector widths.
2743 if (Mask[i] < WideElts) {
2744 assert(Mask[i] < NarrowElts && "Unexpected shuffle mask");
2745 NewMask[i] = Mask[i];
2746 } else {
2747 assert(Mask[i] < (WideElts + NarrowElts) && "Unexpected shuffle mask");
2748 NewMask[i] = Mask[i] - (WideElts - NarrowElts);
2749 }
2750 }
2751 return new ShuffleVectorInst(X, Y, NewMask);
2752}
2753
2754// Splatting the first element of the result of a BinOp, where any of the
2755// BinOp's operands is the result of a first-element splat, can be simplified
2756// to splatting the first element of the BinOp's result.
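// Editorial sketch (not from the original source):
//   %sx = shufflevector <4 x i32> %x, <4 x i32> undef, zeroinitializer
//   %b = add <4 x i32> %sx, %y
//   %r = shufflevector <4 x i32> %b, <4 x i32> undef, zeroinitializer
// -->
//   %b2 = add <4 x i32> %x, %y
//   %r = shufflevector <4 x i32> %b2, <4 x i32> undef, zeroinitializer
// Lane 0 of %b depends only on lane 0 of %x and %y, so splatting the first
// element of %b2 computes the same result.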
2757Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
2758 if (!match(SVI.getOperand(1), m_Undef()) ||
2759 !match(SVI.getShuffleMask(), m_ZeroMask()) ||
2760 !SVI.getOperand(0)->hasOneUse())
2761 return nullptr;
2762
2763 Value *Op0 = SVI.getOperand(0);
2764 Value *X, *Y;
2765 if (!match(Op0, m_BinOp(m_Shuffle(m_Value(X), m_Undef(), m_ZeroMask()),
2766 m_Value(Y))) &&
2767 !match(Op0, m_BinOp(m_Value(X),
2768 m_Shuffle(m_Value(Y), m_Undef(), m_ZeroMask()))))
2769 return nullptr;
2770 if (X->getType() != Y->getType())
2771 return nullptr;
2772
2773 auto *BinOp = cast<BinaryOperator>(Op0);
2774 if (!isSafeToSpeculativelyExecute(BinOp))
2775 return nullptr;
2776
2777 Value *NewBO = Builder.CreateBinOp(BinOp->getOpcode(), X, Y);
2778 if (auto NewBOI = dyn_cast<Instruction>(NewBO))
2779 NewBOI->copyIRFlags(BinOp);
2780
2781 return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
2782}
2783
2784Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
2785 Value *LHS = SVI.getOperand(0);
2786 Value *RHS = SVI.getOperand(1);
2787 SimplifyQuery ShufQuery = SQ.getWithInstruction(&SVI);
2788 if (auto *V = simplifyShuffleVectorInst(LHS, RHS, SVI.getShuffleMask(),
2789 SVI.getType(), ShufQuery))
2790 return replaceInstUsesWith(SVI, V);
2791
2792 if (Instruction *I = simplifyBinOpSplats(SVI))
2793 return I;
2794
2795 // Canonicalize splat shuffle to use poison RHS. Handle this explicitly in
2796 // order to support scalable vectors.
2797 if (match(SVI.getShuffleMask(), m_ZeroMask()) && !isa<PoisonValue>(RHS))
2798 return replaceOperand(SVI, 1, PoisonValue::get(RHS->getType()));
2799
2800 if (isa<ScalableVectorType>(LHS->getType()))
2801 return nullptr;
2802
2803 unsigned VWidth = cast<FixedVectorType>(SVI.getType())->getNumElements();
2804 unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
2805
2806 // shuffle (bitcast X), (bitcast Y), Mask --> bitcast (shuffle X, Y, Mask)
2807 //
2808 // if X and Y are of the same (vector) type, and the element size is not
2809 // changed by the bitcasts, we can distribute the bitcasts through the
2810 // shuffle, hopefully reducing the number of instructions. We make sure that
2811 // at least one bitcast only has one use, so we don't *increase* the number of
2812 // instructions here.
2813 Value *X, *Y;
2814 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_BitCast(m_Value(Y))) &&
2815 X->getType()->isVectorTy() && X->getType() == Y->getType() &&
2816 X->getType()->getScalarSizeInBits() ==
2817 SVI.getType()->getScalarSizeInBits() &&
2818 (LHS->hasOneUse() || RHS->hasOneUse())) {
2819 Value *V = Builder.CreateShuffleVector(X, Y, SVI.getShuffleMask(),
2820 SVI.getName() + ".uncasted");
2821 return new BitCastInst(V, SVI.getType());
2822 }
2823
2824 ArrayRef<int> Mask = SVI.getShuffleMask();
2825
2826 // Peek through a bitcasted shuffle operand by scaling the mask. If the
2827 // simulated shuffle can simplify, then this shuffle is unnecessary:
2828 // shuf (bitcast X), undef, Mask --> bitcast X'
2829 // TODO: This could be extended to allow length-changing shuffles.
2830 // The transform might also be obsoleted if we allowed canonicalization
2831 // of bitcasted shuffles.
2832 if (match(LHS, m_BitCast(m_Value(X))) && match(RHS, m_Undef()) &&
2833 X->getType()->isVectorTy() && VWidth == LHSWidth) {
2834 // Try to create a scaled mask constant.
2835 auto *XType = cast<FixedVectorType>(X->getType());
2836 unsigned XNumElts = XType->getNumElements();
2837 SmallVector<int, 16> ScaledMask;
2838 if (XNumElts >= VWidth) {
2839 assert(XNumElts % VWidth == 0 && "Unexpected vector bitcast");
2840 narrowShuffleMaskElts(XNumElts / VWidth, Mask, ScaledMask);
2841 } else {
2842 assert(VWidth % XNumElts == 0 && "Unexpected vector bitcast");
2843 if (!widenShuffleMaskElts(VWidth / XNumElts, Mask, ScaledMask))
2844 ScaledMask.clear();
2845 }
2846 if (!ScaledMask.empty()) {
2847 // If the shuffled source vector simplifies, cast that value to this
2848 // shuffle's type.
2849 if (auto *V = simplifyShuffleVectorInst(X, UndefValue::get(XType),
2850 ScaledMask, XType, ShufQuery))
2851 return BitCastInst::Create(Instruction::BitCast, V, SVI.getType());
2852 }
2853 }
2854
2855 // shuffle x, x, mask --> shuffle x, undef, mask'
2856 if (LHS == RHS) {
2857 assert(!match(RHS, m_Undef()) &&
2858 "Shuffle with 2 undef ops not simplified?");
2859 return new ShuffleVectorInst(LHS, createUnaryMask(Mask, LHSWidth));
2860 }
2861
2862 // shuffle undef, x, mask --> shuffle x, undef, mask'
2863 if (match(LHS, m_Undef())) {
2864 SVI.commute();
2865 return &SVI;
2866 }
2867
2868 if (Instruction *I = canonicalizeInsertSplat(SVI, Builder))
2869 return I;
2870
2871 if (Instruction *I = foldSelectShuffle(SVI))
2872 return I;
2873
2874 if (Instruction *I = foldIdentityExtractShuffle(SVI))
2875 return I;
2876
2877 if (Instruction *I = foldTruncShuffle(SVI, DL.isBigEndian()))
2878 return I;
2879
2880 if (Instruction *I = narrowVectorSelect(SVI, Builder))
2881 return I;
2882
2883 if (Instruction *I = foldCastShuffle(SVI, Builder))
2884 return I;
2885
2886 APInt PoisonElts(VWidth, 0);
2887 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2888 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, PoisonElts)) {
2889 if (V != &SVI)
2890 return replaceInstUsesWith(SVI, V);
2891 return &SVI;
2892 }
2893
2894 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2895 return I;
2896
2897 // These transforms have the potential to lose undef knowledge, so they are
2898 // intentionally placed after SimplifyDemandedVectorElts().
2899 if (Instruction *I = foldShuffleWithInsert(SVI, *this))
2900 return I;
2901 if (Instruction *I = foldIdentityPaddedShuffles(SVI))
2902 return I;
2903
2904 if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
2905 Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
2906 return replaceInstUsesWith(SVI, V);
2907 }
2908
2909 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
2910 // a non-vector type. We can instead bitcast the original vector followed by
2911 // an extract of the desired element:
2912 //
2913 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef,
2914 // <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2915 // %1 = bitcast <4 x i8> %sroa to i32
2916 // Becomes:
2917 // %bc = bitcast <16 x i8> %in to <4 x i32>
2918 // %ext = extractelement <4 x i32> %bc, i32 0
2919 //
2920 // If the shuffle is extracting a contiguous range of values from the input
2921 // vector then each use which is a bitcast of the extracted size can be
2922 // replaced. This will work if the vector types are compatible, and the begin
2923 // index is aligned to a value in the casted vector type. If the begin index
2924 // isn't aligned then we can shuffle the original vector (keeping the same
2925 // vector type) before extracting.
2926 //
2927 // This code will bail out if the target type is fundamentally incompatible
2928 // with vectors of the source type.
2929 //
2930 // Example of <16 x i8>, target type i32:
2931 // Index range [4,8): v-----------v Will work.
2932 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+
2933 // <16 x i8>: | | | | | | | | | | | | | | | | |
2934 // <4 x i32>: | | | | |
2935 // +-----------+-----------+-----------+-----------+
2936 // Index range [6,10): ^-----------^ Needs an extra shuffle.
2937 // Target type i40: ^--------------^ Won't work, bail.
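The alignment rule in the diagram is cheap to check numerically: the bit offset of the first extracted element must be a multiple of the target element width, and when it is, the new extractelement index is just that offset divided by the target width. A worked instance for the <16 x i8> -> i32 example above (standalone and illustrative only):

#include <cstdio>

int main() {
  const unsigned SrcElemBits = 8, TgtElemBits = 32;
  const unsigned SrcPerTgt = TgtElemBits / SrcElemBits; // four i8s per i32
  for (unsigned BegIdx : {4u, 6u}) {
    if ((SrcElemBits * BegIdx) % TgtElemBits == 0)
      std::printf("BegIdx=%u: aligned, extractelement index %u\n", BegIdx,
                  BegIdx / SrcPerTgt);
    else
      std::printf("BegIdx=%u: unaligned, needs the extra shuffle first\n",
                  BegIdx);
  }
  return 0;
}

This reproduces the diagram: the range starting at index 4 maps to extractelement index 1, while the range starting at index 6 requires the pre-shuffle.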
2938 bool MadeChange = false;
2939 if (isShuffleExtractingFromLHS(SVI, Mask)) {
2940 Value *V = LHS;
2941 unsigned MaskElems = Mask.size();
2942 auto *SrcTy = cast<FixedVectorType>(V->getType());
2943 unsigned VecBitWidth = SrcTy->getPrimitiveSizeInBits().getFixedValue();
2944 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType());
2945 assert(SrcElemBitWidth && "vector elements must have a bitwidth");
2946 unsigned SrcNumElems = SrcTy->getNumElements();
2947 SmallVector<BitCastInst *, 8> BCs;
2948 DenseMap<Type *, Value *> NewBCs;
2949 for (User *U : SVI.users())
2950 if (BitCastInst *BC = dyn_cast<BitCastInst>(U))
2951 if (!BC->use_empty())
2952 // Only visit bitcasts that weren't previously handled.
2953 BCs.push_back(BC);
2954 for (BitCastInst *BC : BCs) {
2955 unsigned BegIdx = Mask.front();
2956 Type *TgtTy = BC->getDestTy();
2957 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy);
2958 if (!TgtElemBitWidth)
2959 continue;
2960 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth;
2961 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth;
2962 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth);
2963 if (!VecBitWidthsEqual)
2964 continue;
2965 if (!VectorType::isValidElementType(TgtTy))
2966 continue;
2967 auto *CastSrcTy = FixedVectorType::get(TgtTy, TgtNumElems);
2968 if (!BegIsAligned) {
2969 // Shuffle the input so [0,MaskElems) contains the output, and
2970 // [MaskElems,SrcNumElems) is undef.
2971 SmallVector<int, 16> ShuffleMask(SrcNumElems, -1);
2972 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I)
2973 ShuffleMask[I] = Idx;
2974 V = Builder.CreateShuffleVector(V, ShuffleMask,
2975 SVI.getName() + ".extract");
2976 BegIdx = 0;
2977 }
2978 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
2979 assert(SrcElemsPerTgtElem);
2980 BegIdx /= SrcElemsPerTgtElem;
2981 bool BCAlreadyExists = NewBCs.contains(CastSrcTy);
2982 auto *NewBC =
2983 BCAlreadyExists
2984 ? NewBCs[CastSrcTy]
2985 : Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
2986 if (!BCAlreadyExists)
2987 NewBCs[CastSrcTy] = NewBC;
2988 auto *Ext = Builder.CreateExtractElement(NewBC, BegIdx,
2989 SVI.getName() + ".extract");
2990 // The shufflevector isn't being replaced: the bitcast that used it
2991 // is. InstCombine will visit the newly-created instructions.
2992 replaceInstUsesWith(*BC, Ext);
2993 MadeChange = true;
2994 }
2995 }
2996
2997 // If the LHS is a shufflevector itself, see if we can combine it with this
2998 // one without producing an unusual shuffle.
2999 // Cases that might be simplified:
3000 // 1.
3001 // x1=shuffle(v1,v2,mask1)
3002 // x=shuffle(x1,undef,mask)
3003 // ==>
3004 // x=shuffle(v1,undef,newMask)
3005 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
3006 // 2.
3007 // x1=shuffle(v1,undef,mask1)
3008 // x=shuffle(x1,x2,mask)
3009 // where v1.size() == mask1.size()
3010 // ==>
3011 // x=shuffle(v1,x2,newMask)
3012 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
3013 // 3.
3014 // x2=shuffle(v2,undef,mask2)
3015 // x=shuffle(x1,x2,mask)
3016 // where v2.size() == mask2.size()
3017 // ==>
3018 // x=shuffle(x1,v2,newMask)
3019 // newMask[i] = (mask[i] < x1.size())
3020 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
3021 // 4.
3022 // x1=shuffle(v1,undef,mask1)
3023 // x2=shuffle(v2,undef,mask2)
3024 // x=shuffle(x1,x2,mask)
3025 // where v1.size() == v2.size()
3026 // ==>
3027 // x=shuffle(v1,v2,newMask)
3028 // newMask[i] = (mask[i] < x1.size())
3029 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
3030 //
3031 // Here we are really conservative:
3032 // we are absolutely afraid of producing a shuffle mask not in the input
3033 // program, because the code gen may not be smart enough to turn a merged
3034 // shuffle into two specific shuffles: it may produce worse code. As such,
3035 // we only merge two shuffles if the result is either a splat or one of the
3036 // input shuffle masks. In this case, merging the shuffles just removes
3037 // one instruction, which we know is safe. This is good for things like
3038 // turning: (splat(splat)) -> splat, or
3039 // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
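Case 1 above is plain mask composition. A standalone sketch of that newMask computation, with -1 marking poison lanes as in the code below (the function name is illustrative):

#include <vector>

// newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1. Lanes that were
// poison, or that selected the poison RHS, stay poison.
static std::vector<int> composeCase1(const std::vector<int> &Mask,
                                     const std::vector<int> &Mask1) {
  std::vector<int> NewMask;
  NewMask.reserve(Mask.size());
  for (int M : Mask)
    NewMask.push_back((M >= 0 && M < (int)Mask1.size()) ? Mask1[M] : -1);
  return NewMask;
}
// E.g. Mask1 = {0, 3, 1, 2} and Mask = {2, 2, -1, 0} give {1, 1, -1, 0}.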
3040 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
3041 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
3042 if (LHSShuffle)
3043 if (!match(LHSShuffle->getOperand(1), m_Poison()) &&
3044 !match(RHS, m_Poison()))
3045 LHSShuffle = nullptr;
3046 if (RHSShuffle)
3047 if (!match(RHSShuffle->getOperand(1), m_Poison()))
3048 RHSShuffle = nullptr;
3049 if (!LHSShuffle && !RHSShuffle)
3050 return MadeChange ? &SVI : nullptr;
3051
3052 Value* LHSOp0 = nullptr;
3053 Value* LHSOp1 = nullptr;
3054 Value* RHSOp0 = nullptr;
3055 unsigned LHSOp0Width = 0;
3056 unsigned RHSOp0Width = 0;
3057 if (LHSShuffle) {
3058 LHSOp0 = LHSShuffle->getOperand(0);
3059 LHSOp1 = LHSShuffle->getOperand(1);
3060 LHSOp0Width = cast<FixedVectorType>(LHSOp0->getType())->getNumElements();
3061 }
3062 if (RHSShuffle) {
3063 RHSOp0 = RHSShuffle->getOperand(0);
3064 RHSOp0Width = cast<FixedVectorType>(RHSOp0->getType())->getNumElements();
3065 }
3066 Value* newLHS = LHS;
3067 Value* newRHS = RHS;
3068 if (LHSShuffle) {
3069 // case 1
3070 if (match(RHS, m_Poison())) {
3071 newLHS = LHSOp0;
3072 newRHS = LHSOp1;
3073 }
3074 // case 2 or 4
3075 else if (LHSOp0Width == LHSWidth) {
3076 newLHS = LHSOp0;
3077 }
3078 }
3079 // case 3 or 4
3080 if (RHSShuffle && RHSOp0Width == LHSWidth) {
3081 newRHS = RHSOp0;
3082 }
3083 // case 4
3084 if (LHSOp0 == RHSOp0) {
3085 newLHS = LHSOp0;
3086 newRHS = nullptr;
3087 }
3088
3089 if (newLHS == LHS && newRHS == RHS)
3090 return MadeChange ? &SVI : nullptr;
3091
3092 ArrayRef<int> LHSMask;
3093 ArrayRef<int> RHSMask;
3094 if (newLHS != LHS)
3095 LHSMask = LHSShuffle->getShuffleMask();
3096 if (RHSShuffle && newRHS != RHS)
3097 RHSMask = RHSShuffle->getShuffleMask();
3098
3099 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
3100 SmallVector<int, 16> newMask;
3101 bool isSplat = true;
3102 int SplatElt = -1;
3103 // Create a new mask for the new ShuffleVectorInst so that the new
3104 // ShuffleVectorInst is equivalent to the original one.
3105 for (unsigned i = 0; i < VWidth; ++i) {
3106 int eltMask;
3107 if (Mask[i] < 0) {
3108 // This element is a poison value.
3109 eltMask = -1;
3110 } else if (Mask[i] < (int)LHSWidth) {
3111 // This element is from left hand side vector operand.
3112 //
3113 // If LHS is going to be replaced (case 1, 2, or 4), calculate the
3114 // new mask value for the element.
3115 if (newLHS != LHS) {
3116 eltMask = LHSMask[Mask[i]];
3117 // If the value selected is a poison value, explicitly specify it
3118 // with a -1 mask value.
3119 if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
3120 eltMask = -1;
3121 } else
3122 eltMask = Mask[i];
3123 } else {
3124 // This element is from right hand side vector operand
3125 //
3126 // If the value selected is a poison value, explicitly specify it
3127 // with a -1 mask value. (case 1)
3128 if (match(RHS, m_Poison()))
3129 eltMask = -1;
3130 // If RHS is going to be replaced (case 3 or 4), calculate the
3131 // new mask value for the element.
3132 else if (newRHS != RHS) {
3133 eltMask = RHSMask[Mask[i]-LHSWidth];
3134 // If the value selected is a poison value, explicitly specify it
3135 // with a -1 mask value.
3136 if (eltMask >= (int)RHSOp0Width) {
3137 assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
3138 "should have been check above");
3139 eltMask = -1;
3140 }
3141 } else
3142 eltMask = Mask[i]-LHSWidth;
3143
3144 // If LHS's width is changed, shift the mask value accordingly.
3145 // If newRHS == nullptr, i.e. LHSOp0 == RHSOp0, we want to remap any
3146 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
3147 // If newRHS == newLHS, we want to remap any references from newRHS to
3148 // newLHS so that we can properly identify splats that may occur due to
3149 // obfuscation across the two vectors.
3150 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS)
3151 eltMask += newLHSWidth;
3152 }
3153
3154 // Check if this could still be a splat.
3155 if (eltMask >= 0) {
3156 if (SplatElt >= 0 && SplatElt != eltMask)
3157 isSplat = false;
3158 SplatElt = eltMask;
3159 }
3160
3161 newMask.push_back(eltMask);
3162 }
3163
3164 // If the result mask is equal to one of the original shuffle masks,
3165 // or is a splat, do the replacement.
3166 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
3167 if (!newRHS)
3168 newRHS = PoisonValue::get(newLHS->getType());
3169 return new ShuffleVectorInst(newLHS, newRHS, newMask);
3170 }
3171
3172 return MadeChange ? &SVI : nullptr;
3173}