LLVM 19.0.0git
InstCombineCasts.cpp
//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
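/// For example, when evaluating (add i32 %x, %y) in i16, the rebuilt
/// expression is (add i16 (trunc %x), (trunc %y)); operands are recursively
/// evaluated in the new type (illustrative IR).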
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantFoldIntegerCast(C, Ty, isSigned, DL);

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source. There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    Res = CastInst::Create(
        static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
    break;
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        llvm_unreachable("Unsupported call!");
      case Intrinsic::vscale: {
        Function *Fn =
            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
        Res = CallInst::Create(Fn->getFunctionType(), Fn);
        break;
      }
      }
    }
    break;
  case Instruction::ShuffleVector: {
    auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
    auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
    auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
    Value *Op0 = EvaluateInDifferentType(I->getOperand(0), FixedTy, isSigned);
    Value *Op1 = EvaluateInDifferentType(I->getOperand(1), FixedTy, isSigned);
    Res = new ShuffleVectorInst(Op0, Op1,
                                cast<ShuffleVectorInst>(I)->getShuffleMask());
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, I->getIterator());
}

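/// Given a pair of casts where the result of CI1 feeds CI2, return the opcode
/// of a single cast that can replace the pair (e.g. zext(zext(x)) can become
/// one wider zext), or zero if the pair cannot be eliminated.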
Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
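/// These include constant-folding the cast, merging a pair of casts, and
/// sinking a cast into a select, phi, or unary shuffle.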
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  if (auto *SrcC = dyn_cast<Constant>(Src))
    if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
      return replaceInstUsesWith(CI, Res);

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {

      // If it's a bitcast involving vectors, make sure it has the same number
      // of elements on both sides.
      if (CI.getOpcode() != Instruction::BitCast ||
          match(&CI, m_ElementWiseBitCast(m_Value()))) {
        if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
          replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
          return NV;
        }
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
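/// For example, with Ty == i8, an immediate constant or a value such as
/// (zext i8 %x to i32) can be recomputed in i8 at no cost.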
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return match(V, m_ImmConstant());

  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we cannot evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V. We should return true if trunc(V)
/// can be computed by computing V in the smaller type. If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI: {
    // If the integer type can hold the max FP value, it is safe to cast
    // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
    const fltSemantics &Semantics = InputTy->getFltSemantics();
    uint32_t MinBitWidth =
        APFloatBase::semanticsIntSizeInBits(Semantics,
            I->getOpcode() == Instruction::FPToSI);
    return Ty->getScalarSizeInBits() >= MinBitWidth;
  }
  case Instruction::ShuffleVector:
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
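/// For example, an i8 rotate-left performed in i32 (illustrative IR):
///   trunc (or (shl (zext %x), %amt), (lshr (zext %x), (sub 8, %amt))) to i8
///     --> call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 %namt)
/// where %namt is %amt zext'd or trunc'd to i8.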
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount cannot over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // Adjust the width of ShAmt for narrowed funnel shift operation:
  // - Zero-extend if ShAmt is narrower than the destination type.
  // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
  // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
  // zext/trunc(ShAmt)).
  Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);

  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
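/// For example, when the narrow type is desirable for the target:
///   trunc (add i32 %x, 42) to i8 --> add i8 (trunc %x), 42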
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  unsigned DestWidth = DestTy->getScalarSizeInBits();

  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop C', (trunc X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X), C'
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
    Value *A;
    Constant *C;
    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
      unsigned MaxShiftAmt = SrcWidth - DestWidth;
      // If the shift is small enough, all zero/sign bits created by the shift
      // are removed by the trunc.
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                      APInt(SrcWidth, MaxShiftAmt)))) {
        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
        bool IsExact = OldShift->isExact();
        if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
                                                      /*IsSigned*/ true, DL)) {
          ShAmt = Constant::mergeUndefsWith(ShAmt, C);
          Value *Shift =
              OldShift->getOpcode() == Instruction::AShr
                  ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
                  : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
          return CastInst::CreateTruncOrBitCast(Shift, DestTy);
        }
      }
    }
    break;
  }
  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      all_equal(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type. Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
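  // For example, a trunc of an i64 expression to i8 may instead be evaluated
  // in i16 when every operation in the tree can be performed in i16.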
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);
    if (DestTy->isIntegerTy()) {
      // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
      // TODO: We canonicalize to more instructions here because we are probably
      // lacking equivalent analysis for trunc relative to icmp. There may also
      // be codegen concerns. If those trunc limitations were removed, we could
      // remove this transform.
      Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    // For vectors, we do not canonicalize all truncs to icmp, so optimize
    // patterns that would be covered within visitICmpInst.
    Value *X;
    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto GetNewShAmt = [&](unsigned Width) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
        Constant *Cmp =
            ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
        Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
        return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
                                       DL);
      };

      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *ShAmt = GetNewShAmt(DestWidth);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *ShAmt = GetNewShAmt(AWidth);
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz (zext A), B) --> add (ctlz A, B), (SrcWidth - AWidth)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale())) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
                                                 ZExtInst &Zext) {
  // If we are just checking for an icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.

  // FIXME: This set of transforms does not check for extra uses and/or creates
  //        an extra instruction (an optional final cast is not included
  //        in the transform comments). We may also want to favor icmp over
  //        shifts in cases of equal instructions because icmp has better
  //        analysis in general (invert the transform).

  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s 0) to i32 --> x>>u31; true if signbit set.
    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.

    if (Op1CV->isZero() && Cmp->isEquality()) {
      // Exactly 1 possible 1? But not the high-bit because that is
      // canonicalized to this form.
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
      APInt KnownZeroMask(~Known.Zero);
      uint32_t ShAmt = KnownZeroMask.logBase2();
      bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
                           (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
      if (IsExpectShAmt &&
          (Cmp->getOperand(0)->getType() == Zext.getType() ||
           Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        // Toggle the low bit for "X == 0".
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
        X = Builder.CreateNot(X);
      Value *Lshr = Builder.CreateLShr(X, ShAmt);
      Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
      return replaceInstUsesWith(Zext, And1);
    }
  }

  return nullptr;
}

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out. For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63. Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               0, CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      uint64_t ShiftAmt = Amt->getZExtValue();
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x. This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += Amt->getZExtValue();
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear are
        // known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  case Instruction::Call:
    // llvm.vscale() can always be executed in larger type, because the
    // value is automatically zero-extended.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::vscale)
        return true;
    return false;
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
      !isa<Constant>(Zext.getOperand(0)))
    return nullptr;

  // If one of the common conversions will work, do it.
  if (Instruction *Result = commonCastTransforms(Zext))
    return Result;

  Value *Src = Zext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Zext.getType();

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this! Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << Zext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(Res,
                          APInt::getHighBitsSet(DestBitSize,
                                                DestBitSize - SrcBitsKept),
                          0, &Zext))
      return replaceInstUsesWith(Zext, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C = ConstantInt::get(Res->getType(),
                                   APInt::getLowBitsSet(DestBitSize,
                                                        SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
  if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved. We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = DestTy->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    // SrcSize <  DstSize: zext(a & mask)
    // SrcSize == DstSize: a & mask
    // SrcSize  > DstSize: trunc(a) & mask
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, DestTy);
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, DestTy);
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, Zext);

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == DestTy)
    return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == DestTy) {
    Value *ZC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  // If we are truncating, masking, and then zexting back to the original type,
  // that's just a mask. This is not handled by canEvaluateZextd if the
  // intermediate values have extra uses. This could be generalized further for
  // a non-constant mask operand.
  // zext (and (trunc X), C) --> and X, (zext C)
  if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
      X->getType() == DestTy) {
    Value *ZextC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateAnd(X, ZextC);
  }

  if (match(Src, m_VScale())) {
    if (Zext.getFunction() &&
        Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
        if (Log2_32(*MaxVScale) < TypeWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Zext, VScale);
        }
      }
    }
  }

  if (!Zext.hasNonNeg()) {
    // If this zero extend is only used by a shift, add nneg flag.
    if (Zext.hasOneUse() &&
        SrcTy->getScalarSizeInBits() >
            Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
        match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
      Zext.setNonNeg();
      return &Zext;
    }

    if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
      Zext.setNonNeg();
      return &Zext;
    }
  }

  return nullptr;
}

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
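/// For example: sext (icmp slt i32 %x, 0) to i32 --> ashr i32 %x, 31.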
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
                                                 SExtInst &Sext) {
  Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
  ICmpInst::Predicate Pred = Cmp->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
    // sext (x <s 0) --> ashr x, 31 (all ones if negative)
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != Sext.getType())
      In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);

    return replaceInstUsesWith(Sext, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (Cmp->hasOneUse() &&
        Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
      KnownBits Known = computeKnownBits(Op0, 0, &Sext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = Cmp->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                         ConstantInt::getAllOnesValue(Sext.getType()) :
                         ConstantInt::getNullValue(Sext.getType());
          return replaceInstUsesWith(Sext, V);
        }

        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
          unsigned ShiftAmt = KnownZeroMask.countr_zero();
          // Perform a right shift to place the desired bit in the LSB.
          if (ShiftAmt)
            In = Builder.CreateLShr(In,
                                    ConstantInt::get(In->getType(), ShiftAmt));

          // At this point "In" is either 1 or 0. Subtract 1 to turn
          // {1, 0} -> {0, -1}.
          In = Builder.CreateAdd(In,
                                 ConstantInt::getAllOnesValue(In->getType()),
                                 "sext");
        } else {
          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
          unsigned ShiftAmt = KnownZeroMask.countl_zero();
          // Perform a left shift to place the desired bit in the MSB.
          if (ShiftAmt)
            In = Builder.CreateShl(In,
                                   ConstantInt::get(In->getType(), ShiftAmt));

          // Distribute the bit over the whole bit width.
          In = Builder.CreateAShr(In,
                                  ConstantInt::get(In->getType(),
                                      KnownZeroMask.getBitWidth() - 1), "sext");
        }

        if (Sext.getType() == In->getType())
          return replaceInstUsesWith(Sext, In);
        return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
      }
    }
  }

  return nullptr;
}

/// Return true if we can take the specified value and return it as type Ty
/// without inserting any new casts and without changing the value of the common
/// low bits. This is used by code that tries to promote integer operations to
/// a wider type, which will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
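/// For example, if %x and %y are i16 truncs of i32 values, then
/// (add i16 %x, %y) can be evaluated as an i32 add of the original values;
/// the low 16 bits of the result are unchanged.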
///
static bool canEvaluateSExtd(Value *V, Type *Ty) {
  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
         "Can't sign extend type to a smaller type");
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These operators can all arbitrarily be extended if their inputs can.
    return canEvaluateSExtd(I->getOperand(0), Ty) &&
           canEvaluateSExtd(I->getOperand(1), Ty);

  //case Instruction::Shl:   TODO
  //case Instruction::LShr:  TODO

  case Instruction::Select:
    return canEvaluateSExtd(I->getOperand(1), Ty) &&
           canEvaluateSExtd(I->getOperand(2), Ty);

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateSExtd(IncValue, Ty)) return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
  // If this sign extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this sext.
  if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
    return nullptr;

  if (Instruction *I = commonCastTransforms(Sext))
    return I;

  Value *Src = Sext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
  unsigned DestBitSize = DestTy->getScalarSizeInBits();

  // If the value being extended is zero or positive, use a zext instead.
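  // For example: sext i8 %x to i32 --> zext nneg i8 %x to i32.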
1411 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1412 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1413 CI->setNonNeg(true);
1414 return CI;
1415 }
1416
1417 // Try to extend the entire expression tree to the wide destination type.
1418 if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1419 // Okay, we can transform this! Insert the new expression now.
1420 LLVM_DEBUG(
1421 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1422 " to avoid sign extend: "
1423 << Sext << '\n');
1424 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1425 assert(Res->getType() == DestTy);
1426
1427 // If the high bits are already filled with sign bit, just replace this
1428 // cast with the result.
1429 if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
1430 return replaceInstUsesWith(Sext, Res);
1431
1432 // We need to emit a shl + ashr to do the sign extend.
1433 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1434 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1435 ShAmt);
1436 }
1437
1438 Value *X;
1439 if (match(Src, m_Trunc(m_Value(X)))) {
1440 // If the input has more sign bits than bits truncated, then convert
1441 // directly to final type.
1442 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1443 if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
1444 return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1445
1446 // If input is a trunc from the destination type, then convert into shifts.
1447 if (Src->hasOneUse() && X->getType() == DestTy) {
1448 // sext (trunc X) --> ashr (shl X, C), C
1449 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1450 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1451 }
1452
1453 // If we are replacing shifted-in high zero bits with sign bits, convert
1454 // the logical shift to an arithmetic shift and eliminate the cast to the
1455 // intermediate type:
1456 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1457 Value *Y;
1458 if (Src->hasOneUse() &&
1459 match(X, m_LShr(m_Value(Y),
1460 m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
1461 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1462 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1463 }
1464 }
1465
1466 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1467 return transformSExtICmp(Cmp, Sext);
1468
1469 // If the input is a shl/ashr pair with the same constant, then this is a sign
1470 // extension from a smaller value. If we could trust arbitrary bitwidth
1471 // integers, we could turn this into a truncate to the smaller bitwidth and then
1472 // use a sext for the whole extension. Since we don't, look deeper and check
1473 // for a truncate. If the source and dest are the same type, eliminate the
1474 // trunc and extend and just do shifts. For example, turn:
1475 // %a = trunc i32 %i to i8
1476 // %b = shl i8 %a, C
1477 // %c = ashr i8 %b, C
1478 // %d = sext i8 %c to i32
1479 // into:
1480 // %a = shl i32 %i, 32-(8-C)
1481 // %d = ashr i32 %a, 32-(8-C)
1482 Value *A = nullptr;
1483 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1484 Constant *BA = nullptr, *CA = nullptr;
1485 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1486 m_ImmConstant(CA))) &&
1487 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1488 Constant *WideCurrShAmt =
1489 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1490 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1491 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1492 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1493 Constant *NewShAmt = ConstantExpr::getSub(
1494 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1495 NumLowbitsLeft);
1496 NewShAmt =
1497 Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1498 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1499 return BinaryOperator::CreateAShr(A, NewShAmt);
1500 }
1501
1502 // Splatting a bit of constant-index across a value:
1503 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1504 // If the dest type is different, use a cast (adjust use check).
1505 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1506 m_SpecificInt(SrcBitSize - 1))))) {
1507 Type *XTy = X->getType();
1508 unsigned XBitSize = XTy->getScalarSizeInBits();
1509 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1510 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1511 if (XTy == DestTy)
1512 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1513 AshrAmtC);
1514 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1515 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1516 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1517 }
1518 }
1519
1520 if (match(Src, m_VScale())) {
1521 if (Sext.getFunction() &&
1522 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1523 Attribute Attr =
1524 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1525 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1526 if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
1527 Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1528 return replaceInstUsesWith(Sext, VScale);
1529 }
1530 }
1531 }
1532 }
1533
1534 return nullptr;
1535}
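// Illustrative sketch (editorial addition): the sext (trunc X) rule above,
// with X already of the destination type, becomes a shift pair. For
// i32 -> i8 -> i32:
//   %t = trunc i32 %x to i8
//   %s = sext i8 %t to i32
// is rewritten (when %t has one use) as:
//   %shl = shl i32 %x, 24
//   %s = ashr i32 %shl, 24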
1536
1537 /// Return true if the specified floating-point constant fits in the specified
1538 /// FP type without changing its value.
1539static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1540 bool losesInfo;
1541 APFloat F = CFP->getValueAPF();
1542 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1543 return !losesInfo;
1544}
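// Worked example (editorial addition): 2.5 converts to IEEEhalf and back
// without losing information, so fitsInFPType returns true; 16777217.0
// (2^24 + 1) does not fit float's 24-bit significand, so converting to
// IEEEsingle rounds to 16777216.0, sets losesInfo, and returns false.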
1545
1546static Type *shrinkFPConstant(ConstantFP *CFP) {
1547 if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1548 return nullptr; // No constant folding of this.
1549 // See if the value can be truncated to half and then reextended.
1550 if (fitsInFPType(CFP, APFloat::IEEEhalf()))
1551 return Type::getHalfTy(CFP->getContext());
1552 // See if the value can be truncated to float and then reextended.
1553 if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1554 return Type::getFloatTy(CFP->getContext());
1555 if (CFP->getType()->isDoubleTy())
1556 return nullptr; // Won't shrink.
1557 if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1558 return Type::getDoubleTy(CFP->getContext());
1559 // Don't try to shrink to various long double types.
1560 return nullptr;
1561}
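// Worked example (editorial addition): for a double constant 2.0 the ladder
// above returns the half type, since 2.0 survives the IEEEhalf round trip.
// For 1.0e308 neither half nor float can hold the value, and the constant is
// already a double, so the function returns nullptr ("won't shrink").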
1562
1563// Determine if this is a vector of ConstantFPs and if so, return the minimal
1564// type we can safely truncate all elements to.
1565static Type *shrinkFPConstantVector(Value *V) {
1566 auto *CV = dyn_cast<Constant>(V);
1567 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1568 if (!CV || !CVVTy)
1569 return nullptr;
1570
1571 Type *MinType = nullptr;
1572
1573 unsigned NumElts = CVVTy->getNumElements();
1574
1575 // For fixed-width vectors we find the minimal type by looking
1576 // through the constant values of the vector.
1577 for (unsigned i = 0; i != NumElts; ++i) {
1578 if (isa<UndefValue>(CV->getAggregateElement(i)))
1579 continue;
1580
1581 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1582 if (!CFP)
1583 return nullptr;
1584
1585 Type *T = shrinkFPConstant(CFP);
1586 if (!T)
1587 return nullptr;
1588
1589 // If we haven't found a type yet or this type has a larger mantissa than
1590 // our previous type, this is our new minimal type.
1591 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1592 MinType = T;
1593 }
1594
1595 // Make a vector type from the minimal type.
1596 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
1597}
1598
1599/// Find the minimum FP type we can safely truncate to.
1600static Type *getMinimumFPType(Value *V) {
1601 if (auto *FPExt = dyn_cast<FPExtInst>(V))
1602 return FPExt->getOperand(0)->getType();
1603
1604 // If this value is a constant, return the constant in the smallest FP type
1605 // that can accurately represent it. This allows us to turn
1606 // (float)((double)X+2.0) into x+2.0f.
1607 if (auto *CFP = dyn_cast<ConstantFP>(V))
1608 if (Type *T = shrinkFPConstant(CFP))
1609 return T;
1610
1611 // We can only correctly find a minimum type for a scalable vector when it is
1612 // a splat. For splats of constant values the fpext is wrapped up as a
1613 // ConstantExpr.
1614 if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1615 if (FPCExt->getOpcode() == Instruction::FPExt)
1616 return FPCExt->getOperand(0)->getType();
1617
1618 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1619 // vectors.
1620 if (Type *T = shrinkFPConstantVector(V))
1621 return T;
1622
1623 return V->getType();
1624}
1625
1626/// Return true if the cast from integer to FP can be proven to be exact for all
1627/// possible inputs (the conversion does not lose any precision).
1628static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
1629 CastInst::CastOps Opcode = I.getOpcode();
1630 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1631 "Unexpected cast");
1632 Value *Src = I.getOperand(0);
1633 Type *SrcTy = Src->getType();
1634 Type *FPTy = I.getType();
1635 bool IsSigned = Opcode == Instruction::SIToFP;
1636 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1637
1638 // Easy case - if the source integer type has less bits than the FP mantissa,
1639 // then the cast must be exact.
1640 int DestNumSigBits = FPTy->getFPMantissaWidth();
1641 if (SrcSize <= DestNumSigBits)
1642 return true;
1643
1644 // Cast from FP to integer and back to FP is independent of the intermediate
1645 // integer width because of poison on overflow.
1646 Value *F;
1647 if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1648 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1649 // potential rounding of negative FP input values.
1650 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1651 if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1652 SrcNumSigBits++;
1653
1654 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1655 // significant bits than the destination (and make sure neither type is
1656 // weird -- ppc_fp128).
1657 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1658 SrcNumSigBits <= DestNumSigBits)
1659 return true;
1660 }
1661
1662 // TODO:
1663 // Try harder to find if the source integer type has fewer significant bits.
1664 // For example, compute number of sign bits.
1665 KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
1666 int SigBits = (int)SrcTy->getScalarSizeInBits() -
1667 SrcKnown.countMinLeadingZeros() -
1668 SrcKnown.countMinTrailingZeros();
1669 if (SigBits <= DestNumSigBits)
1670 return true;
1671
1672 return false;
1673}
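// Worked example (editorial addition): sitofp i16 %x to float is always
// exact: SrcSize = 16 - 1 = 15 value bits against DestNumSigBits = 24.
// uitofp i32 %x to float (32 vs. 24) is proven exact here only via known
// bits; e.g. if %x is known to be a multiple of 256, eight trailing zeros
// leave SigBits = 32 - 8 = 24 <= 24.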
1674
1675Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1676 if (Instruction *I = commonCastTransforms(FPT))
1677 return I;
1678
1679 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1680 // simplify this expression to avoid one or more of the trunc/extend
1681 // operations if we can do so without changing the numerical results.
1682 //
1683 // The exact manner in which the widths of the operands interact to limit
1684 // what we can and cannot do safely varies from operation to operation, and
1685 // is explained below in the various case statements.
1686 Type *Ty = FPT.getType();
1687 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1688 if (BO && BO->hasOneUse()) {
1689 Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
1690 Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
1691 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1692 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1693 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1694 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1695 unsigned DstWidth = Ty->getFPMantissaWidth();
1696 switch (BO->getOpcode()) {
1697 default: break;
1698 case Instruction::FAdd:
1699 case Instruction::FSub:
1700 // For addition and subtraction, the infinitely precise result can
1701 // essentially be arbitrarily wide; proving that double rounding
1702 // will not occur because the result of OpI is exact (as we will for
1703 // FMul, for example) is hopeless. However, we *can* nonetheless
1704 // frequently know that double rounding cannot occur (or that it is
1705 // innocuous) by taking advantage of the specific structure of
1706 // infinitely-precise results that admit double rounding.
1707 //
1708 // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1709 // to represent both sources, we can guarantee that the double
1710 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1711 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1712 // for proof of this fact).
1713 //
1714 // Note: Figueroa does not consider the case where DstFormat !=
1715 // SrcFormat. It's possible (likely even!) that this analysis
1716 // could be tightened for those cases, but they are rare (the main
1717 // case of interest here is (float)((double)float + float)).
1718 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1719 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1720 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1721 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1722 RI->copyFastMathFlags(BO);
1723 return RI;
1724 }
1725 break;
1726 case Instruction::FMul:
1727 // For multiplication, the infinitely precise result has at most
1728 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1729 // that such a value can be exactly represented, then no double
1730 // rounding can possibly occur; we can safely perform the operation
1731 // in the destination format if it can represent both sources.
1732 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1733 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1734 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1735 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1736 }
1737 break;
1738 case Instruction::FDiv:
1739 // For division, we again use the bound from Figueroa's
1740 // dissertation. I am entirely certain that this bound can be
1741 // tightened in the unbalanced operand case by an analysis based on
1742 // the diophantine rational approximation bound, but the well-known
1743 // condition used here is a good conservative first pass.
1744 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1745 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1746 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1747 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1748 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1749 }
1750 break;
1751 case Instruction::FRem: {
1752 // Remainder is straightforward: it is always exact, so the
1753 // type of OpI doesn't enter into things at all. We simply evaluate
1754 // in whichever source type is larger, then convert to the
1755 // destination type.
1756 if (SrcWidth == OpWidth)
1757 break;
1758 Value *LHS, *RHS;
1759 if (LHSWidth == SrcWidth) {
1760 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1761 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1762 } else {
1763 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1764 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1765 }
1766
1767 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1768 return CastInst::CreateFPCast(ExactResult, Ty);
1769 }
1770 }
1771 }
1772
1773 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1774 Value *X;
1775 Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1776 if (Op && Op->hasOneUse()) {
1777 // FIXME: The FMF should propagate from the fptrunc, not the source op.
1778 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1779 if (isa<FPMathOperator>(Op))
1780 Builder.setFastMathFlags(Op->getFastMathFlags());
1781
1782 if (match(Op, m_FNeg(m_Value(X)))) {
1783 Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1784
1785 return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1786 }
1787
1788 // If we are truncating a select that has an extended operand, we can
1789 // narrow the other operand and do the select as a narrow op.
1790 Value *Cond, *X, *Y;
1791 if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1792 X->getType() == Ty) {
1793 // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1794 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1795 Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1796 return replaceInstUsesWith(FPT, Sel);
1797 }
1798 if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1799 X->getType() == Ty) {
1800 // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1801 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1802 Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1803 return replaceInstUsesWith(FPT, Sel);
1804 }
1805 }
1806
1807 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1808 switch (II->getIntrinsicID()) {
1809 default: break;
1810 case Intrinsic::ceil:
1811 case Intrinsic::fabs:
1812 case Intrinsic::floor:
1813 case Intrinsic::nearbyint:
1814 case Intrinsic::rint:
1815 case Intrinsic::round:
1816 case Intrinsic::roundeven:
1817 case Intrinsic::trunc: {
1818 Value *Src = II->getArgOperand(0);
1819 if (!Src->hasOneUse())
1820 break;
1821
1822 // Except for fabs, this transformation requires the input of the unary FP
1823 // operation to be itself an fpext from the type to which we're
1824 // truncating.
1825 if (II->getIntrinsicID() != Intrinsic::fabs) {
1826 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1827 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1828 break;
1829 }
1830
1831 // Do unary FP operation on smaller type.
1832 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1833 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1834 Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
1835 II->getIntrinsicID(), Ty);
1836 SmallVector<OperandBundleDef, 1> OpBundles;
1837 II->getOperandBundlesAsDefs(OpBundles);
1838 CallInst *NewCI =
1839 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1840 NewCI->copyFastMathFlags(II);
1841 return NewCI;
1842 }
1843 }
1844 }
1845
1846 if (Instruction *I = shrinkInsertElt(FPT, Builder))
1847 return I;
1848
1849 Value *Src = FPT.getOperand(0);
1850 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1851 auto *FPCast = cast<CastInst>(Src);
1852 if (isKnownExactCastIntToFP(*FPCast, *this))
1853 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1854 }
1855
1856 return nullptr;
1857}
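// Illustrative sketch (editorial addition): the FAdd case above covers the
// classic promote-add-demote pattern:
//   %xe = fpext float %x to double
//   %ye = fpext float %y to double
//   %a = fadd double %xe, %ye
//   %r = fptrunc double %a to float
// Here OpWidth = 53 and SrcWidth = DstWidth = 24, so 53 >= 2*24+1 holds and
// the whole sequence narrows to a single fadd float %x, %y.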
1858
1859Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1860 // If the source operand is a cast from integer to FP and known exact, then
1861 // cast the integer operand directly to the destination type.
1862 Type *Ty = FPExt.getType();
1863 Value *Src = FPExt.getOperand(0);
1864 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1865 auto *FPCast = cast<CastInst>(Src);
1866 if (isKnownExactCastIntToFP(*FPCast, *this))
1867 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1868 }
1869
1870 return commonCastTransforms(FPExt);
1871}
1872
1873/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1874/// This is safe if the intermediate type has enough bits in its mantissa to
1875/// accurately represent all values of X. For example, this won't work with
1876/// i64 -> float -> i64.
1877Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1878 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1879 return nullptr;
1880
1881 auto *OpI = cast<CastInst>(FI.getOperand(0));
1882 Value *X = OpI->getOperand(0);
1883 Type *XType = X->getType();
1884 Type *DestType = FI.getType();
1885 bool IsOutputSigned = isa<FPToSIInst>(FI);
1886
1887 // Since we can assume the conversion won't overflow, our decision as to
1888 // whether the input will fit in the float should depend on the minimum
1889 // of the input range and output range.
1890
1891 // This means this is also safe for a signed input and unsigned output, since
1892 // a negative input would lead to undefined behavior.
1893 if (!isKnownExactCastIntToFP(*OpI, *this)) {
1894 // The first cast may not round exactly based on the source integer width
1895 // and FP width, but the overflow UB rules can still allow this to fold.
1896 // If the destination type is narrow, that means the intermediate FP value
1897 // must be large enough to hold the source value exactly.
1898 // For example, (uint8_t)((float)(uint32_t)16777217) is undefined behavior.
1899 int OutputSize = (int)DestType->getScalarSizeInBits();
1900 if (OutputSize > OpI->getType()->getFPMantissaWidth())
1901 return nullptr;
1902 }
1903
1904 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1905 bool IsInputSigned = isa<SIToFPInst>(OpI);
1906 if (IsInputSigned && IsOutputSigned)
1907 return new SExtInst(X, DestType);
1908 return new ZExtInst(X, DestType);
1909 }
1910 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1911 return new TruncInst(X, DestType);
1912
1913 assert(XType == DestType && "Unexpected types for int to FP to int casts");
1914 return replaceInstUsesWith(FI, X);
1915}
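// Illustrative sketch (editorial addition): with an i16 source the first
// cast is exact (15 value bits <= float's 24), so
//   %f = sitofp i16 %x to float
//   %r = fptosi float %f to i32
// folds to sext i16 %x to i32; the unsigned variant folds to zext, and a
// narrower destination would instead produce a trunc.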
1916
1917Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
1918 if (Instruction *I = foldItoFPtoI(FI))
1919 return I;
1920
1921 return commonCastTransforms(FI);
1922}
1923
1924Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
1925 if (Instruction *I = foldItoFPtoI(FI))
1926 return I;
1927
1928 return commonCastTransforms(FI);
1929}
1930
1931Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
1932 return commonCastTransforms(CI);
1933}
1934
1935Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
1936 return commonCastTransforms(CI);
1937}
1938
1939Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
1940 // If the source integer type is not the intptr_t type for this target, do a
1941 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
1942 // cast to be exposed to other transforms.
1943 unsigned AS = CI.getAddressSpace();
1944 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
1945 DL.getPointerSizeInBits(AS)) {
1946 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
1947 DL.getIntPtrType(CI.getContext(), AS));
1948 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
1949 return new IntToPtrInst(P, CI.getType());
1950 }
1951
1952 if (Instruction *I = commonCastTransforms(CI))
1953 return I;
1954
1955 return nullptr;
1956}
1957
1958Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
1959 // If the destination integer type is not the intptr_t type for this target,
1960 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
1961 // to be exposed to other transforms.
1962 Value *SrcOp = CI.getPointerOperand();
1963 Type *SrcTy = SrcOp->getType();
1964 Type *Ty = CI.getType();
1965 unsigned AS = CI.getPointerAddressSpace();
1966 unsigned TySize = Ty->getScalarSizeInBits();
1967 unsigned PtrSize = DL.getPointerSizeInBits(AS);
1968 if (TySize != PtrSize) {
1969 Type *IntPtrTy =
1970 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
1971 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
1972 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
1973 }
1974
1975 // (ptrtoint (ptrmask P, M))
1976 // -> (and (ptrtoint P), M)
1977 // This is generally beneficial as `and` is better supported than `ptrmask`.
1978 Value *Ptr, *Mask;
1979 if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
1980 m_Value(Mask)))) &&
1981 Mask->getType() == Ty)
1982 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
1983
1984 if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
1985 // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
1986 // While this can increase the number of instructions it doesn't actually
1987 // increase the overall complexity since the arithmetic is just part of
1988 // the GEP otherwise.
1989 if (GEP->hasOneUse() &&
1990 isa<ConstantPointerNull>(GEP->getPointerOperand())) {
1991 return replaceInstUsesWith(CI,
1992 Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
1993 /*isSigned=*/false));
1994 }
1995 }
1996
1997 Value *Vec, *Scalar, *Index;
1998 if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
1999 m_Value(Scalar), m_Value(Index)))) &&
2000 Vec->getType() == Ty) {
2001 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2002 // Convert the scalar to int followed by insert to eliminate one cast:
2003 // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2004 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2005 return InsertElementInst::Create(Vec, NewCast, Index);
2006 }
2007
2008 return commonCastTransforms(CI);
2009}
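// Illustrative sketch (editorial addition): the ptrmask fold above rewrites
//   %m = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
//   %i = ptrtoint ptr %m to i64
// (with the ptrmask having one use and the mask type matching the result
// type) into
//   %pi = ptrtoint ptr %p to i64
//   %i = and i64 %pi, -16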
2010
2011/// This input value (which is known to have vector type) is being zero extended
2012/// or truncated to the specified vector type. Since the zext/trunc is done
2013/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2014/// endianness will impact which end of the vector that is extended or
2015/// truncated.
2016///
2017/// A vector is always stored with index 0 at the lowest address, which
2018/// corresponds to the most significant bits for a big endian stored integer and
2019/// the least significant bits for little endian. A trunc/zext of an integer
2020/// impacts the big end of the integer. Thus, we need to add/remove elements at
2021/// the front of the vector for big endian targets, and the back of the vector
2022/// for little endian targets.
2023///
2024/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2025///
2026/// The source and destination vector types may have different element types.
2027static Instruction *
2028 optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2029 InstCombinerImpl &IC) {
2030 // We can only do this optimization if the output is a multiple of the input
2031 // element size, or the input is a multiple of the output element size.
2032 // Convert the input type to have the same element type as the output.
2033 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2034
2035 if (SrcTy->getElementType() != DestTy->getElementType()) {
2036 // The input types don't need to be identical, but for now they must be the
2037 // same size. There is no specific reason we couldn't handle things like
2038 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2039 // there yet.
2040 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2041 DestTy->getElementType()->getPrimitiveSizeInBits())
2042 return nullptr;
2043
2044 SrcTy =
2045 FixedVectorType::get(DestTy->getElementType(),
2046 cast<FixedVectorType>(SrcTy)->getNumElements());
2047 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2048 }
2049
2050 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2051 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2052 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2053
2054 assert(SrcElts != DestElts && "Element counts should be different.");
2055
2056 // Now that the element types match, get the shuffle mask and RHS of the
2057 // shuffle to use, which depends on whether we're increasing or decreasing the
2058 // size of the input.
2059 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2060 ArrayRef<int> ShuffleMask;
2061 Value *V2;
2062
2063 if (SrcElts > DestElts) {
2064 // If we're shrinking the number of elements (rewriting an integer
2065 // truncate), just shuffle in the elements corresponding to the least
2066 // significant bits from the input and use poison as the second shuffle
2067 // input.
2068 V2 = PoisonValue::get(SrcTy);
2069 // Make sure the shuffle mask selects the "least significant bits" by
2070 // keeping elements from the back of the src vector for big endian, and from the
2071 // front for little endian.
2072 ShuffleMask = ShuffleMaskStorage;
2073 if (IsBigEndian)
2074 ShuffleMask = ShuffleMask.take_back(DestElts);
2075 else
2076 ShuffleMask = ShuffleMask.take_front(DestElts);
2077 } else {
2078 // If we're increasing the number of elements (rewriting an integer zext),
2079 // shuffle in all of the elements from InVal. Fill the rest of the result
2080 // elements with zeros from a constant zero.
2081 V2 = Constant::getNullValue(SrcTy);
2082 // Use first elt from V2 when indicating zero in the shuffle mask.
2083 uint32_t NullElt = SrcElts;
2084 // Extend with null values in the "most significant bits" by adding elements
2085 // in front of the src vector for big endian, and at the back for little
2086 // endian.
2087 unsigned DeltaElts = DestElts - SrcElts;
2088 if (IsBigEndian)
2089 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2090 else
2091 ShuffleMaskStorage.append(DeltaElts, NullElt);
2092 ShuffleMask = ShuffleMaskStorage;
2093 }
2094
2095 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2096}
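// Worked example (editorial addition): narrowing <4 x i16> through an i64
// trunc to i32, viewed as <2 x i16>, must keep the integer's low half. On a
// little-endian target that is the front of the vector:
//   shufflevector <4 x i16> %in, <4 x i16> poison, <2 x i32> <i32 0, i32 1>
// while a big-endian target keeps the back two elements: <i32 2, i32 3>.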
2097
2098static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2099 return Value % Ty->getPrimitiveSizeInBits() == 0;
2100}
2101
2102static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2103 return Value / Ty->getPrimitiveSizeInBits();
2104}
2105
2106/// V is a value which is inserted into a vector of VecEltTy.
2107/// Look through the value to see if we can decompose it into
2108/// insertions into the vector. See the example in the comment for
2109/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2110/// The type of V is always a non-zero multiple of VecEltTy's size.
2111/// Shift is the number of bits between the lsb of V and the lsb of
2112/// the vector.
2113///
2114/// This returns false if the pattern can't be matched or true if it can,
2115/// filling in Elements with the elements found here.
2116static bool collectInsertionElements(Value *V, unsigned Shift,
2117 SmallVectorImpl<Value *> &Elements,
2118 Type *VecEltTy, bool isBigEndian) {
2119 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2120 "Shift should be a multiple of the element type size");
2121
2122 // Undef values never contribute useful bits to the result.
2123 if (isa<UndefValue>(V)) return true;
2124
2125 // If we got down to a value of the right type, we win: try inserting it into
2126 // the right element.
2127 if (V->getType() == VecEltTy) {
2128 // Inserting null doesn't actually insert any elements.
2129 if (Constant *C = dyn_cast<Constant>(V))
2130 if (C->isNullValue())
2131 return true;
2132
2133 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2134 if (isBigEndian)
2135 ElementIndex = Elements.size() - ElementIndex - 1;
2136
2137 // Fail if multiple elements are inserted into this slot.
2138 if (Elements[ElementIndex])
2139 return false;
2140
2141 Elements[ElementIndex] = V;
2142 return true;
2143 }
2144
2145 if (Constant *C = dyn_cast<Constant>(V)) {
2146 // Figure out the # elements this provides, and bitcast it or slice it up
2147 // as required.
2148 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2149 VecEltTy);
2150 // If the constant is the size of a vector element, we just need to bitcast
2151 // it to the right type so it gets properly inserted.
2152 if (NumElts == 1)
2153 return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2154 Shift, Elements, VecEltTy, isBigEndian);
2155
2156 // Okay, this is a constant that covers multiple elements. Slice it up into
2157 // pieces and insert each element-sized piece into the vector.
2158 if (!isa<IntegerType>(C->getType()))
2159 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2160 C->getType()->getPrimitiveSizeInBits()));
2161 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2162 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2163
2164 for (unsigned i = 0; i != NumElts; ++i) {
2165 unsigned ShiftI = Shift + i * ElementSize;
2166 Constant *Piece = ConstantFoldBinaryInstruction(
2167 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2168 if (!Piece)
2169 return false;
2170
2171 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2172 if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
2173 isBigEndian))
2174 return false;
2175 }
2176 return true;
2177 }
2178
2179 if (!V->hasOneUse()) return false;
2180
2181 Instruction *I = dyn_cast<Instruction>(V);
2182 if (!I) return false;
2183 switch (I->getOpcode()) {
2184 default: return false; // Unhandled case.
2185 case Instruction::BitCast:
2186 if (I->getOperand(0)->getType()->isVectorTy())
2187 return false;
2188 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2189 isBigEndian);
2190 case Instruction::ZExt:
2191 if (!isMultipleOfTypeSize(
2192 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2193 VecEltTy))
2194 return false;
2195 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2196 isBigEndian);
2197 case Instruction::Or:
2198 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2199 isBigEndian) &&
2200 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2201 isBigEndian);
2202 case Instruction::Shl: {
2203 // Must be shifting by a constant that is a multiple of the element size.
2204 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2205 if (!CI) return false;
2206 Shift += CI->getZExtValue();
2207 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2208 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2209 isBigEndian);
2210 }
2211
2212 }
2213}
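// Worked example (editorial addition): collecting the constant
// i64 0x0000000200000001 into <2 x i32> elements on a little-endian target
// slices it with lshr + trunc: element 0 gets i32 1 (bits 0..31) and
// element 1 gets i32 2 (bits 32..63).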
2214
2215
2216/// If the input is an 'or' instruction, we may be doing shifts and ors to
2217/// assemble the elements of the vector manually.
2218/// Try to rip the code out and replace it with insertelements. This is to
2219/// optimize code like this:
2220///
2221/// %tmp37 = bitcast float %inc to i32
2222/// %tmp38 = zext i32 %tmp37 to i64
2223/// %tmp31 = bitcast float %inc5 to i32
2224/// %tmp32 = zext i32 %tmp31 to i64
2225/// %tmp33 = shl i64 %tmp32, 32
2226/// %ins35 = or i64 %tmp33, %tmp38
2227/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2228///
2229/// Into two insertelements that do "buildvector{%inc, %inc5}".
2230static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2231 InstCombinerImpl &IC) {
2232 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2233 Value *IntInput = CI.getOperand(0);
2234
2235 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2236 if (!collectInsertionElements(IntInput, 0, Elements,
2237 DestVecTy->getElementType(),
2238 IC.getDataLayout().isBigEndian()))
2239 return nullptr;
2240
2241 // If we succeeded, we know that all of the elements are specified by Elements
2242 // or are zero if Elements has a null entry. Recast this as a set of
2243 // insertions.
2244 Value *Result = Constant::getNullValue(CI.getType());
2245 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2246 if (!Elements[i]) continue; // Unset element.
2247
2248 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2249 IC.Builder.getInt32(i));
2250 }
2251
2252 return Result;
2253}
2254
2255/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2256/// vector followed by extract element. The backend tends to handle bitcasts of
2257/// vectors better than bitcasts of scalars because vector registers are
2258/// usually not type-specific like scalar integer or scalar floating-point.
2259static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2260 InstCombinerImpl &IC) {
2261 Value *VecOp, *Index;
2262 if (!match(BitCast.getOperand(0),
2263 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2264 return nullptr;
2265
2266 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2267 // type to extract from.
2268 Type *DestType = BitCast.getType();
2269 VectorType *VecType = cast<VectorType>(VecOp->getType());
2270 if (VectorType::isValidElementType(DestType)) {
2271 auto *NewVecType = VectorType::get(DestType, VecType);
2272 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2273 return ExtractElementInst::Create(NewBC, Index);
2274 }
2275
2276 // Only handle the case where DestType is a vector, to avoid inverting the transform in visitBitCast.
2277 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2278 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2279 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2280 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2281
2282 return nullptr;
2283}
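// Illustrative sketch (editorial addition): the canonicalization above turns
//   %e = extractelement <4 x float> %v, i64 1
//   %b = bitcast float %e to i32
// (one-use extract) into
//   %bc = bitcast <4 x float> %v to <4 x i32>
//   %b = extractelement <4 x i32> %bc, i64 1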
2284
2285/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2286static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2287 InstCombiner::BuilderTy &Builder) {
2288 Type *DestTy = BitCast.getType();
2289 BinaryOperator *BO;
2290
2291 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2292 !BO->isBitwiseLogicOp())
2293 return nullptr;
2294
2295 // FIXME: This transform is restricted to vector types to avoid backend
2296 // problems caused by creating potentially illegal operations. If a fix-up is
2297 // added to handle that situation, we can remove this check.
2298 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2299 return nullptr;
2300
2301 if (DestTy->isFPOrFPVectorTy()) {
2302 Value *X, *Y;
2303 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2304 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2305 match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
2306 if (X->getType()->isFPOrFPVectorTy() &&
2307 Y->getType()->isIntOrIntVectorTy()) {
2308 Value *CastedOp =
2309 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2310 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2311 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2312 }
2313 if (X->getType()->isIntOrIntVectorTy() &&
2314 Y->getType()->isFPOrFPVectorTy()) {
2315 Value *CastedOp =
2316 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2317 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2318 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2319 }
2320 }
2321 return nullptr;
2322 }
2323
2324 if (!DestTy->isIntOrIntVectorTy())
2325 return nullptr;
2326
2327 Value *X;
2328 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2329 X->getType() == DestTy && !isa<Constant>(X)) {
2330 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2331 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2332 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2333 }
2334
2335 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2336 X->getType() == DestTy && !isa<Constant>(X)) {
2337 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2338 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2339 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2340 }
2341
2342 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2343 // constant. This eases recognition of special constants for later ops.
2344 // Example:
2345 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2346 Constant *C;
2347 if (match(BO->getOperand(1), m_Constant(C))) {
2348 // bitcast (logic X, C) --> logic (bitcast X, C')
2349 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2350 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2351 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2352 }
2353
2354 return nullptr;
2355}
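// Illustrative sketch (editorial addition): for an integer vector
// destination the fold above hoists the cast out of one-use logic:
//   %bx = bitcast <4 x i32> %x to <2 x i64>
//   %a = and <2 x i64> %bx, %y
//   %r = bitcast <2 x i64> %a to <4 x i32>
// becomes an and on <4 x i32> of %x and a bitcast of %y, removing one cast.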
2356
2357/// Change the type of a select if we can eliminate a bitcast.
2358static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2359 InstCombiner::BuilderTy &Builder) {
2360 Value *Cond, *TVal, *FVal;
2361 if (!match(BitCast.getOperand(0),
2362 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2363 return nullptr;
2364
2365 // A vector select must maintain the same number of elements in its operands.
2366 Type *CondTy = Cond->getType();
2367 Type *DestTy = BitCast.getType();
2368 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2369 if (!DestTy->isVectorTy() ||
2370 CondVTy->getElementCount() !=
2371 cast<VectorType>(DestTy)->getElementCount())
2372 return nullptr;
2373
2374 // FIXME: This transform is restricted from changing the select between
2375 // scalars and vectors to avoid backend problems caused by creating
2376 // potentially illegal operations. If a fix-up is added to handle that
2377 // situation, we can remove this check.
2378 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2379 return nullptr;
2380
2381 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2382 Value *X;
2383 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2384 !isa<Constant>(X)) {
2385 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2386 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2387 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2388 }
2389
2390 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2391 !isa<Constant>(X)) {
2392 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2393 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2394 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2395 }
2396
2397 return nullptr;
2398}
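// Illustrative sketch (editorial addition): the select fold above rewrites
//   %t = bitcast <4 x i32> %x to <2 x i64>
//   %s = select i1 %c, <2 x i64> %t, <2 x i64> %y
//   %r = bitcast <2 x i64> %s to <4 x i32>
// (one-use select and inner bitcast) into
//   select i1 %c, <4 x i32> %x, <4 x i32> (bitcast %y)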
2399
2400/// Check if all users of CI are StoreInsts.
2401static bool hasStoreUsersOnly(CastInst &CI) {
2402 for (User *U : CI.users()) {
2403 if (!isa<StoreInst>(U))
2404 return false;
2405 }
2406 return true;
2407}
2408
2409/// This function handles following case
2410///
2411/// A -> B cast
2412/// PHI
2413/// B -> A cast
2414///
2415/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2416/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2417Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2418 PHINode *PN) {
2419 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2420 if (hasStoreUsersOnly(CI))
2421 return nullptr;
2422
2423 Value *Src = CI.getOperand(0);
2424 Type *SrcTy = Src->getType(); // Type B
2425 Type *DestTy = CI.getType(); // Type A
2426
2427 SmallVector<PHINode *, 4> PhiWorklist;
2428 SmallSetVector<PHINode *, 4> OldPhiNodes;
2429
2430 // Find all of the A->B casts and PHI nodes.
2431 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2432 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2433 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2434 PhiWorklist.push_back(PN);
2435 OldPhiNodes.insert(PN);
2436 while (!PhiWorklist.empty()) {
2437 auto *OldPN = PhiWorklist.pop_back_val();
2438 for (Value *IncValue : OldPN->incoming_values()) {
2439 if (isa<Constant>(IncValue))
2440 continue;
2441
2442 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2443 // If there is a sequence of one or more load instructions, each loaded
2444 // value is used as the address of a later load instruction, and a bitcast
2445 // is necessary to change the value type, so don't optimize it. For
2446 // simplicity we give up if the load address comes from another load.
2447 Value *Addr = LI->getOperand(0);
2448 if (Addr == &CI || isa<LoadInst>(Addr))
2449 return nullptr;
2450 // Don't transform "load <256 x i32>, <256 x i32>*" to
2451 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2452 // TODO: Remove this check when bitcast between vector and x86_amx
2453 // is replaced with a specific intrinsic.
2454 if (DestTy->isX86_AMXTy())
2455 return nullptr;
2456 if (LI->hasOneUse() && LI->isSimple())
2457 continue;
2458 // If a LoadInst has more than one use, changing the type of loaded
2459 // value may create another bitcast.
2460 return nullptr;
2461 }
2462
2463 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2464 if (OldPhiNodes.insert(PNode))
2465 PhiWorklist.push_back(PNode);
2466 continue;
2467 }
2468
2469 auto *BCI = dyn_cast<BitCastInst>(IncValue);
2470 // We can't handle other instructions.
2471 if (!BCI)
2472 return nullptr;
2473
2474 // Verify it's a A->B cast.
2475 Type *TyA = BCI->getOperand(0)->getType();
2476 Type *TyB = BCI->getType();
2477 if (TyA != DestTy || TyB != SrcTy)
2478 return nullptr;
2479 }
2480 }
2481
2482 // Check that each user of each old PHI node is something that we can
2483 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2484 for (auto *OldPN : OldPhiNodes) {
2485 for (User *V : OldPN->users()) {
2486 if (auto *SI = dyn_cast<StoreInst>(V)) {
2487 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2488 return nullptr;
2489 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2490 // Verify it's a B->A cast.
2491 Type *TyB = BCI->getOperand(0)->getType();
2492 Type *TyA = BCI->getType();
2493 if (TyA != DestTy || TyB != SrcTy)
2494 return nullptr;
2495 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2496 // As long as the user is another old PHI node, then even if we don't
2497 // rewrite it, the PHI web we're considering won't have any users
2498 // outside itself, so it'll be dead.
2499 if (!OldPhiNodes.contains(PHI))
2500 return nullptr;
2501 } else {
2502 return nullptr;
2503 }
2504 }
2505 }
2506
2507 // For each old PHI node, create a corresponding new PHI node with type A.
2508 SmallDenseMap<PHINode *, PHINode *, 4> NewPNodes;
2509 for (auto *OldPN : OldPhiNodes) {
2510 Builder.SetInsertPoint(OldPN);
2511 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2512 NewPNodes[OldPN] = NewPN;
2513 }
2514
2515 // Fill in the operands of new PHI nodes.
2516 for (auto *OldPN : OldPhiNodes) {
2517 PHINode *NewPN = NewPNodes[OldPN];
2518 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2519 Value *V = OldPN->getOperand(j);
2520 Value *NewV = nullptr;
2521 if (auto *C = dyn_cast<Constant>(V)) {
2522 NewV = ConstantExpr::getBitCast(C, DestTy);
2523 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2524 // Explicitly perform load combine to make sure no opposing transform
2525 // can remove the bitcast in the meantime and trigger an infinite loop.
2526 Builder.SetInsertPoint(LI);
2527 NewV = combineLoadToNewType(*LI, DestTy);
2528 // Remove the old load and its use in the old phi, which itself becomes
2529 // dead once the whole transform finishes.
2530 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2531 eraseInstFromFunction(*LI);
2532 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2533 NewV = BCI->getOperand(0);
2534 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2535 NewV = NewPNodes[PrevPN];
2536 }
2537 assert(NewV);
2538 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2539 }
2540 }
2541
2542 // Traverse all accumulated PHI nodes and process their users,
2543 // which are Stores and BitCasts. Without this processing
2544 // NewPHI nodes could be replicated and could lead to extra
2545 // moves generated after DeSSA.
2546 // If there is a store with type B, change it to type A.
2547
2548
2549 // Replace users of BitCast B->A with NewPHI. These will help
2550 // later to get rid of the closure formed by the OldPHI nodes.
2551 Instruction *RetVal = nullptr;
2552 for (auto *OldPN : OldPhiNodes) {
2553 PHINode *NewPN = NewPNodes[OldPN];
2554 for (User *V : make_early_inc_range(OldPN->users())) {
2555 if (auto *SI = dyn_cast<StoreInst>(V)) {
2556 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2557 Builder.SetInsertPoint(SI);
2558 auto *NewBC =
2559 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2560 SI->setOperand(0, NewBC);
2561 Worklist.push(SI);
2562 assert(hasStoreUsersOnly(*NewBC));
2563 }
2564 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2565 Type *TyB = BCI->getOperand(0)->getType();
2566 Type *TyA = BCI->getType();
2567 assert(TyA == DestTy && TyB == SrcTy);
2568 (void) TyA;
2569 (void) TyB;
2570 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2571 if (BCI == &CI)
2572 RetVal = I;
2573 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2574 assert(OldPhiNodes.contains(PHI));
2575 (void) PHI;
2576 } else {
2577 llvm_unreachable("all uses should be handled");
2578 }
2579 }
2580 }
2581
2582 return RetVal;
2583}
2584
2585Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2586 // If the operands are integer typed then apply the integer transforms,
2587 // otherwise just apply the common ones.
2588 Value *Src = CI.getOperand(0);
2589 Type *SrcTy = Src->getType();
2590 Type *DestTy = CI.getType();
2591
2592 // Get rid of casts from one type to the same type. These are useless and can
2593 // be replaced by the operand.
2594 if (DestTy == Src->getType())
2595 return replaceInstUsesWith(CI, Src);
2596
2597 if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
2598 // Beware: messing with this target-specific oddity may cause trouble.
2599 if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
2600 Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
2601 return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
2602 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2603 }
2604
2605 if (isa<IntegerType>(SrcTy)) {
2606 // If this is a cast from an integer to vector, check to see if the input
2607 // is a trunc or zext of a bitcast from vector. If so, we can replace all
2608 // the casts with a shuffle and (potentially) a bitcast.
2609 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2610 CastInst *SrcCast = cast<CastInst>(Src);
2611 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2612 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2613 if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2614 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2615 return I;
2616 }
2617
2618 // If the input is an 'or' instruction, we may be doing shifts and ors to
2619 // assemble the elements of the vector manually. Try to rip the code out
2620 // and replace it with insertelements.
2621 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2622 return replaceInstUsesWith(CI, V);
2623 }
2624 }
2625
2626 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2627 if (SrcVTy->getNumElements() == 1) {
2628 // If our destination is not a vector, then make this a straight
2629 // scalar-scalar cast.
2630 if (!DestTy->isVectorTy()) {
2631 Value *Elem =
2632 Builder.CreateExtractElement(Src,
2633 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2634 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2635 }
2636
2637 // Otherwise, see if our source is an insert. If so, then use the scalar
2638 // component directly:
2639 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2640 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2641 return new BitCastInst(InsElt->getOperand(1), DestTy);
2642 }
2643
2644 // Convert an artificial vector insert into more analyzable bitwise logic.
2645 unsigned BitWidth = DestTy->getScalarSizeInBits();
2646 Value *X, *Y;
2647 uint64_t IndexC;
2648 if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
2649 m_Value(Y), m_ConstantInt(IndexC)))) &&
2650 DestTy->isIntegerTy() && X->getType() == DestTy &&
2651 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2652 // Adjust for big endian - the LSBs are at the high index.
2653 if (DL.isBigEndian())
2654 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2655
2656 // We only handle (endian-normalized) insert to index 0. Any other insert
2657 // would require a left-shift, so that is an extra instruction.
2658 if (IndexC == 0) {
2659 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2660 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2661 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2662 Value *AndX = Builder.CreateAnd(X, MaskC);
2663 Value *ZextY = Builder.CreateZExt(Y, DestTy);
2664 return BinaryOperator::CreateOr(AndX, ZextY);
2665 }
2666 }
2667 }
2668
2669 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2670 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2671 // a bitcast to a vector with the same # elts.
2672 Value *ShufOp0 = Shuf->getOperand(0);
2673 Value *ShufOp1 = Shuf->getOperand(1);
2674 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2675 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2676 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2677 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2678 ShufElts == SrcVecElts) {
2679 BitCastInst *Tmp;
2680 // If either of the operands is a cast from CI.getType(), then
2681 // evaluating the shuffle in the casted destination's type will allow
2682 // us to eliminate at least one cast.
2683 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2684 Tmp->getOperand(0)->getType() == DestTy) ||
2685 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2686 Tmp->getOperand(0)->getType() == DestTy)) {
2687 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2688 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2689 // Return a new shuffle vector. Use the same element ID's, as we
2690 // know the vector types match #elts.
2691 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2692 }
2693 }
2694
2695 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
2696 // as a byte/bit swap:
2697 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2698 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2699 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
2700 Shuf->hasOneUse() && Shuf->isReverse()) {
2701 unsigned IntrinsicNum = 0;
2702 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2703 SrcTy->getScalarSizeInBits() == 8) {
2704 IntrinsicNum = Intrinsic::bswap;
2705 } else if (SrcTy->getScalarSizeInBits() == 1) {
2706 IntrinsicNum = Intrinsic::bitreverse;
2707 }
2708 if (IntrinsicNum != 0) {
2709 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2710 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2711 Function *BswapOrBitreverse =
2712 Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
2713 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2714 return CallInst::Create(BswapOrBitreverse, {ScalarX});
2715 }
2716 }
2717 }
2718
2719 // Handle the A->B->A cast, and there is an intervening PHI node.
2720 if (PHINode *PN = dyn_cast<PHINode>(Src))
2721 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2722 return I;
2723
2724 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2725 return I;
2726
2727 if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
2728 return I;
2729
2730 if (Instruction *I = foldBitCastSelect(CI, Builder))
2731 return I;
2732
2733 return commonCastTransforms(CI);
2734}
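// Illustrative sketch (editorial addition): the reversing-shuffle case above
// recognizes a byte swap. Assuming i32 is a legal integer type:
//   %rev = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %i = bitcast <4 x i8> %rev to i32
// becomes:
//   %bc = bitcast <4 x i8> %x to i32
//   %i = call i32 @llvm.bswap.i32(i32 %bc)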
2735
2736Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2737 return commonCastTransforms(CI);
2738}
Rewrite undef for PHI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool canEvaluateSExtd(Value *V, Type *Ty)
Return true if we can take the specified value and return it as type Ty without inserting any new cas...
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, InstCombinerImpl &IC, Instruction *CxtI)
Determine if the specified value can be computed in the specified wider type and produce the same low...
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static bool canAlwaysEvaluateInType(Value *V, Type *Ty)
Constants and extensions/truncates from the destination type are always free to be evaluated in that ...
static Type * shrinkFPConstant(ConstantFP *CFP)
static bool canNotEvaluateInType(Value *V, Type *Ty)
Filter out values that we can not evaluate in the destination type for free.
static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC)
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
static Type * getMinimumFPType(Value *V)
Find the minimum FP type we can safely truncate to.
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Type * shrinkFPConstantVector(Value *V)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, Instruction *CxtI)
Return true if we can evaluate the specified expression tree as type Ty instead of its larger type,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1485
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1083
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
This class represents a conversion between pointers from one address space to another.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:228
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:235
std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
Definition: Attributes.cpp:381
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name="")
Definition: InstrTypes.h:271
BinaryOps getOpcode() const
Definition: InstrTypes.h:402
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name="")
Definition: InstrTypes.h:266
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:483
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:728
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:723
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's constructor.
static CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, Type *DstIntPtrTy)
Determine how a pair of casts can be eliminated, if they can be at all.
static CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:730
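The CastInst factories pick a concrete cast opcode for you. A hedged sketch of typical use; makeNarrow and its setting are illustrative, not part of this file:

#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include <cassert>
using namespace llvm;

// Narrow an integer value to i32; CreateIntegerCast picks trunc, zext, sext,
// or bitcast based on the source/destination widths and the isSigned flag.
CastInst *makeNarrow(Value *Val, LLVMContext &Ctx) {
  Type *I32 = Type::getInt32Ty(Ctx);
  CastInst *CI = CastInst::CreateIntegerCast(Val, I32, /*isSigned=*/false);
  assert(CI->getSrcTy() == Val->getType() && CI->getDestTy() == I32);
  return CI; // not yet inserted; the caller must place it in a block
}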
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:780
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:809
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:805
@ ICMP_EQ
equal
Definition: InstrTypes.h:801
@ ICMP_NE
not equal
Definition: InstrTypes.h:802
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:806
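These predicate values plug directly into icmp construction and matching; a small sketch, with the helper name being mine:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Compare two existing integer values with an unsigned less-than predicate.
// The instruction is created free-standing and must be inserted by the caller.
ICmpInst *makeULT(Value *A, Value *B) {
  return new ICmpInst(ICmpInst::ICMP_ULT, A, B, "cmp");
}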
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2460
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2478
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2056
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2014
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
const APFloat & getValueAPF() const
Definition: Constants.h:296
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate for the type of this constant.
Definition: Constants.h:146
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
Definition: Constants.cpp:777
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:403
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
Definition: Constants.cpp:284
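A standalone sketch of the Constant factories above, including the vector-splat behavior of ConstantInt::get; the function is illustrative:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

void constantDemo(LLVMContext &Ctx) {
  Type *I8 = Type::getInt8Ty(Ctx);
  Constant *Zero = Constant::getNullValue(I8);    // i8 0
  Constant *Ones = Constant::getAllOnesValue(I8); // i8 -1
  // For a vector type, ConstantInt::get returns a splat of the value.
  auto *V4I8 = FixedVectorType::get(I8, 4);
  Constant *Splat = ConstantInt::get(V4I8, 7);    // <4 x i8> <7, 7, 7, 7>
  (void)Zero; (void)Ones; (void)Splat;
}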
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients are updated.
Definition: DataLayout.h:410
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
bool isBigEndian() const
Definition: DataLayout.h:239
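A sketch of the DataLayout queries above; the layout string is an illustrative x86-64-style example, while real code takes the DataLayout from the Module:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void dataLayoutDemo(LLVMContext &Ctx) {
  DataLayout DL("e-m:e-p:64:64-i64:64-n8:16:32:64");
  bool Legal = DL.isLegalInteger(64);        // true: 64 appears in n8:16:32:64
  unsigned Bits = DL.getPointerSizeInBits(); // 64
  Type *IntPtr = DL.getIntPtrType(Ctx);      // i64 for this layout
  bool BE = DL.isBigEndian();                // false: the string starts with 'e'
  (void)Legal; (void)Bits; (void)IntPtr; (void)BE;
}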
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:200
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:695
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:669
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:88
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2455
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2443
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2084
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2028
Value * CreateFRemFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1648
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:930
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1108
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1431
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:305
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:480
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1748
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2110
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1410
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2010
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1469
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1321
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2100
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1660
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2144
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2179
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1450
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1513
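The builder calls above compose naturally; a hedged sketch of a trunc/mask/zext round trip, where the function and value names are illustrative:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Assuming BB is an existing block and X is an i64 value available there.
Value *maskLowByte(BasicBlock *BB, Value *X) {
  IRBuilder<> B(BB->getContext());
  B.SetInsertPoint(BB); // append new instructions at the end of BB
  Value *T = B.CreateTrunc(X, B.getInt32Ty(), "narrow");
  Value *M = B.CreateAnd(T, B.getInt32(0xFF), "masked");
  return B.CreateZExt(M, X->getType(), "widened");
}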
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand, try to fold the binary operator into the select arguments.
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0, see if we can fold the instruction into the PHI (which is only possible if all operands to the PHI are constants).
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * foldItoFPtoI(CastInst &FI)
fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X). This is safe if the intermediate type has enough bits in its mantissa to accurately represent all values of X; for example, this won't work with i64 -> float -> i64.
Instruction * visitFPTrunc(FPTruncInst &CI)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for, actually insert the code to evaluate the expression.
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:340
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:385
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:374
const DataLayout & DL
Definition: InstCombiner.h:75
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:451
DominatorTree & DT
Definition: InstCombiner.h:74
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:430
BuilderTy & Builder
Definition: InstCombiner.h:60
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:446
void push(Instruction *I)
Push the instruction onto the worklist stack.
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction, which must be an operator which supports these flags.
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:299
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:71
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other instructions.
Definition: Instruction.h:147
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:75
void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
bool hasNonNeg() const LLVM_READONLY
Determine whether the nneg flag is set.
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will have (use 0 if you really have no idea).
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
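Building a two-input phi by hand with the methods above; a standalone sketch in which ThenV/ElseV and their blocks are assumed to already exist:

#include "llvm/IR/Instructions.h"
#include <cassert>
using namespace llvm;

// NumReservedValues is only a capacity hint; incoming entries are appended.
PHINode *makeMerge(Type *Ty, Value *ThenV, BasicBlock *ThenBB,
                   Value *ElseV, BasicBlock *ElseBB) {
  PHINode *PN = PHINode::Create(Ty, /*NumReservedValues=*/2, "merge");
  PN->addIncoming(ThenV, ThenBB);
  PN->addIncoming(ElseV, ElseBB);
  assert(PN->getNumIncomingValues() == 2);
  return PN; // still needs to be inserted at the top of the join block
}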
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1743
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This instruction constructs a fixed permutation of two input vectors.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
bool isX86_MMXTy() const
Return true if this is X86 MMX.
Definition: Type.h:201
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewType(Type *EltTy) const
Given a vector type, change the element type, whilst keeping the old number of elements.
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:262
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:204
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static Type * getPPC_FP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
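The scalar/vector queries above in action; a standalone sketch, with the mantissa-width check hinting at why int-to-FP casts can be inexact:

#include "llvm/IR/DerivedTypes.h"
#include <cassert>
using namespace llvm;

void typeQueryDemo(LLVMContext &Ctx) {
  Type *F32 = Type::getFloatTy(Ctx);
  auto *V4F32 = FixedVectorType::get(F32, 4);
  assert(V4F32->isVectorTy() && V4F32->isFPOrFPVectorTy());
  assert(V4F32->getScalarType() == F32);
  assert(V4F32->getScalarSizeInBits() == 32);
  assert(F32->getFPMantissaWidth() == 24); // only 24 bits of integer precision
  // Same element count, new element type: <4 x float> becomes <4 x i32>.
  Type *V4I32 = V4F32->getWithNewType(Type::getInt32Ty(Ctx));
  assert(V4I32->isIntOrIntVectorTy());
}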
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:163
'undef' values are things that do not have specified contents.
Definition: Constants.h:1336
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1724
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Definition: Type.cpp:683
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:676
This class represents zero extension of integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1447
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:862
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:780
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr and ashr).
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:525
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
CastOperator_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match() expression.
Definition: PatternMatch.h:798
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:532
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:759
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
VScaleVal_match m_VScale()
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
specific_intval< true > m_SpecificIntAllowUndef(APInt V)
Definition: PatternMatch.h:870
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:994
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eq/ne/...) to Threshold.
Definition: PatternMatch.h:606
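A hedged sketch of how these matchers compose; the helper below is illustrative, not a function from this file:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace PatternMatch;

// Recognize "trunc (lshr X, C)" with a single-use shift, capturing X and the
// shift amount; this is the shape several transforms in this file look for.
bool matchTruncOfLShr(Value *V, Value *&X, const APInt *&ShAmt) {
  return match(V, m_Trunc(m_OneUse(m_LShr(m_Value(X), m_APInt(ShAmt)))));
}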
Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition: MathExtras.h:332
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition: STLExtras.h:665
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
@ SPF_UNKNOWN
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
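The log2/power-of-two helpers above on concrete values; a standalone sketch:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void mathDemo() {
  assert(llvm::Log2_32(32) == 5);      // floor log2
  assert(llvm::Log2_64_Ceil(33) == 6); // ceil log2: 2^6 = 64 >= 33
  assert(llvm::isPowerOf2_32(64));
  assert(!llvm::isPowerOf2_32(48));
}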
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out parameter results if we successfully match.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2693
@ And
Bitwise or logical AND of integers.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2019
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrower than the source type.
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:247
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
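This mirrors the fits-in-a-narrower-type probe used when shrinking FP constants: convert with a losesInfo flag and check it. A standalone sketch; the helper name is mine:

#include "llvm/ADT/APFloat.h"
using namespace llvm;

// Returns true if D converts to IEEE single without losing information.
bool fitsInFloat(APFloat D) {
  bool LosesInfo = false;
  (void)D.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
  return !LosesInfo;
}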
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:233
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:239
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:136
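A standalone sketch of the KnownBits queries above, constructing the known-bit sets by hand; the scenario is illustrative:

#include "llvm/Support/KnownBits.h"
#include <cassert>
using namespace llvm;

// An 8-bit value whose low two bits and sign bit are known zero, e.g. a
// multiple of 4 below 128; everything else is unknown.
void knownBitsDemo() {
  KnownBits Known(8);
  Known.Zero.setBit(0);
  Known.Zero.setBit(1);
  Known.Zero.setBit(7);
  assert(Known.countMinTrailingZeros() == 2);
  assert(Known.countMinLeadingZeros() == 1);
  assert(Known.getMaxValue() == 0x7C); // unknown bits assumed to be one
}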
SimplifyQuery getWithInstruction(const Instruction *I) const
Definition: SimplifyQuery.h:96