//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Given an expression that canEvaluateTruncated or canEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
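/// Illustrative example (not from the original file): evaluating
/// (add i32 %x, %y) in i16 yields (add i16 (trunc %x), (trunc %y)), with the
/// operand casts materialized by the recursive calls below.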
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V)) {
    C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
    // If we got a constantexpr back, try to simplify it with DL info.
    return ConstantFoldConstant(C, DL, &TLI);
  }

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source. There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    Res = CastInst::Create(
        static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
    break;
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        llvm_unreachable("Unsupported call!");
      case Intrinsic::vscale: {
        Function *Fn =
            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
        Res = CallInst::Create(Fn->getFunctionType(), Fn);
        break;
      }
      }
    }
    break;
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, I->getIterator());
}

Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
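  // E.g. if the composed cast would be (inttoptr i32 -> ptr) on a target with
  // 64-bit pointers, we drop the fold rather than create it (illustrative
  // example restating the check below).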
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  if (auto *SrcC = dyn_cast<Constant>(Src))
    if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
      return replaceInstUsesWith(CI, Res);

  // Try to eliminate a cast of a cast.
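  // Illustrative example: a zext whose result is truncated back down folds to
  // a single narrower zext:
  //   %b = zext i8 %a to i32
  //   %c = trunc i32 %b to i16   -->   %c = zext i8 %a to i16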
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return true;
  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we cannot evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  assert(!isa<Constant>(V) && "Constant should already be handled.");
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V's type. We should return true if
/// trunc(V) can be computed by computing V in the smaller type. If V is an
/// instruction, then trunc(inst(x,y)) can be computed as
/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be
/// efficiently truncated.
///
/// This function works on both vectors and scalars.
///
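/// Illustrative example: trunc (add i32 %x, %y) to i8 can be rewritten as
/// add i8 (trunc %x), (trunc %y), so this returns true for such an add when
/// %x and %y are themselves free to truncate.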
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
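    // E.g. a udiv of two i32 values whose top 24 bits are known zero can be
    // performed as an i8 udiv of the truncated operands (illustrative).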
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI: {
    // If the integer type can hold the max FP value, it is safe to cast
    // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
    const fltSemantics &Semantics = InputTy->getFltSemantics();
    uint32_t MinBitWidth =
        APFloatBase::semanticsIntSizeInBits(Semantics,
            I->getOpcode() == Instruction::FPToSI);
    return Ty->getScalarSizeInBits() >= MinBitWidth;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
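/// Illustrative example (an i8 rotate-left performed in i32):
///   %wx = zext i8 %x to i32
///   trunc (or (shl %wx, %amt), (lshr %wx, (sub 8, %amt))) to i8
///     --> call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 (trunc %amt))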
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount can not over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // We have an unnecessarily wide rotate!
  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
  // Narrow the inputs and convert to funnel shift intrinsic:
  // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
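/// Illustrative example: trunc (add i32 %x, 42) to i8
///   --> add i8 (trunc i32 %x to i8), 42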
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  unsigned DestWidth = DestTy->getScalarSizeInBits();

  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
    Value *A;
    Constant *C;
    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
      unsigned MaxShiftAmt = SrcWidth - DestWidth;
      // If the shift is small enough, all zero/sign bits created by the shift
      // are removed by the trunc.
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                      APInt(SrcWidth, MaxShiftAmt)))) {
        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
        bool IsExact = OldShift->isExact();
        auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        Value *Shift =
            OldShift->getOpcode() == Instruction::AShr
                ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
                : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
        return CastInst::CreateTruncOrBitCast(Shift, DestTy);
      }
    }
    break;
  }
  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      all_equal(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc   (inselt undef, X, Index) --> inselt undef,   (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type.   Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);
    if (DestTy->isIntegerTy()) {
      // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
      // TODO: We canonicalize to more instructions here because we are probably
      // lacking equivalent analysis for trunc relative to icmp. There may also
      // be codegen concerns. If those trunc limitations were removed, we could
      // remove this transform.
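      // E.g. (illustrative): trunc i8 %x to i1
      //   --> %a = and i8 %x, 1; icmp ne i8 %a, 0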
      Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    // For vectors, we do not canonicalize all truncs to icmp, so optimize
    // patterns that would be covered within visitICmpInst.
    Value *X;
    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto GetNewShAmt = [&](unsigned Width) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
        Constant *Cmp =
            ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
        Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
        return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
                                       DL);
      };

      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *ShAmt = GetNewShAmt(DestWidth);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *ShAmt = GetNewShAmt(AWidth);
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B)) --> add(ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale())) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
                                                 ZExtInst &Zext) {
  // If we are just checking for an icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.
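  // Illustrative example of the first transform below:
  //   zext (icmp slt i32 %x, 0) to i32  -->  lshr i32 %x, 31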

  // FIXME: This set of transforms does not check for extra uses and/or creates
  //        an extra instruction (an optional final cast is not included
  //        in the transform comments). We may also want to favor icmp over
  //        shifts in cases of equal instructions because icmp has better
  //        analysis in general (invert the transform).

  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s 0) to i32 --> x>>u31   true if signbit set.
    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
    if (Op1CV->isZero() && Cmp->isEquality() &&
        (Cmp->getOperand(0)->getType() == Zext.getType() ||
         Cmp->getPredicate() == ICmpInst::ICMP_NE)) {
      // If Op1C is some other power of two, convert:
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);

      // Exactly 1 possible 1? But not the high-bit because that is
      // canonicalized to this form.
      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2() &&
          (Zext.getType()->getScalarSizeInBits() !=
           KnownZeroMask.logBase2() + 1)) {
        uint32_t ShAmt = KnownZeroMask.logBase2();
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        // Toggle the low bit for "X == 0".
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
        X = Builder.CreateNot(X);
      Value *Lshr = Builder.CreateLShr(X, ShAmt);
      Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
      return replaceInstUsesWith(Zext, And1);
    }
  }

  return nullptr;
}

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out. For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63. Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               0, CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      uint64_t ShiftAmt = Amt->getZExtValue();
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x. This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += Amt->getZExtValue();
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear are
        // known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  case Instruction::Call:
    // llvm.vscale() can always be executed in larger type, because the
    // value is automatically zero-extended.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::vscale)
        return true;
    return false;
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
      !isa<Constant>(Zext.getOperand(0)))
    return nullptr;

  // If one of the common cast transforms works, do it.
  if (Instruction *Result = commonCastTransforms(Zext))
    return Result;

  Value *Src = Zext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Zext.getType();

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this! Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << Zext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(Res,
                          APInt::getHighBitsSet(DestBitSize,
                                                DestBitSize - SrcBitsKept),
                          0, &Zext))
      return replaceInstUsesWith(Zext, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C = ConstantInt::get(Res->getType(),
                                   APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
  if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved. We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = DestTy->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    //   SrcSize <  DstSize: zext(a & mask)
    //   SrcSize == DstSize: a & mask
    //   SrcSize  > DstSize: trunc(a) & mask
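    // E.g. for A = i32, mid = i8, dest = i16 (SrcSize > DstSize, illustrative):
    //   zext (trunc i32 %a to i8) to i16 --> and (trunc i32 %a to i16), 255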
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, DestTy);
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, DestTy);
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, Zext);

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == DestTy)
    return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, DestTy));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == DestTy) {
    Constant *ZC = ConstantExpr::getZExt(C, DestTy);
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  // If we are truncating, masking, and then zexting back to the original type,
  // that's just a mask. This is not handled by canEvaluateZExtd if the
  // intermediate values have extra uses. This could be generalized further for
  // a non-constant mask operand.
  // zext (and (trunc X), C) --> and X, (zext C)
  if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
      X->getType() == DestTy) {
    Constant *ZextC = ConstantExpr::getZExt(C, DestTy);
    return BinaryOperator::CreateAnd(X, ZextC);
  }

  if (match(Src, m_VScale())) {
    if (Zext.getFunction() &&
        Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
        if (Log2_32(*MaxVScale) < TypeWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Zext, VScale);
        }
      }
    }
  }

  return nullptr;
}

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
                                                 SExtInst &Sext) {
  Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
  ICmpInst::Predicate Pred = Cmp->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
    // sext (x <s 0) --> ashr x, 31 (all ones if negative)
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != Sext.getType())
      In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);

    return replaceInstUsesWith(Sext, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (Cmp->hasOneUse() &&
        Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
      KnownBits Known = computeKnownBits(Op0, 0, &Sext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = Cmp->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                       ConstantInt::getAllOnesValue(Sext.getType()) :
                       ConstantInt::getNullValue(Sext.getType());
          return replaceInstUsesWith(Sext, V);
        }

        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
          unsigned ShiftAmt = KnownZeroMask.countr_zero();
          // Perform a right shift to place the desired bit in the LSB.
          if (ShiftAmt)
            In = Builder.CreateLShr(In,
                                    ConstantInt::get(In->getType(), ShiftAmt));

          // At this point "In" is either 1 or 0. Subtract 1 to turn
          // {1, 0} -> {0, -1}.
          In = Builder.CreateAdd(In,
                                 ConstantInt::getAllOnesValue(In->getType()),
                                 "sext");
        } else {
          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
          unsigned ShiftAmt = KnownZeroMask.countl_zero();
          // Perform a left shift to place the desired bit in the MSB.
          if (ShiftAmt)
            In = Builder.CreateShl(In,
                                   ConstantInt::get(In->getType(), ShiftAmt));

          // Distribute the bit over the whole bit width.
          In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
                                  KnownZeroMask.getBitWidth() - 1), "sext");
        }

        if (Sext.getType() == In->getType())
          return replaceInstUsesWith(Sext, In);
        return CastInst::CreateIntegerCast(In, Sext.getType(), true /*SExt*/);
      }
    }
  }

  return nullptr;
}

/// Return true if we can take the specified value and return it as type Ty
/// without inserting any new casts and without changing the value of the
/// common low bits. This is used by code that tries to promote integer
/// operations to a wider type when doing so will allow us to eliminate the
/// extension.
///
/// This function works on both vectors and scalars.
///
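/// Illustrative example: sext (add i16 %x, %y) to i32 can be computed as
/// add i32 (sext %x), (sext %y) when both operands are free to extend.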
static bool canEvaluateSExtd(Value *V, Type *Ty) {
  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
         "Can't sign extend type to a smaller type");
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These operators can all arbitrarily be extended if their inputs can.
    return canEvaluateSExtd(I->getOperand(0), Ty) &&
           canEvaluateSExtd(I->getOperand(1), Ty);

  //case Instruction::Shl:   TODO
  //case Instruction::LShr:  TODO

  case Instruction::Select:
    return canEvaluateSExtd(I->getOperand(1), Ty) &&
           canEvaluateSExtd(I->getOperand(2), Ty);

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateSExtd(IncValue, Ty)) return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}
1360
1362 // If this sign extend is only used by a truncate, let the truncate be
1363 // eliminated before we try to optimize this sext.
1364 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1365 return nullptr;
1366
1367 if (Instruction *I = commonCastTransforms(Sext))
1368 return I;
1369
1370 Value *Src = Sext.getOperand(0);
1371 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1372 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1373 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1374
1375 // If the value being extended is zero or positive, use a zext instead.
1376 if (isKnownNonNegative(Src, DL, 0, &AC, &Sext, &DT))
1377 return CastInst::Create(Instruction::ZExt, Src, DestTy);
1378
1379 // Try to extend the entire expression tree to the wide destination type.
1380 if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1381 // Okay, we can transform this! Insert the new expression now.
1382 LLVM_DEBUG(
1383 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1384 " to avoid sign extend: "
1385 << Sext << '\n');
1386 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1387 assert(Res->getType() == DestTy);
1388
1389 // If the high bits are already filled with sign bit, just replace this
1390 // cast with the result.
1391 if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
1392 return replaceInstUsesWith(Sext, Res);
1393
1394 // We need to emit a shl + ashr to do the sign extend.
1395 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1396 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1397 ShAmt);
1398 }
1399
1400 Value *X;
1401 if (match(Src, m_Trunc(m_Value(X)))) {
1402 // If the input has more sign bits than bits truncated, then convert
1403 // directly to final type.
1404 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1405 if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
1406 return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1407
1408 // If input is a trunc from the destination type, then convert into shifts.
1409 if (Src->hasOneUse() && X->getType() == DestTy) {
1410 // sext (trunc X) --> ashr (shl X, C), C
1411 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1412 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1413 }
1414
1415 // If we are replacing shifted-in high zero bits with sign bits, convert
1416 // the logic shift to arithmetic shift and eliminate the cast to
1417 // intermediate type:
1418 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1419 Value *Y;
1420 if (Src->hasOneUse() &&
1422 m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
1423 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1424 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1425 }
1426 }
1427
1428 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1429 return transformSExtICmp(Cmp, Sext);
1430
1431 // If the input is a shl/ashr pair of a same constant, then this is a sign
1432 // extension from a smaller value. If we could trust arbitrary bitwidth
1433 // integers, we could turn this into a truncate to the smaller bit and then
1434 // use a sext for the whole extension. Since we don't, look deeper and check
1435 // for a truncate. If the source and dest are the same type, eliminate the
1436 // trunc and extend and just do shifts. For example, turn:
1437 // %a = trunc i32 %i to i8
1438 // %b = shl i8 %a, C
1439 // %c = ashr i8 %b, C
1440 // %d = sext i8 %c to i32
1441 // into:
1442 // %a = shl i32 %i, 32-(8-C)
1443 // %d = ashr i32 %a, 32-(8-C)
1444 Value *A = nullptr;
1445 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1446 Constant *BA = nullptr, *CA = nullptr;
1447 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1448 m_Constant(CA))) &&
1449 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1450 Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
1451 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1452 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1453 Constant *NewShAmt = ConstantExpr::getSub(
1454 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1455 NumLowbitsLeft);
1456 NewShAmt =
1457 Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1458 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1459 return BinaryOperator::CreateAShr(A, NewShAmt);
1460 }
1461
1462 // Splatting a bit of constant-index across a value:
1463 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1464 // If the dest type is different, use a cast (adjust use check).
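// e.g. (illustrative, M = 8, N = 32):
//   sext (ashr (trunc i32 %x to i8), 7) to i32 --> ashr (shl i32 %x, 24), 31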
1465 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1466 m_SpecificInt(SrcBitSize - 1))))) {
1467 Type *XTy = X->getType();
1468 unsigned XBitSize = XTy->getScalarSizeInBits();
1469 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1470 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1471 if (XTy == DestTy)
1472 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1473 AshrAmtC);
1474 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1475 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1476 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1477 }
1478 }
1479
1480 if (match(Src, m_VScale())) {
1481 if (Sext.getFunction() &&
1482 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1483 Attribute Attr =
1484 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1485 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1486 if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
1487 Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1488 return replaceInstUsesWith(Sext, VScale);
1489 }
1490 }
1491 }
1492 }
1493
1494 return nullptr;
1495}
1496
1497/// Return a Constant* for the specified floating-point constant if it fits
1498/// in the specified FP type without changing its value.
1499static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1500 bool losesInfo;
1501 APFloat F = CFP->getValueAPF();
1502 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1503 return !losesInfo;
1504}
1505
1506 static Type *shrinkFPConstant(ConstantFP *CFP) {
1507 if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1508 return nullptr; // No constant folding of this.
1509 // See if the value can be truncated to half and then reextended.
1510 if (fitsInFPType(CFP, APFloat::IEEEhalf()))
1511 return Type::getHalfTy(CFP->getContext());
1512 // See if the value can be truncated to float and then reextended.
1513 if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1514 return Type::getFloatTy(CFP->getContext());
1515 if (CFP->getType()->isDoubleTy())
1516 return nullptr; // Won't shrink.
1517 if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1518 return Type::getDoubleTy(CFP->getContext());
1519 // Don't try to shrink to various long double types.
1520 return nullptr;
1521}
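// Illustrative behavior: a double 2.5 converts to half and back without loss,
// so it shrinks to half; a double 0.1 is inexact in both half and float, so
// no smaller type is returned for it.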
1522
1523// Determine if this is a vector of ConstantFPs and if so, return the minimal
1524// type we can safely truncate all elements to.
1526 auto *CV = dyn_cast<Constant>(V);
1527 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1528 if (!CV || !CVVTy)
1529 return nullptr;
1530
1531 Type *MinType = nullptr;
1532
1533 unsigned NumElts = CVVTy->getNumElements();
1534
1535 // For fixed-width vectors we find the minimal type by looking
1536 // through the constant values of the vector.
1537 for (unsigned i = 0; i != NumElts; ++i) {
1538 if (isa<UndefValue>(CV->getAggregateElement(i)))
1539 continue;
1540
1541 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1542 if (!CFP)
1543 return nullptr;
1544
1545 Type *T = shrinkFPConstant(CFP);
1546 if (!T)
1547 return nullptr;
1548
1549 // If we haven't found a type yet or this type has a larger mantissa than
1550 // our previous type, this is our new minimal type.
1551 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1552 MinType = T;
1553 }
1554
1555 // Make a vector type from the minimal type.
1556 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
1557}
1558
1559/// Find the minimum FP type we can safely truncate to.
1561 if (auto *FPExt = dyn_cast<FPExtInst>(V))
1562 return FPExt->getOperand(0)->getType();
1563
1564 // If this value is a constant, return the constant in the smallest FP type
1565 // that can accurately represent it. This allows us to turn
1566 // (float)((double)X+2.0) into x+2.0f.
1567 if (auto *CFP = dyn_cast<ConstantFP>(V))
1568 if (Type *T = shrinkFPConstant(CFP))
1569 return T;
1570
1571 // We can only correctly find a minimum type for a scalable vector when it is
1572 // a splat. For splats of constant values the fpext is wrapped up as a
1573 // ConstantExpr.
1574 if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1575 if (FPCExt->getOpcode() == Instruction::FPExt)
1576 return FPCExt->getOperand(0)->getType();
1577
1578 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1579 // vectors.
1580 if (Type *T = shrinkFPConstantVector(V))
1581 return T;
1582
1583 return V->getType();
1584}
1585
1586/// Return true if the cast from integer to FP can be proven to be exact for all
1587/// possible inputs (the conversion does not lose any precision).
1589 CastInst::CastOps Opcode = I.getOpcode();
1590 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1591 "Unexpected cast");
1592 Value *Src = I.getOperand(0);
1593 Type *SrcTy = Src->getType();
1594 Type *FPTy = I.getType();
1595 bool IsSigned = Opcode == Instruction::SIToFP;
1596 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1597
1598 // Easy case - if the source integer type has less bits than the FP mantissa,
1599 // then the cast must be exact.
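// e.g. (illustrative): sitofp i16 %x to float is always exact, since
// SrcSize = 15 value bits <= 24 mantissa bits; sitofp i32 to float is not
// exact in general (31 > 24).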
1600 int DestNumSigBits = FPTy->getFPMantissaWidth();
1601 if (SrcSize <= DestNumSigBits)
1602 return true;
1603
1604 // Cast from FP to integer and back to FP is independent of the intermediate
1605 // integer width because of poison on overflow.
1606 Value *F;
1607 if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1608 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1609 // potential rounding of negative FP input values.
1610 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1611 if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1612 SrcNumSigBits++;
1613
1614 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1615 // significant bits than the destination (and make sure neither type is
1616 // weird -- ppc_fp128).
1617 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1618 SrcNumSigBits <= DestNumSigBits)
1619 return true;
1620 }
1621
1622 // TODO:
1623 // Try harder to find if the source integer type has less significant bits.
1624 // For example, compute number of sign bits.
1625 KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
1626 int SigBits = (int)SrcTy->getScalarSizeInBits() -
1627 SrcKnown.countMinLeadingZeros() -
1628 SrcKnown.countMinTrailingZeros();
1629 if (SigBits <= DestNumSigBits)
1630 return true;
1631
1632 return false;
1633}
1634
1635 Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1636 if (Instruction *I = commonCastTransforms(FPT))
1637 return I;
1638
1639 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1640 // simplify this expression to avoid one or more of the trunc/extend
1641 // operations if we can do so without changing the numerical results.
1642 //
1643 // The exact manner in which the widths of the operands interact to limit
1644 // what we can and cannot do safely varies from operation to operation, and
1645 // is explained below in the various case statements.
1646 Type *Ty = FPT.getType();
1647 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1648 if (BO && BO->hasOneUse()) {
1649 Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
1650 Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
1651 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1652 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1653 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1654 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1655 unsigned DstWidth = Ty->getFPMantissaWidth();
1656 switch (BO->getOpcode()) {
1657 default: break;
1658 case Instruction::FAdd:
1659 case Instruction::FSub:
1660 // For addition and subtraction, the infinitely precise result can
1661 // essentially be arbitrarily wide; proving that double rounding
1662 // will not occur because the result of OpI is exact (as we will for
1663 // FMul, for example) is hopeless. However, we *can* nonetheless
1664 // frequently know that double rounding cannot occur (or that it is
1665 // innocuous) by taking advantage of the specific structure of
1666 // infinitely-precise results that admit double rounding.
1667 //
1668 // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1669 // to represent both sources, we can guarantee that the double
1670 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1671 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1672 // for proof of this fact).
1673 //
1674 // Note: Figueroa does not consider the case where DstFormat !=
1675 // SrcFormat. It's possible (likely even!) that this analysis
1676 // could be tightened for those cases, but they are rare (the main
1677 // case of interest here is (float)((double)float + float)).
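// Worked instance of the bound (illustrative): for (float)((double)a + (double)b)
// with float a and b, OpWidth = 53 >= 2*24+1 = 49 and DstWidth = 24 >= SrcWidth,
// so the fold below to a single float fadd is safe.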
1678 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1679 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1680 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1681 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1682 RI->copyFastMathFlags(BO);
1683 return RI;
1684 }
1685 break;
1686 case Instruction::FMul:
1687 // For multiplication, the infinitely precise result has at most
1688 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1689 // that such a value can be exactly represented, then no double
1690 // rounding can possibly occur; we can safely perform the operation
1691 // in the destination format if it can represent both sources.
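// e.g. (illustrative): for (float)((double)a * (double)b) with float a and b,
// OpWidth = 53 >= LHSWidth + RHSWidth = 48, so no double rounding can occur.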
1692 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1693 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1694 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1695 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1696 }
1697 break;
1698 case Instruction::FDiv:
1699 // For division, we again use the bound from Figueroa's
1700 // dissertation. I am entirely certain that this bound can be
1701 // tightened in the unbalanced operand case by an analysis based on
1702 // the diophantine rational approximation bound, but the well-known
1703 // condition used here is a good conservative first pass.
1704 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
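// e.g. (illustrative): for (float)((double)a / (double)b) with float a and b,
// OpWidth = 53 >= 2*DstWidth = 48, so the conservative bound is satisfied.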
1705 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1706 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1707 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1708 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1709 }
1710 break;
1711 case Instruction::FRem: {
1712 // Remainder is straightforward: it is always exact, so the
1713 // type of OpI doesn't enter into things at all. We simply evaluate
1714 // in whichever source type is larger, then convert to the
1715 // destination type.
1716 if (SrcWidth == OpWidth)
1717 break;
1718 Value *LHS, *RHS;
1719 if (LHSWidth == SrcWidth) {
1720 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1721 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1722 } else {
1723 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1724 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1725 }
1726
1727 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1728 return CastInst::CreateFPCast(ExactResult, Ty);
1729 }
1730 }
1731 }
1732
1733 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1734 Value *X;
1735 Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1736 if (Op && Op->hasOneUse()) {
1737 // FIXME: The FMF should propagate from the fptrunc, not the source op.
1738 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1739 if (isa<FPMathOperator>(Op))
1740 Builder.setFastMathFlags(Op->getFastMathFlags());
1741
1742 if (match(Op, m_FNeg(m_Value(X)))) {
1743 Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1744
1745 return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1746 }
1747
1748 // If we are truncating a select that has an extended operand, we can
1749 // narrow the other operand and do the select as a narrow op.
1750 Value *Cond, *X, *Y;
1751 if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1752 X->getType() == Ty) {
1753 // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1754 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1755 Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1756 return replaceInstUsesWith(FPT, Sel);
1757 }
1758 if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1759 X->getType() == Ty) {
1760 // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1761 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1762 Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1763 return replaceInstUsesWith(FPT, Sel);
1764 }
1765 }
1766
1767 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1768 switch (II->getIntrinsicID()) {
1769 default: break;
1770 case Intrinsic::ceil:
1771 case Intrinsic::fabs:
1772 case Intrinsic::floor:
1773 case Intrinsic::nearbyint:
1774 case Intrinsic::rint:
1775 case Intrinsic::round:
1776 case Intrinsic::roundeven:
1777 case Intrinsic::trunc: {
1778 Value *Src = II->getArgOperand(0);
1779 if (!Src->hasOneUse())
1780 break;
1781
1782 // Except for fabs, this transformation requires the input of the unary FP
1783 // operation to be itself an fpext from the type to which we're
1784 // truncating.
1785 if (II->getIntrinsicID() != Intrinsic::fabs) {
1786 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1787 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1788 break;
1789 }
1790
1791 // Do unary FP operation on smaller type.
1792 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1793 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1794 Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
1795 II->getIntrinsicID(), Ty);
1796 SmallVector<OperandBundleDef, 1> OpBundles;
1797 II->getOperandBundlesAsDefs(OpBundles);
1798 CallInst *NewCI =
1799 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1800 NewCI->copyFastMathFlags(II);
1801 return NewCI;
1802 }
1803 }
1804 }
1805
1806 if (Instruction *I = shrinkInsertElt(FPT, Builder))
1807 return I;
1808
1809 Value *Src = FPT.getOperand(0);
1810 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1811 auto *FPCast = cast<CastInst>(Src);
1812 if (isKnownExactCastIntToFP(*FPCast, *this))
1813 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1814 }
1815
1816 return nullptr;
1817}
1818
1819 Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1820 // If the source operand is a cast from integer to FP and known exact, then
1821 // cast the integer operand directly to the destination type.
1822 Type *Ty = FPExt.getType();
1823 Value *Src = FPExt.getOperand(0);
1824 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1825 auto *FPCast = cast<CastInst>(Src);
1826 if (isKnownExactCastIntToFP(*FPCast, *this))
1827 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1828 }
1829
1830 return commonCastTransforms(FPExt);
1831}
1832
1833/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1834/// This is safe if the intermediate type has enough bits in its mantissa to
1835/// accurately represent all values of X. For example, this won't work with
1836/// i64 -> float -> i64.
1838 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1839 return nullptr;
1840
1841 auto *OpI = cast<CastInst>(FI.getOperand(0));
1842 Value *X = OpI->getOperand(0);
1843 Type *XType = X->getType();
1844 Type *DestType = FI.getType();
1845 bool IsOutputSigned = isa<FPToSIInst>(FI);
1846
1847 // Since we can assume the conversion won't overflow, our decision as to
1848 // whether the input will fit in the float should depend on the minimum
1849 // of the input range and output range.
1850
1851 // This means this is also safe for a signed input and unsigned output, since
1852 // a negative input would lead to undefined behavior.
1853 if (!isKnownExactCastIntToFP(*OpI, *this)) {
1854 // The first cast may not round exactly based on the source integer width
1855 // and FP width, but the overflow UB rules can still allow this to fold.
1856 // If the destination type is narrow, that means the intermediate FP value
1857 // must be large enough to hold the source value exactly.
1858 // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
1859 int OutputSize = (int)DestType->getScalarSizeInBits();
1860 if (OutputSize > OpI->getType()->getFPMantissaWidth())
1861 return nullptr;
1862 }
1863
1864 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1865 bool IsInputSigned = isa<SIToFPInst>(OpI);
1866 if (IsInputSigned && IsOutputSigned)
1867 return new SExtInst(X, DestType);
1868 return new ZExtInst(X, DestType);
1869 }
1870 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1871 return new TruncInst(X, DestType);
1872
1873 assert(XType == DestType && "Unexpected types for int to FP to int casts");
1874 return replaceInstUsesWith(FI, X);
1875}
1876
1877 Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
1878 if (Instruction *I = foldItoFPtoI(FI))
1879 return I;
1880
1881 return commonCastTransforms(FI);
1882}
1883
1884 Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
1885 if (Instruction *I = foldItoFPtoI(FI))
1886 return I;
1887
1888 return commonCastTransforms(FI);
1889}
1890
1891 Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
1892 return commonCastTransforms(CI);
1893}
1894
1895 Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
1896 return commonCastTransforms(CI);
1897}
1898
1899 Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
1900 // If the source integer type is not the intptr_t type for this target, do a
1901 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
1902 // cast to be exposed to other transforms.
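// e.g. (illustrative, assuming 64-bit pointers):
//   inttoptr i32 %x to ptr --> %w = zext i32 %x to i64 ; inttoptr i64 %w to ptr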
1903 unsigned AS = CI.getAddressSpace();
1904 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
1905 DL.getPointerSizeInBits(AS)) {
1906 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
1907 DL.getIntPtrType(CI.getContext(), AS));
1908 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
1909 return new IntToPtrInst(P, CI.getType());
1910 }
1911
1912 if (Instruction *I = commonCastTransforms(CI))
1913 return I;
1914
1915 return nullptr;
1916}
1917
1918/// Implement the transforms for cast of pointer (bitcast/ptrtoint)
1920 Value *Src = CI.getOperand(0);
1921
1922 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
1923 // If casting the result of a getelementptr instruction with no offset, turn
1924 // this into a cast of the original pointer!
1925 if (GEP->hasAllZeroIndices() &&
1926 // If CI is an addrspacecast and GEP changes the pointer type, merging
1927 // GEP into CI would undo canonicalizing addrspacecast with different
1928 // pointer types, causing infinite loops.
1929 (!isa<AddrSpaceCastInst>(CI) ||
1930 GEP->getType() == GEP->getPointerOperandType())) {
1931 // Changing the cast operand is usually not a good idea but it is safe
1932 // here because the pointer operand is being replaced with another
1933 // pointer operand so the opcode doesn't need to change.
1934 return replaceOperand(CI, 0, GEP->getOperand(0));
1935 }
1936 }
1937
1938 return commonCastTransforms(CI);
1939}
1940
1941 Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
1942 // If the destination integer type is not the intptr_t type for this target,
1943 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
1944 // to be exposed to other transforms.
1945 Value *SrcOp = CI.getPointerOperand();
1946 Type *SrcTy = SrcOp->getType();
1947 Type *Ty = CI.getType();
1948 unsigned AS = CI.getPointerAddressSpace();
1949 unsigned TySize = Ty->getScalarSizeInBits();
1950 unsigned PtrSize = DL.getPointerSizeInBits(AS);
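// e.g. (illustrative, assuming 64-bit pointers):
//   ptrtoint ptr %p to i32 --> %w = ptrtoint ptr %p to i64 ; trunc i64 %w to i32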
1951 if (TySize != PtrSize) {
1952 Type *IntPtrTy =
1953 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
1954 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
1955 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
1956 }
1957
1958 if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
1959 // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
1960 // While this can increase the number of instructions it doesn't actually
1961 // increase the overall complexity since the arithmetic is just part of
1962 // the GEP otherwise.
1963 if (GEP->hasOneUse() &&
1964 isa<ConstantPointerNull>(GEP->getPointerOperand())) {
1965 return replaceInstUsesWith(CI,
1966 Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
1967 /*isSigned=*/false));
1968 }
1969 }
1970
1971 Value *Vec, *Scalar, *Index;
1972 if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
1973 m_Value(Scalar), m_Value(Index)))) &&
1974 Vec->getType() == Ty) {
1975 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
1976 // Convert the scalar to int followed by insert to eliminate one cast:
1977 // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
1978 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
1979 return InsertElementInst::Create(Vec, NewCast, Index);
1980 }
1981
1982 return commonPointerCastTransforms(CI);
1983}
1984
1985/// This input value (which is known to have vector type) is being zero extended
1986/// or truncated to the specified vector type. Since the zext/trunc is done
1987/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
1988/// endianness will impact which end of the vector that is extended or
1989/// truncated.
1990///
1991/// A vector is always stored with index 0 at the lowest address, which
1992/// corresponds to the most significant bits for a big endian stored integer and
1993/// the least significant bits for little endian. A trunc/zext of an integer
1994/// impacts the big end of the integer. Thus, we need to add/remove elements at
1995/// the front of the vector for big endian targets, and the back of the vector
1996/// for little endian targets.
1997///
1998/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
1999///
2000/// The source and destination vector types may have different element types.
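/// Illustrative little-endian example: growing <2 x i16> to <4 x i16> (an
/// integer zext i32 -> i64 underneath) becomes
///   shufflevector <2 x i16> %v, <2 x i16> zeroinitializer,
///                 <4 x i32> <i32 0, i32 1, i32 2, i32 2>
/// i.e. zero elements are appended at the back (the big end).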
2001static Instruction *
2002 optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2003 InstCombinerImpl &IC) {
2004 // We can only do this optimization if the output is a multiple of the input
2005 // element size, or the input is a multiple of the output element size.
2006 // Convert the input type to have the same element type as the output.
2007 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2008
2009 if (SrcTy->getElementType() != DestTy->getElementType()) {
2010 // The input types don't need to be identical, but for now they must be the
2011 // same size. There is no specific reason we couldn't handle things like
2012 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2013 // there yet.
2014 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2015 DestTy->getElementType()->getPrimitiveSizeInBits())
2016 return nullptr;
2017
2018 SrcTy =
2019 FixedVectorType::get(DestTy->getElementType(),
2020 cast<FixedVectorType>(SrcTy)->getNumElements());
2021 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2022 }
2023
2024 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2025 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2026 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2027
2028 assert(SrcElts != DestElts && "Element counts should be different.");
2029
2030 // Now that the element types match, get the shuffle mask and RHS of the
2031 // shuffle to use, which depends on whether we're increasing or decreasing the
2032 // size of the input.
2033 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2034 ArrayRef<int> ShuffleMask;
2035 Value *V2;
2036
2037 if (SrcElts > DestElts) {
2038 // If we're shrinking the number of elements (rewriting an integer
2039 // truncate), just shuffle in the elements corresponding to the least
2040 // significant bits from the input and use poison as the second shuffle
2041 // input.
2042 V2 = PoisonValue::get(SrcTy);
2043 // Make sure the shuffle mask selects the "least significant bits" by
2044 // keeping elements from the back of the src vector for big endian, and from the
2045 // front for little endian.
2046 ShuffleMask = ShuffleMaskStorage;
2047 if (IsBigEndian)
2048 ShuffleMask = ShuffleMask.take_back(DestElts);
2049 else
2050 ShuffleMask = ShuffleMask.take_front(DestElts);
2051 } else {
2052 // If we're increasing the number of elements (rewriting an integer zext),
2053 // shuffle in all of the elements from InVal. Fill the rest of the result
2054 // elements with zeros from a constant zero.
2055 V2 = Constant::getNullValue(SrcTy);
2056 // Use first elt from V2 when indicating zero in the shuffle mask.
2057 uint32_t NullElt = SrcElts;
2058 // Extend with null values in the "most significant bits" by adding elements
2059 // in front of the src vector for big endian, and at the back for little
2060 // endian.
2061 unsigned DeltaElts = DestElts - SrcElts;
2062 if (IsBigEndian)
2063 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2064 else
2065 ShuffleMaskStorage.append(DeltaElts, NullElt);
2066 ShuffleMask = ShuffleMaskStorage;
2067 }
2068
2069 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2070}
2071
2072static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2073 return Value % Ty->getPrimitiveSizeInBits() == 0;
2074}
2075
2076static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2077 return Value / Ty->getPrimitiveSizeInBits();
2078}
2079
2080/// V is a value which is inserted into a vector of VecEltTy.
2081/// Look through the value to see if we can decompose it into
2082/// insertions into the vector. See the example in the comment for
2083/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2084/// The type of V is always a non-zero multiple of VecEltTy's size.
2085/// Shift is the number of bits between the lsb of V and the lsb of
2086/// the vector.
2087///
2088/// This returns false if the pattern can't be matched or true if it can,
2089/// filling in Elements with the elements found here.
2090static bool collectInsertionElements(Value *V, unsigned Shift,
2091 SmallVectorImpl<Value *> &Elements,
2092 Type *VecEltTy, bool isBigEndian) {
2093 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2094 "Shift should be a multiple of the element type size");
2095
2096 // Undef values never contribute useful bits to the result.
2097 if (isa<UndefValue>(V)) return true;
2098
2099 // If we got down to a value of the right type, we win; try inserting it into
2100 // the right element.
2101 if (V->getType() == VecEltTy) {
2102 // Inserting null doesn't actually insert any elements.
2103 if (Constant *C = dyn_cast<Constant>(V))
2104 if (C->isNullValue())
2105 return true;
2106
2107 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2108 if (isBigEndian)
2109 ElementIndex = Elements.size() - ElementIndex - 1;
2110
2111 // Fail if multiple elements are inserted into this slot.
2112 if (Elements[ElementIndex])
2113 return false;
2114
2115 Elements[ElementIndex] = V;
2116 return true;
2117 }
2118
2119 if (Constant *C = dyn_cast<Constant>(V)) {
2120 // Figure out the # elements this provides, and bitcast it or slice it up
2121 // as required.
2122 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2123 VecEltTy);
2124 // If the constant is the size of a vector element, we just need to bitcast
2125 // it to the right type so it gets properly inserted.
2126 if (NumElts == 1)
2127 return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2128 Shift, Elements, VecEltTy, isBigEndian);
2129
2130 // Okay, this is a constant that covers multiple elements. Slice it up into
2131 // pieces and insert each element-sized piece into the vector.
2132 if (!isa<IntegerType>(C->getType()))
2133 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2134 C->getType()->getPrimitiveSizeInBits()));
2135 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2136 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2137
2138 for (unsigned i = 0; i != NumElts; ++i) {
2139 unsigned ShiftI = Shift+i*ElementSize;
2140 Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
2141 ShiftI));
2142 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2143 if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
2144 isBigEndian))
2145 return false;
2146 }
2147 return true;
2148 }
2149
2150 if (!V->hasOneUse()) return false;
2151
2152 Instruction *I = dyn_cast<Instruction>(V);
2153 if (!I) return false;
2154 switch (I->getOpcode()) {
2155 default: return false; // Unhandled case.
2156 case Instruction::BitCast:
2157 if (I->getOperand(0)->getType()->isVectorTy())
2158 return false;
2159 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2160 isBigEndian);
2161 case Instruction::ZExt:
2162 if (!isMultipleOfTypeSize(
2163 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2164 VecEltTy))
2165 return false;
2166 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2167 isBigEndian);
2168 case Instruction::Or:
2169 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2170 isBigEndian) &&
2171 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2172 isBigEndian);
2173 case Instruction::Shl: {
2174 // Must be shifting by a constant that is a multiple of the element size.
2175 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2176 if (!CI) return false;
2177 Shift += CI->getZExtValue();
2178 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2179 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2180 isBigEndian);
2181 }
2182
2183 }
2184}
2185
2186
2187/// If the input is an 'or' instruction, we may be doing shifts and ors to
2188/// assemble the elements of the vector manually.
2189/// Try to rip the code out and replace it with insertelements. This is to
2190/// optimize code like this:
2191///
2192/// %tmp37 = bitcast float %inc to i32
2193/// %tmp38 = zext i32 %tmp37 to i64
2194/// %tmp31 = bitcast float %inc5 to i32
2195/// %tmp32 = zext i32 %tmp31 to i64
2196/// %tmp33 = shl i64 %tmp32, 32
2197/// %ins35 = or i64 %tmp33, %tmp38
2198/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2199///
2200/// Into two insertelements that do "buildvector{%inc, %inc5}".
2202 InstCombinerImpl &IC) {
2203 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2204 Value *IntInput = CI.getOperand(0);
2205
2206 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2207 if (!collectInsertionElements(IntInput, 0, Elements,
2208 DestVecTy->getElementType(),
2209 IC.getDataLayout().isBigEndian()))
2210 return nullptr;
2211
2212 // If we succeeded, we know that all of the elements are specified by Elements
2213 // or are zero if Elements has a null entry. Recast this as a set of
2214 // insertions.
2215 Value *Result = Constant::getNullValue(CI.getType());
2216 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2217 if (!Elements[i]) continue; // Unset element.
2218
2219 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2220 IC.Builder.getInt32(i));
2221 }
2222
2223 return Result;
2224}
2225
2226/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2227/// vector followed by extract element. The backend tends to handle bitcasts of
2228/// vectors better than bitcasts of scalars because vector registers are
2229/// usually not type-specific like scalar integer or scalar floating-point.
2231 InstCombinerImpl &IC) {
2232 Value *VecOp, *Index;
2233 if (!match(BitCast.getOperand(0),
2234 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2235 return nullptr;
2236
2237 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2238 // type to extract from.
2239 Type *DestType = BitCast.getType();
2240 VectorType *VecType = cast<VectorType>(VecOp->getType());
2241 if (VectorType::isValidElementType(DestType)) {
2242 auto *NewVecType = VectorType::get(DestType, VecType);
2243 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2244 return ExtractElementInst::Create(NewBC, Index);
2245 }
2246
2247 // Only handle a vector DestType, to avoid the inverse transform in visitBitCast.
2248 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2249 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2250 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2251 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2252
2253 return nullptr;
2254}
2255
2256/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2258 InstCombiner::BuilderTy &Builder) {
2259 Type *DestTy = BitCast.getType();
2260 BinaryOperator *BO;
2261
2262 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2263 !BO->isBitwiseLogicOp())
2264 return nullptr;
2265
2266 // FIXME: This transform is restricted to vector types to avoid backend
2267 // problems caused by creating potentially illegal operations. If a fix-up is
2268 // added to handle that situation, we can remove this check.
2269 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2270 return nullptr;
2271
2272 if (DestTy->isFPOrFPVectorTy()) {
2273 Value *X, *Y;
2274 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2275 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2276 match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
2277 if (X->getType()->isFPOrFPVectorTy() &&
2278 Y->getType()->isIntOrIntVectorTy()) {
2279 Value *CastedOp =
2280 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2281 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2282 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2283 }
2284 if (X->getType()->isIntOrIntVectorTy() &&
2285 Y->getType()->isFPOrFPVectorTy()) {
2286 Value *CastedOp =
2287 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2288 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2289 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2290 }
2291 }
2292 return nullptr;
2293 }
2294
2295 if (!DestTy->isIntOrIntVectorTy())
2296 return nullptr;
2297
2298 Value *X;
2299 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2300 X->getType() == DestTy && !isa<Constant>(X)) {
2301 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
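// e.g. (illustrative): with X of type <4 x i32> and a <2 x i64> logic op,
//   bitcast (and (bitcast <4 x i32> %x to <2 x i64>), %y) to <4 x i32>
//   --> and <4 x i32> %x, (bitcast <2 x i64> %y to <4 x i32>)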
2302 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2303 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2304 }
2305
2306 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2307 X->getType() == DestTy && !isa<Constant>(X)) {
2308 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2309 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2310 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2311 }
2312
2313 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2314 // constant. This eases recognition of special constants for later ops.
2315 // Example:
2316 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2317 Constant *C;
2318 if (match(BO->getOperand(1), m_Constant(C))) {
2319 // bitcast (logic X, C) --> logic (bitcast X, C')
2320 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2321 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2322 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2323 }
2324
2325 return nullptr;
2326}
2327
2328/// Change the type of a select if we can eliminate a bitcast.
2330 InstCombiner::BuilderTy &Builder) {
2331 Value *Cond, *TVal, *FVal;
2332 if (!match(BitCast.getOperand(0),
2333 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2334 return nullptr;
2335
2336 // A vector select must maintain the same number of elements in its operands.
2337 Type *CondTy = Cond->getType();
2338 Type *DestTy = BitCast.getType();
2339 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2340 if (!DestTy->isVectorTy() ||
2341 CondVTy->getElementCount() !=
2342 cast<VectorType>(DestTy)->getElementCount())
2343 return nullptr;
2344
2345 // FIXME: This transform is restricted from changing the select between
2346 // scalars and vectors to avoid backend problems caused by creating
2347 // potentially illegal operations. If a fix-up is added to handle that
2348 // situation, we can remove this check.
2349 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2350 return nullptr;
2351
2352 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2353 Value *X;
2354 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2355 !isa<Constant>(X)) {
2356 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2357 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2358 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2359 }
2360
2361 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2362 !isa<Constant>(X)) {
2363 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2364 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2365 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2366 }
2367
2368 return nullptr;
2369}
2370
2371/// Check if all users of CI are StoreInsts.
2372static bool hasStoreUsersOnly(CastInst &CI) {
2373 for (User *U : CI.users()) {
2374 if (!isa<StoreInst>(U))
2375 return false;
2376 }
2377 return true;
2378}
2379
2380/// This function handles following case
2381///
2382/// A -> B cast
2383/// PHI
2384/// B -> A cast
2385///
2386/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2387/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2388Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2389 PHINode *PN) {
2390 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2391 if (hasStoreUsersOnly(CI))
2392 return nullptr;
2393
2394 Value *Src = CI.getOperand(0);
2395 Type *SrcTy = Src->getType(); // Type B
2396 Type *DestTy = CI.getType(); // Type A
2397
2398 SmallVector<PHINode *, 4> PhiWorklist;
2399 SmallSetVector<PHINode *, 4> OldPhiNodes;
2400
2401 // Find all of the A->B casts and PHI nodes.
2402 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2403 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2404 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2405 PhiWorklist.push_back(PN);
2406 OldPhiNodes.insert(PN);
2407 while (!PhiWorklist.empty()) {
2408 auto *OldPN = PhiWorklist.pop_back_val();
2409 for (Value *IncValue : OldPN->incoming_values()) {
2410 if (isa<Constant>(IncValue))
2411 continue;
2412
2413 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2414 // If there is a sequence of one or more load instructions, where each
2415 // loaded value is used as the address of a later load, a bitcast is
2416 // necessary to change the value type; don't optimize that case. For
2417 // simplicity we give up if the load address comes from another load.
2418 Value *Addr = LI->getOperand(0);
2419 if (Addr == &CI || isa<LoadInst>(Addr))
2420 return nullptr;
2421 // Don't transform "load <256 x i32>, <256 x i32>*" to
2422 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2423 // TODO: Remove this check when bitcast between vector and x86_amx
2424 // is replaced with a specific intrinsic.
2425 if (DestTy->isX86_AMXTy())
2426 return nullptr;
2427 if (LI->hasOneUse() && LI->isSimple())
2428 continue;
2429 // If a LoadInst has more than one use, changing the type of loaded
2430 // value may create another bitcast.
2431 return nullptr;
2432 }
2433
2434 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2435 if (OldPhiNodes.insert(PNode))
2436 PhiWorklist.push_back(PNode);
2437 continue;
2438 }
2439
2440 auto *BCI = dyn_cast<BitCastInst>(IncValue);
2441 // We can't handle other instructions.
2442 if (!BCI)
2443 return nullptr;
2444
2445 // Verify it's a A->B cast.
2446 Type *TyA = BCI->getOperand(0)->getType();
2447 Type *TyB = BCI->getType();
2448 if (TyA != DestTy || TyB != SrcTy)
2449 return nullptr;
2450 }
2451 }
2452
2453 // Check that each user of each old PHI node is something that we can
2454 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2455 for (auto *OldPN : OldPhiNodes) {
2456 for (User *V : OldPN->users()) {
2457 if (auto *SI = dyn_cast<StoreInst>(V)) {
2458 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2459 return nullptr;
2460 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2461 // Verify it's a B->A cast.
2462 Type *TyB = BCI->getOperand(0)->getType();
2463 Type *TyA = BCI->getType();
2464 if (TyA != DestTy || TyB != SrcTy)
2465 return nullptr;
2466 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2467 // As long as the user is another old PHI node, then even if we don't
2468 // rewrite it, the PHI web we're considering won't have any users
2469 // outside itself, so it'll be dead.
2470 if (!OldPhiNodes.contains(PHI))
2471 return nullptr;
2472 } else {
2473 return nullptr;
2474 }
2475 }
2476 }
2477
2478 // For each old PHI node, create a corresponding new PHI node with type A.
2479 SmallDenseMap<PHINode *, PHINode *, 4> NewPNodes;
2480 for (auto *OldPN : OldPhiNodes) {
2481 Builder.SetInsertPoint(OldPN);
2482 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2483 NewPNodes[OldPN] = NewPN;
2484 }
2485
2486 // Fill in the operands of new PHI nodes.
2487 for (auto *OldPN : OldPhiNodes) {
2488 PHINode *NewPN = NewPNodes[OldPN];
2489 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2490 Value *V = OldPN->getOperand(j);
2491 Value *NewV = nullptr;
2492 if (auto *C = dyn_cast<Constant>(V)) {
2493 NewV = ConstantExpr::getBitCast(C, DestTy);
2494 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2495 // Explicitly perform load combine to make sure no opposing transform
2496 // can remove the bitcast in the meantime and trigger an infinite loop.
2497 Builder.SetInsertPoint(LI);
2498 NewV = combineLoadToNewType(*LI, DestTy);
2499 // Remove the old load and its use in the old phi, which itself becomes
2500 // dead once the whole transform finishes.
2501 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2502 eraseInstFromFunction(*LI);
2503 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2504 NewV = BCI->getOperand(0);
2505 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2506 NewV = NewPNodes[PrevPN];
2507 }
2508 assert(NewV);
2509 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2510 }
2511 }
2512
2513 // Traverse all accumulated PHI nodes and process their users,
2514 // which are Stores and BitCasts. Without this processing
2515 // NewPHI nodes could be replicated and could lead to extra
2516 // moves generated after DeSSA.
2517 // If there is a store with type B, change it to type A.
2518
2519
2520 // Replace users of BitCast B->A with NewPHI. These will help
2521 // later to get rid of a closure formed by OldPHI nodes.
2522 Instruction *RetVal = nullptr;
2523 for (auto *OldPN : OldPhiNodes) {
2524 PHINode *NewPN = NewPNodes[OldPN];
2525 for (User *V : make_early_inc_range(OldPN->users())) {
2526 if (auto *SI = dyn_cast<StoreInst>(V)) {
2527 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2528 Builder.SetInsertPoint(SI);
2529 auto *NewBC =
2530 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2531 SI->setOperand(0, NewBC);
2532 Worklist.push(SI);
2533 assert(hasStoreUsersOnly(*NewBC));
2534 }
2535 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2536 Type *TyB = BCI->getOperand(0)->getType();
2537 Type *TyA = BCI->getType();
2538 assert(TyA == DestTy && TyB == SrcTy);
2539 (void) TyA;
2540 (void) TyB;
2541 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2542 if (BCI == &CI)
2543 RetVal = I;
2544 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2545 assert(OldPhiNodes.contains(PHI));
2546 (void) PHI;
2547 } else {
2548 llvm_unreachable("all uses should be handled");
2549 }
2550 }
2551 }
2552
2553 return RetVal;
2554}
2555
2556 Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2557 // If the operands are integer typed then apply the integer transforms,
2558 // otherwise just apply the common ones.
2559 Value *Src = CI.getOperand(0);
2560 Type *SrcTy = Src->getType();
2561 Type *DestTy = CI.getType();
2562
2563 // Get rid of casts from one type to the same type. These are useless and can
2564 // be replaced by the operand.
2565 if (DestTy == Src->getType())
2566 return replaceInstUsesWith(CI, Src);
2567
2568 if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
2569 // Beware: messing with this target-specific oddity may cause trouble.
2570 if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
2571 Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
2572 return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
2573 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2574 }
2575
2576 if (isa<IntegerType>(SrcTy)) {
2577 // If this is a cast from an integer to vector, check to see if the input
2578 // is a trunc or zext of a bitcast from vector. If so, we can replace all
2579 // the casts with a shuffle and (potentially) a bitcast.
2580 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2581 CastInst *SrcCast = cast<CastInst>(Src);
2582 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2583 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2584 if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2585 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2586 return I;
2587 }
2588
2589 // If the input is an 'or' instruction, we may be doing shifts and ors to
2590 // assemble the elements of the vector manually. Try to rip the code out
2591 // and replace it with insertelements.
2592 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2593 return replaceInstUsesWith(CI, V);
2594 }
2595 }
2596
2597 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2598 if (SrcVTy->getNumElements() == 1) {
2599 // If our destination is not a vector, then make this a straight
2600 // scalar-scalar cast.
2601 if (!DestTy->isVectorTy()) {
2602 Value *Elem =
2603 Builder.CreateExtractElement(Src,
2604 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2605 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2606 }
2607
2608 // Otherwise, see if our source is an insert. If so, then use the scalar
2609 // component directly:
2610 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2611 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2612 return new BitCastInst(InsElt->getOperand(1), DestTy);
2613 }
2614
2615 // Convert an artificial vector insert into more analyzable bitwise logic.
2616 unsigned BitWidth = DestTy->getScalarSizeInBits();
2617 Value *X, *Y;
2618 uint64_t IndexC;
2619 if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
2620 m_Value(Y), m_ConstantInt(IndexC)))) &&
2621 DestTy->isIntegerTy() && X->getType() == DestTy &&
2622 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2623 // Adjust for big endian - the LSBs are at the high index.
2624 if (DL.isBigEndian())
2625 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2626
2627 // We only handle (endian-normalized) insert to index 0. Any other insert
2628 // would require a left-shift, so that is an extra instruction.
2629 if (IndexC == 0) {
2630 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2631 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2632 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2633 Value *AndX = Builder.CreateAnd(X, MaskC);
2634 Value *ZextY = Builder.CreateZExt(Y, DestTy);
2635 return BinaryOperator::CreateOr(AndX, ZextY);
2636 }
2637 }
2638 }
2639
2640 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2641 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2642 // a bitcast to a vector with the same # elts.
2643 Value *ShufOp0 = Shuf->getOperand(0);
2644 Value *ShufOp1 = Shuf->getOperand(1);
2645 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2646 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2647 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2648 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2649 ShufElts == SrcVecElts) {
2650 BitCastInst *Tmp;
2651 // If either of the operands is a cast from CI.getType(), then
2652 // evaluating the shuffle in the casted destination's type will allow
2653 // us to eliminate at least one cast.
2654 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2655 Tmp->getOperand(0)->getType() == DestTy) ||
2656 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2657 Tmp->getOperand(0)->getType() == DestTy)) {
2658 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2659 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2660 // Return a new shuffle vector. Use the same element ID's, as we
2661 // know the vector types match #elts.
2662 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2663 }
2664 }
2665
2666 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
2667 // as a byte/bit swap:
2668 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2669 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2670 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
2671 Shuf->hasOneUse() && Shuf->isReverse()) {
2672 unsigned IntrinsicNum = 0;
2673 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2674 SrcTy->getScalarSizeInBits() == 8) {
2675 IntrinsicNum = Intrinsic::bswap;
2676 } else if (SrcTy->getScalarSizeInBits() == 1) {
2677 IntrinsicNum = Intrinsic::bitreverse;
2678 }
2679 if (IntrinsicNum != 0) {
2680 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2681 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2682 Function *BswapOrBitreverse =
2683 Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
2684 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2685 return CallInst::Create(BswapOrBitreverse, {ScalarX});
2686 }
2687 }
2688 }
2689
2690 // Handle the A->B->A cast, and there is an intervening PHI node.
2691 if (PHINode *PN = dyn_cast<PHINode>(Src))
2692 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2693 return I;
2694
2695 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2696 return I;
2697
2698 if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
2699 return I;
2700
2701 if (Instruction *I = foldBitCastSelect(CI, Builder))
2702 return I;
2703
2704 if (SrcTy->isPointerTy())
2705 return commonPointerCastTransforms(CI);
2706 return commonCastTransforms(CI);
2707}
2708
2709 Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2710 return commonPointerCastTransforms(CI);
2711}
Rewrite undef for PHI
assume Assume Builder
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool canEvaluateSExtd(Value *V, Type *Ty)
Return true if we can take the specified value and return it as type Ty without inserting any new cas...
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, InstCombinerImpl &IC, Instruction *CxtI)
Determine if the specified value can be computed in the specified wider type and produce the same low...
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static bool canAlwaysEvaluateInType(Value *V, Type *Ty)
Constants and extensions/truncates from the destination type are always free to be evaluated in that ...
static Type * shrinkFPConstant(ConstantFP *CFP)
static bool canNotEvaluateInType(Value *V, Type *Ty)
Filter out values that we can not evaluate in the destination type for free.
static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC)
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
static Type * getMinimumFPType(Value *V)
Find the minimum FP type we can safely truncate to.
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Type * shrinkFPConstantVector(Value *V)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, Instruction *CxtI)
Return true if we can evaluate the specified expression tree as type Ty instead of its larger type,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements a set that has insertion order iteration characteristics.
static unsigned getScalarSizeInBits(Type *Ty)
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1485
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1083
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:264
This class represents a conversion between pointers from one address space to another.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:228
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:235
std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
Definition: Attributes.cpp:381
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name="")
Definition: InstrTypes.h:271
BinaryOps getOpcode() const
Definition: InstrTypes.h:391
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name="")
Definition: InstrTypes.h:266
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:428
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:673
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:668
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, Type *DstIntPtrTy)
Determine how a pair of casts can be eliminated, if they can be at all.
static CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:675
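A hedged sketch of two CastInst entry points from this listing; V, I32Ty, I64Ty, and InsertPt are placeholder setup, and the zext-then-trunc pair is only an example input:
// Fragment; assumes #include "llvm/IR/InstrTypes.h" and placeholder
// llvm::Value *V, llvm::Type *I32Ty, *I64Ty, llvm::Instruction *InsertPt.
llvm::CastInst *Widen = llvm::CastInst::CreateIntegerCast(
    V, I64Ty, /*isSigned=*/false, "widen", InsertPt);

// Zero means the pair cannot fold; otherwise the result is the opcode of
// the single cast that replaces the pair.
unsigned Opc = llvm::CastInst::isEliminableCastPair(
    llvm::Instruction::ZExt, llvm::Instruction::Trunc,
    /*SrcTy=*/I32Ty, /*MidTy=*/I64Ty, /*DstTy=*/I32Ty,
    /*SrcIntPtrTy=*/nullptr, /*MidIntPtrTy=*/nullptr,
    /*DstIntPtrTy=*/nullptr);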
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:711
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:740
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:736
@ ICMP_EQ
equal
Definition: InstrTypes.h:732
@ ICMP_NE
not equal
Definition: InstrTypes.h:733
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:737
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2573
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2103
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, BitCast, or Trunc for integer -> integer casts.
Definition: Constants.cpp:2051
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2089
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2591
static Constant * getLShr(Constant *C1, Constant *C2, bool isExact=false)
Definition: Constants.cpp:2598
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2213
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2075
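A minimal sketch chaining the constant-level helpers above; the i32/i64 types and the LLVMContext are placeholder setup:
// Fragment; assumes #include "llvm/IR/Constants.h" and an llvm::LLVMContext Ctx.
llvm::Type *I32Ty = llvm::Type::getInt32Ty(Ctx);
llvm::Constant *C1 = llvm::ConstantInt::get(I32Ty, 40);
llvm::Constant *C2 = llvm::ConstantInt::get(I32Ty, 2);
llvm::Constant *Shl = llvm::ConstantExpr::getShl(C1, C2);   // i32 160
llvm::Constant *Sub = llvm::ConstantExpr::getSub(Shl, C2);  // i32 158
llvm::Constant *Ext = llvm::ConstantExpr::getIntegerCast(
    Sub, llvm::Type::getInt64Ty(Ctx), /*IsSigned=*/false);  // zext to i64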
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:260
const APFloat & getValueAPF() const
Definition: Constants.h:296
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:145
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
Definition: Constants.cpp:777
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:403
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
Definition: Constants.cpp:284
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits. FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:410
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:876
bool isBigEndian() const
Definition: DataLayout.h:239
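A small sketch of the DataLayout queries above; DL and Ctx are placeholders, and the example results assume a typical 64-bit target:
// Fragment; assumes #include "llvm/IR/DataLayout.h".
unsigned PtrBits = DL.getPointerSizeInBits(/*AS=*/0);     // e.g. 64
bool NativeI64 = DL.isLegalInteger(64);                   // target-dependent
llvm::IntegerType *IntPtrTy = DL.getIntPtrType(Ctx, 0);   // >= pointer width
bool BE = DL.isBigEndian();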
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:536
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:693
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:176
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:671
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:645
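A sketch tying hasFnAttribute/getFnAttribute to the getVScaleRangeMax accessor listed further up; F is a placeholder llvm::Function reference:
// Fragment; assumes #include "llvm/IR/Function.h" and <optional>.
if (F.hasFnAttribute(llvm::Attribute::VScaleRange)) {
  std::optional<unsigned> Max =
      F.getFnAttribute(llvm::Attribute::VScaleRange).getVScaleRangeMax();
  // Max holds the attribute's upper bound, or std::nullopt when unknown.
}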
An instruction for type-safe pointer arithmetic to access elements of arrays and structs.
Definition: Instructions.h:940
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1993
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:88
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2434
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2422
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2063
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2007
Value * CreateFRemFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1645
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:941
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1119
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1428
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:297
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1997
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:472
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2359
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1745
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2089
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1407
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1466
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1318
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2079
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1488
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2126
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2158
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1447
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1510
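A minimal sketch composing several of the IRBuilder calls above into a typical narrow-then-widen sequence; V is a placeholder i64 value and InsertPt a placeholder insertion point:
// Fragment; assumes #include "llvm/IR/IRBuilder.h".
llvm::IRBuilder<> B(InsertPt);
llvm::Value *Narrow = B.CreateTrunc(V, B.getInt32Ty());      // i64 -> i32
llvm::Value *Masked = B.CreateAnd(Narrow, B.getInt32(0xFF)); // keep low byte
llvm::Value *Wide   = B.CreateZExt(Masked, V->getType());    // i32 -> i64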
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * foldItoFPtoI(CastInst &FI)
fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X). This is safe if the intermediate ty...
Instruction * visitFPTrunc(FPTruncInst &CI)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
Instruction * commonPointerCastTransforms(CastInst &CI)
Implement the transforms for cast of pointer (bitcast/ptrtoint)
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:379
TargetLibraryInfo & TLI
Definition: InstCombiner.h:72
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:424
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:63
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:413
const DataLayout & DL
Definition: InstCombiner.h:74
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:490
AssumptionCache & AC
Definition: InstCombiner.h:71
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombiner.h:448
DominatorTree & DT
Definition: InstCombiner.h:73
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:469
BuilderTy & Builder
Definition: InstCombiner.h:59
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:485
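As a purely illustrative sketch (not the pass's actual code) of the visitor contract implied by the entries above: a visit method can hand a value to replaceInstUsesWith and return the resulting instruction to signal a change, or return nullptr when no fold applies.
// Hypothetical fragment inside a visit method; CI is the cast being visited.
if (CI.getSrcTy() == CI.getDestTy())                // the cast is a no-op
  return replaceInstUsesWith(CI, CI.getOperand(0));
return nullptr;                                     // no fold found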
void push(Instruction *I)
Push the instruction onto the worklist stack.
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:244
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:71
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:87
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:75
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:279
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
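A minimal sketch of the PHINode accessors above; PN is a placeholder PHINode pointer, and the loop structurally clones its incoming list:
// Fragment; assumes #include "llvm/IR/Instructions.h".
llvm::PHINode *Clone =
    llvm::PHINode::Create(PN->getType(), PN->getNumIncomingValues());
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
  Clone->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i));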
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1743
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This instruction constructs a fixed permutation of two input vectors.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:234
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
bool isX86_MMXTy() const
Return true if this is X86 MMX.
Definition: Type.h:201
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewType(Type *EltTy) const
Given a vector type, change the element type, whilst keeping the old number of elements.
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:262
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:204
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static Type * getPPC_FP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
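A small sketch of the Type queries above on an assumed <4 x i64> vector type; Ctx is a placeholder LLVMContext:
// Fragment; assumes #include "llvm/IR/DerivedTypes.h".
llvm::Type *I64Ty = llvm::Type::getInt64Ty(Ctx);
llvm::Type *VecTy = llvm::FixedVectorType::get(I64Ty, 4);  // <4 x i64>
bool IsIntVec = VecTy->isIntOrIntVectorTy();               // true
unsigned EltBits = VecTy->getScalarSizeInBits();           // 64
llvm::Type *EltTy = VecTy->getScalarType();                // i64
llvm::Type *Half = VecTy->getWithNewType(
    llvm::Type::getInt32Ty(Ctx));                          // <4 x i32>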
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:163
'undef' values are things that do not have specified contents.
Definition: Constants.h:1368
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1724
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1069
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:384
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Definition: Type.cpp:684
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:677
This class represents zero extension of integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1422
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:862
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match only the specified value.
Definition: PatternMatch.h:780
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr or ashr).
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:525
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:798
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:532
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:759
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
CastClass_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
CastClass_match< OpTy, Instruction::FPToSI > m_FPToSI(const OpTy &Op)
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
CastClass_match< OpTy, Instruction::FPToUI > m_FPToUI(const OpTy &Op)
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
VScaleVal_match m_VScale()
specific_intval< true > m_SpecificIntAllowUndef(APInt V)
Definition: PatternMatch.h:870
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:994
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:218
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eq/ne/...) to Threshold.
Definition: PatternMatch.h:606
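A minimal sketch of the matcher combinators above; V is a placeholder value being inspected:
// Fragment; assumes #include "llvm/IR/PatternMatch.h".
using namespace llvm::PatternMatch;
llvm::Value *X;
const llvm::APInt *ShAmt;
if (match(V, m_OneUse(m_Shl(m_Value(X), m_APInt(ShAmt))))) {
  // V is a single-use 'shl X, C'; X and ShAmt are now bound.
}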
Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:666
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
@ SPF_UNKNOWN
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
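A tiny sketch of the two MathExtras helpers above:
// Fragment; assumes #include "llvm/Support/MathExtras.h".
bool Pow2 = llvm::isPowerOf2_32(64);  // true: 64 == 1u << 6
unsigned L = llvm::Log2_32(64);       // 6 (floor log2; zero maps to -1 per the note above)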
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2227
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the given value is known to be non-negative.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list is empty.
Definition: STLExtras.h:2009
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:247
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:306
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:233
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:239
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:136
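A sketch combining the free computeKnownBits function listed above with the KnownBits accessors; V and DL are placeholders, and the KnownBits width must match V's integer width (assumed 32 here):
// Fragment; assumes #include "llvm/Analysis/ValueTracking.h" and
// #include "llvm/Support/KnownBits.h".
llvm::KnownBits Known(32);
llvm::computeKnownBits(V, Known, DL);         // fills Known.Zero/Known.One
unsigned TZ = Known.countMinTrailingZeros();  // low bits proven zero
unsigned LZ = Known.countMinLeadingZeros();   // high bits proven zero
llvm::APInt Max = Known.getMaxValue();        // largest value consistent with Known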