//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Given an expression that canEvaluateTruncated or canEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
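/// For example (illustrative), evaluating
///   trunc (add i64 %a, %b) to i32
/// in i32 rebuilds the tree as
///   add i32 (trunc i64 %a), (trunc i64 %b)
/// where the operand casts are in turn folded recursively by this function.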
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantFoldIntegerCast(C, Ty, isSigned, DL);

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source. There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI:
    Res = CastInst::Create(
        static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
    break;
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default:
        llvm_unreachable("Unsupported call!");
      case Intrinsic::vscale: {
        Function *Fn =
            Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
        Res = CallInst::Create(Fn->getFunctionType(), Fn);
        break;
      }
      }
    }
    break;
  case Instruction::ShuffleVector: {
    auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
    auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
    auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
    Value *Op0 = EvaluateInDifferentType(I->getOperand(0), FixedTy, isSigned);
    Value *Op1 = EvaluateInDifferentType(I->getOperand(1), FixedTy, isSigned);
    Res = new ShuffleVectorInst(Op0, Op1,
                                cast<ShuffleVectorInst>(I)->getShuffleMask());
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, I->getIterator());
}

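// Cast-pair elimination, illustrated (an example, not the full legality
// matrix, which lives in CastInst::isEliminableCastPair):
//   %m = zext i8 %x to i16
//   %d = zext i16 %m to i32
// is eliminable to a single 'zext i8 %x to i32'.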
Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  if (auto *SrcC = dyn_cast<Constant>(Src))
    if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
      return replaceInstUsesWith(CI, Res);

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands of a different size than its condition
    // may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {

      // If it's a bitcast involving vectors, make sure it has the same number
      // of elements on both sides.
      if (CI.getOpcode() != Instruction::BitCast ||
          match(&CI, m_ElementWiseBitCast(m_Value()))) {
        if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
          replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
          return NV;
        }
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return match(V, m_ImmConstant());

  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we cannot evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V's type. We should return true if
/// trunc(V) can be computed by computing V in the smaller type. If V is an
/// instruction, then trunc(inst(x,y)) can be computed as
/// inst(trunc(x),trunc(y)), which only makes sense if x and y can be
/// efficiently truncated.
///
/// This function works on both vectors and scalars.
///
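/// For example (illustrative):
///   trunc (and i32 %x, 42) to i8
/// can instead be computed directly in i8 as 'and i8 (trunc %x), 42'.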
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    // Do not preserve the original context instruction. Simplifying div/rem
    // based on later context may introduce a trap.
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, I) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, I)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, I) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, I);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type down to the sign bit of the truncated type are sign-bit
    // copies.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to the sign bit of the truncated type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  case Instruction::FPToUI:
  case Instruction::FPToSI: {
    // If the integer type can hold the max FP value, it is safe to cast
    // directly to that type. Otherwise, we may create poison via overflow
    // that did not exist in the original code.
    Type *InputTy = I->getOperand(0)->getType()->getScalarType();
    const fltSemantics &Semantics = InputTy->getFltSemantics();
    uint32_t MinBitWidth =
        APFloatBase::semanticsIntSizeInBits(Semantics,
            I->getOpcode() == Instruction::FPToSI);
    return Ty->getScalarSizeInBits() >= MinBitWidth;
  }
  case Instruction::ShuffleVector:
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
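/// For example (illustrative, a rotate-left of an i8 value promoted to i32,
/// where %x32 is 'zext i8 %x to i32'):
///   trunc (or (shl %x32, %k), (lshr %x32, (sub 8, %k))) to i8
///     --> call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 %k')
/// with %k' being %k zext'd or trunc'd to i8.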
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount cannot over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // Adjust the width of ShAmt for the narrowed funnel shift operation:
  // - Zero-extend if ShAmt is narrower than the destination type.
  // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
  // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
  // zext/trunc(ShAmt)).
  Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);

  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
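/// For example (illustrative, assuming the add has no other uses):
///   trunc (add i32 %a, 42) to i8  -->  add i8 (trunc i32 %a to i8), 42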
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
  unsigned DestWidth = DestTy->getScalarSizeInBits();

  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }
  case Instruction::LShr:
  case Instruction::AShr: {
    // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
    Value *A;
    Constant *C;
    if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
      unsigned MaxShiftAmt = SrcWidth - DestWidth;
      // If the shift is small enough, all zero/sign bits created by the shift
      // are removed by the trunc.
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                      APInt(SrcWidth, MaxShiftAmt)))) {
        auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
        bool IsExact = OldShift->isExact();
        if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
                                                      /*IsSigned*/ true, DL)) {
          ShAmt = Constant::mergeUndefsWith(ShAmt, C);
          Value *Shift =
              OldShift->getOpcode() == Instruction::AShr
                  ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
                  : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
          return CastInst::CreateTruncOrBitCast(Shift, DestTy);
        }
      }
    }
    break;
  }
  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
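/// For example (illustrative):
///   trunc (shufflevector <4 x i32> %x, poison, zeroinitializer) to <4 x i8>
///     --> shufflevector (trunc %x to <4 x i8>), poison, zeroinitializer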
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      all_equal(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
    // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
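/// For example (illustrative):
///   trunc (insertelement <2 x i32> undef, i32 %x, i64 0) to <2 x i16>
///     --> insertelement <2 x i16> undef, i16 (trunc %x), i64 0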
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type. Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of the operand of "
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);

    Value *X;
    const APInt *C1;
    Constant *C2;
    if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
                                  m_ImmConstant(C2))))) {
      // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
      Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
      Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
      return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
    }

    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Value *MaskC = Builder.CreateShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Value *MaskC = Builder.CreateShl(One, C);
      Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    {
      const APInt *C;
      if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
        // trunc (C << X) to i1 --> X == 0, where C is odd
        return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
      }
    }

    if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
      Value *X, *Y;
      if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
        return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto GetNewShAmt = [&](unsigned Width) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
        Constant *Cmp =
            ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
        Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
        return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
                                       DL);
      };

      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *ShAmt = GetNewShAmt(DestWidth);
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *ShAmt = GetNewShAmt(AWidth);
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shifts by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend-in-register pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // An ill-fitting destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B)) --> add (ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale())) {
    if (Trunc.getFunction() &&
        Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        if (Log2_32(*MaxVScale) < DestWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Trunc, VScale);
        }
      }
    }
  }

  bool Changed = false;
  if (!Trunc.hasNoSignedWrap() &&
      ComputeMaxSignificantBits(Src, /*Depth=*/0, &Trunc) <= DestWidth) {
    Trunc.setHasNoSignedWrap(true);
    Changed = true;
  }
  if (!Trunc.hasNoUnsignedWrap() &&
      MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
                        /*Depth=*/0, &Trunc)) {
    Trunc.setHasNoUnsignedWrap(true);
    Changed = true;
  }

  return Changed ? &Trunc : nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
                                                 ZExtInst &Zext) {
  // If we are just checking for an icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.

  // FIXME: This set of transforms does not check for extra uses and/or creates
  //        an extra instruction (an optional final cast is not included
  //        in the transform comments). We may also want to favor icmp over
  //        shifts in cases of equal instructions because icmp has better
  //        analysis in general (invert the transform).

  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s 0) to i32 --> x >>u 31 -- true if the sign bit is set.
    if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.

    if (Op1CV->isZero() && Cmp->isEquality()) {
      // Is there exactly one possible set bit? Not the high bit, because that
      // case is canonicalized to this form.
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
      APInt KnownZeroMask(~Known.Zero);
      uint32_t ShAmt = KnownZeroMask.logBase2();
      bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
                           (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
      if (IsExpectShAmt &&
          (Cmp->getOperand(0)->getType() == Zext.getType() ||
           Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        // Toggle the low bit for "X == 0".
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
      Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
      if (Zext.getType() == And->getType() ||
          Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
        if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
          X = Builder.CreateNot(X);
        Value *Lshr = Builder.CreateLShr(X, ShAmt);
        Value *And1 =
            Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
        return replaceInstUsesWith(
            Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
      }
    }
  }

  return nullptr;
}

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out. For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// canEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63. Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               0, CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      uint64_t ShiftAmt = Amt->getZExtValue();
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x. This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += Amt->getZExtValue();
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear
        // are known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  case Instruction::Call:
    // llvm.vscale() can always be executed in a larger type, because the
    // value is automatically zero-extended.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
      if (II->getIntrinsicID() == Intrinsic::vscale)
        return true;
    return false;
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
      !isa<Constant>(Zext.getOperand(0)))
    return nullptr;

  // If one of the common conversions will work, do it.
  if (Instruction *Result = commonCastTransforms(Zext))
    return Result;

  Value *Src = Zext.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = Zext.getType();

  // zext nneg bool x -> 0
  if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
    return replaceInstUsesWith(Zext, Constant::getNullValue(Zext.getType()));

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &Zext)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this! Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << Zext << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(Res,
                          APInt::getHighBitsSet(DestBitSize,
                                                DestBitSize - SrcBitsKept),
                          0, &Zext))
      return replaceInstUsesWith(Zext, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C = ConstantInt::get(Res->getType(),
                                   APInt::getLowBitsSet(DestBitSize,
                                                        SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
  if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved. We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = DestTy->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    // SrcSize <  DstSize: zext(a & mask)
    // SrcSize == DstSize: a & mask
    // SrcSize  > DstSize: trunc(a) & mask
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, DestTy);
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, DestTy);
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (auto *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, Zext);

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == DestTy)
    return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == DestTy) {
    Value *ZC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  // If we are truncating, masking, and then zexting back to the original type,
  // that's just a mask. This is not handled by canEvaluateZExtd if the
  // intermediate values have extra uses. This could be generalized further for
  // a non-constant mask operand.
  // zext (and (trunc X), C) --> and X, (zext C)
  if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
      X->getType() == DestTy) {
    Value *ZextC = Builder.CreateZExt(C, DestTy);
    return BinaryOperator::CreateAnd(X, ZextC);
  }

  if (match(Src, m_VScale())) {
    if (Zext.getFunction() &&
        Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      Attribute Attr =
          Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
      if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
        unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
        if (Log2_32(*MaxVScale) < TypeWidth) {
          Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
          return replaceInstUsesWith(Zext, VScale);
        }
      }
    }
  }

  if (!Zext.hasNonNeg()) {
    // If this zero extend is only used by a shift, add the nneg flag.
    if (Zext.hasOneUse() &&
        SrcTy->getScalarSizeInBits() >
            Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
        match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
      Zext.setNonNeg();
      return &Zext;
    }

    if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
      Zext.setNonNeg();
      return &Zext;
    }
  }

  return nullptr;
}

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
                                                 SExtInst &Sext) {
  Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
  ICmpInst::Predicate Pred = Cmp->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
    // sext (x <s 0) --> ashr x, 31 (all ones if negative)
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != Sext.getType())
      In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);

    return replaceInstUsesWith(Sext, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (Cmp->hasOneUse() &&
        Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
      KnownBits Known = computeKnownBits(Op0, 0, &Sext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = Cmp->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                       ConstantInt::getAllOnesValue(Sext.getType()) :
                       ConstantInt::getNullValue(Sext.getType());
          return replaceInstUsesWith(Sext, V);
        }

        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
          unsigned ShiftAmt = KnownZeroMask.countr_zero();
          // Perform a right shift to place the desired bit in the LSB.
          if (ShiftAmt)
            In = Builder.CreateLShr(In,
                                    ConstantInt::get(In->getType(), ShiftAmt));

          // At this point "In" is either 1 or 0. Subtract 1 to turn
          // {1, 0} -> {0, -1}.
          In = Builder.CreateAdd(In,
                                 ConstantInt::getAllOnesValue(In->getType()),
                                 "sext");
        } else {
          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
          unsigned ShiftAmt = KnownZeroMask.countl_zero();
          // Perform a left shift to place the desired bit in the MSB.
          if (ShiftAmt)
            In = Builder.CreateShl(In,
                                   ConstantInt::get(In->getType(), ShiftAmt));

          // Distribute the bit over the whole bit width.
          In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
                                  KnownZeroMask.getBitWidth() - 1), "sext");
        }

        if (Sext.getType() == In->getType())
          return replaceInstUsesWith(Sext, In);
        return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
      }
    }
  }

  return nullptr;
}

/// Return true if we can take the specified value and return it as type Ty
/// without inserting any new casts and without changing the value of the
/// common low bits. This is used by code that tries to promote integer
/// operations to a wider type, which will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
///
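/// For example (illustrative): computing 'add i8 %x, %y' as
/// 'add i32 (sext %x), (sext %y)' preserves the low 8 bits, so the caller can
/// reuse the wide value (inserting a shl/ashr pair only if the high bits are
/// not already sign bits).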
static bool canEvaluateSExtd(Value *V, Type *Ty) {
  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
         "Can't sign extend type to a smaller type");
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These operators can all arbitrarily be extended if their inputs can.
    return canEvaluateSExtd(I->getOperand(0), Ty) &&
           canEvaluateSExtd(I->getOperand(1), Ty);

  // case Instruction::Shl:  TODO
  // case Instruction::LShr: TODO

  case Instruction::Select:
    return canEvaluateSExtd(I->getOperand(1), Ty) &&
           canEvaluateSExtd(I->getOperand(2), Ty);

  case Instruction::PHI: {
    // We can change a phi if we can change all operands. Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateSExtd(IncValue, Ty)) return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

1436 // If this sign extend is only used by a truncate, let the truncate be
1437 // eliminated before we try to optimize this sext.
1438 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1439 return nullptr;
1440
1441 if (Instruction *I = commonCastTransforms(Sext))
1442 return I;
1443
1444 Value *Src = Sext.getOperand(0);
1445 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1446 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1447 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1448
1449 // If the value being extended is zero or positive, use a zext instead.
1450 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1451 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1452 CI->setNonNeg(true);
1453 return CI;
1454 }
1455
1456 // Try to extend the entire expression tree to the wide destination type.
1457 if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1458 // Okay, we can transform this! Insert the new expression now.
1459 LLVM_DEBUG(
1460 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1461 " to avoid sign extend: "
1462 << Sext << '\n');
1463 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1464 assert(Res->getType() == DestTy);
1465
1466 // If the high bits are already filled with sign bit, just replace this
1467 // cast with the result.
1468 if (ComputeNumSignBits(Res, 0, &Sext) > DestBitSize - SrcBitSize)
1469 return replaceInstUsesWith(Sext, Res);
1470
1471 // We need to emit a shl + ashr to do the sign extend.
1472 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1473 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1474 ShAmt);
1475 }
1476
1477 Value *X;
1478 if (match(Src, m_Trunc(m_Value(X)))) {
1479 // If the input has more sign bits than bits truncated, then convert
1480 // directly to the final type.
1481 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1482 if (ComputeNumSignBits(X, 0, &Sext) > XBitSize - SrcBitSize)
1483 return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1484
1485 // If input is a trunc from the destination type, then convert into shifts.
1486 if (Src->hasOneUse() && X->getType() == DestTy) {
1487 // sext (trunc X) --> ashr (shl X, C), C
1488 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1489 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1490 }
1491
1492 // If we are replacing shifted-in high zero bits with sign bits, convert
1493 // the logic shift to arithmetic shift and eliminate the cast to
1494 // intermediate type:
1495 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1496 Value *Y;
1497 if (Src->hasOneUse() &&
1498 match(X, m_LShr(m_Value(Y),
1499 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1500 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1501 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1502 }
1503 }
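// Illustrative annotation (not from the upstream source): the lshr -> ashr
// fold above, with XBitSize = 32 and SrcBitSize = 16, turns
//   %x = lshr i32 %y, 16
//   %t = trunc i32 %x to i16
//   %s = sext i16 %t to i64
// into
//   %a = ashr i32 %y, 16
//   %s = sext i32 %a to i64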
1504
1505 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1506 return transformSExtICmp(Cmp, Sext);
1507
1508 // If the input is a shl/ashr pair of the same constant, then this is a sign
1509 // extension from a smaller value. If we could trust arbitrary bitwidth
1510 // integers, we could turn this into a truncate to the smaller bit width and then
1511 // use a sext for the whole extension. Since we don't, look deeper and check
1512 // for a truncate. If the source and dest are the same type, eliminate the
1513 // trunc and extend and just do shifts. For example, turn:
1514 // %a = trunc i32 %i to i8
1515 // %b = shl i8 %a, C
1516 // %c = ashr i8 %b, C
1517 // %d = sext i8 %c to i32
1518 // into:
1519 // %a = shl i32 %i, 32-(8-C)
1520 // %d = ashr i32 %a, 32-(8-C)
1521 Value *A = nullptr;
1522 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1523 Constant *BA = nullptr, *CA = nullptr;
1524 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1525 m_ImmConstant(CA))) &&
1526 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1527 Constant *WideCurrShAmt =
1528 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1529 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1530 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1531 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1532 Constant *NewShAmt = ConstantExpr::getSub(
1533 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1534 NumLowbitsLeft);
1535 NewShAmt =
1536 Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1537 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1538 return BinaryOperator::CreateAShr(A, NewShAmt);
1539 }
1540
1541 // Splatting a bit of constant-index across a value:
1542 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1543 // If the dest type is different, use a cast (adjust use check).
1544 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1545 m_SpecificInt(SrcBitSize - 1))))) {
1546 Type *XTy = X->getType();
1547 unsigned XBitSize = XTy->getScalarSizeInBits();
1548 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1549 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1550 if (XTy == DestTy)
1551 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1552 AshrAmtC);
1553 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1554 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1555 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1556 }
1557 }
1558
1559 if (match(Src, m_VScale())) {
1560 if (Sext.getFunction() &&
1561 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1562 Attribute Attr =
1563 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1564 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1565 if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
1566 Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1567 return replaceInstUsesWith(Sext, VScale);
1568 }
1569 }
1570 }
1571 }
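// Illustrative annotation (not from the upstream source): under
// vscale_range(1,16), vscale needs at most 5 bits, so an i8 source cannot be
// negative and the sign extension is a no-op:
//   sext (call i8 @llvm.vscale.i8()) to i64 --> call i64 @llvm.vscale.i64()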
1572
1573 return nullptr;
1574}
1575
1576/// Return true if the specified floating-point constant fits in the specified
1577/// FP type without changing its value.
1578static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1579 bool losesInfo;
1580 APFloat F = CFP->getValueAPF();
1581 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1582 return !losesInfo;
1583}
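// Illustrative annotation (not from the upstream source): 0.5 converts to
// half and back without losing information, so fitsInFPType returns true;
// 16777217.0 (2^24 + 1) does not fit in float's 24-bit significand, so for
// float it returns false.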
1584
1585static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
1586 if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1587 return nullptr; // No constant folding of this.
1588 // See if the value can be truncated to bfloat and then reextended.
1589 if (PreferBFloat && fitsInFPType(CFP, APFloat::BFloat()))
1590 return Type::getBFloatTy(CFP->getContext());
1591 // See if the value can be truncated to half and then reextended.
1592 if (!PreferBFloat && fitsInFPType(CFP, APFloat::IEEEhalf()))
1593 return Type::getHalfTy(CFP->getContext());
1594 // See if the value can be truncated to float and then reextended.
1595 if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1596 return Type::getFloatTy(CFP->getContext());
1597 if (CFP->getType()->isDoubleTy())
1598 return nullptr; // Won't shrink.
1599 if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1600 return Type::getDoubleTy(CFP->getContext());
1601 // Don't try to shrink to various long double types.
1602 return nullptr;
1603}
1604
1605// Determine if this is a vector of ConstantFPs and if so, return the minimal
1606// type we can safely truncate all elements to.
1607static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
1608 auto *CV = dyn_cast<Constant>(V);
1609 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1610 if (!CV || !CVVTy)
1611 return nullptr;
1612
1613 Type *MinType = nullptr;
1614
1615 unsigned NumElts = CVVTy->getNumElements();
1616
1617 // For fixed-width vectors we find the minimal type by looking
1618 // through the constant values of the vector.
1619 for (unsigned i = 0; i != NumElts; ++i) {
1620 if (isa<UndefValue>(CV->getAggregateElement(i)))
1621 continue;
1622
1623 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1624 if (!CFP)
1625 return nullptr;
1626
1627 Type *T = shrinkFPConstant(CFP, PreferBFloat);
1628 if (!T)
1629 return nullptr;
1630
1631 // If we haven't found a type yet or this type has a larger mantissa than
1632 // our previous type, this is our new minimal type.
1633 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1634 MinType = T;
1635 }
1636
1637 // Make a vector type from the minimal type.
1638 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
1639}
1640
1641/// Find the minimum FP type we can safely truncate to.
1642static Type *getMinimumFPType(Value *V, bool PreferBFloat) {
1643 if (auto *FPExt = dyn_cast<FPExtInst>(V))
1644 return FPExt->getOperand(0)->getType();
1645
1646 // If this value is a constant, return the constant in the smallest FP type
1647 // that can accurately represent it. This allows us to turn
1648 // (float)((double)X+2.0) into X+2.0f.
1649 if (auto *CFP = dyn_cast<ConstantFP>(V))
1650 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
1651 return T;
1652
1653 // We can only correctly find a minimum type for a scalable vector when it is
1654 // a splat. For splats of constant values the fpext is wrapped up as a
1655 // ConstantExpr.
1656 if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1657 if (FPCExt->getOpcode() == Instruction::FPExt)
1658 return FPCExt->getOperand(0)->getType();
1659
1660 // Try to shrink a vector of FP constants. This returns nullptr on scalable
1661 // vectors.
1662 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
1663 return T;
1664
1665 return V->getType();
1666}
1667
1668/// Return true if the cast from integer to FP can be proven to be exact for all
1669/// possible inputs (the conversion does not lose any precision).
1670static bool isKnownExactCastIntToFP(CastInst &I, InstCombinerImpl &IC) {
1671 CastInst::CastOps Opcode = I.getOpcode();
1672 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1673 "Unexpected cast");
1674 Value *Src = I.getOperand(0);
1675 Type *SrcTy = Src->getType();
1676 Type *FPTy = I.getType();
1677 bool IsSigned = Opcode == Instruction::SIToFP;
1678 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1679
1680 // Easy case - if the source integer type has fewer bits than the FP mantissa,
1681 // then the cast must be exact.
1682 int DestNumSigBits = FPTy->getFPMantissaWidth();
1683 if (SrcSize <= DestNumSigBits)
1684 return true;
1685
1686 // Cast from FP to integer and back to FP is independent of the intermediate
1687 // integer width because of poison on overflow.
1688 Value *F;
1689 if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1690 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1691 // potential rounding of negative FP input values.
1692 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1693 if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1694 SrcNumSigBits++;
1695
1696 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1697 // significant bits than the destination (and make sure neither type is
1698 // weird -- ppc_fp128).
1699 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1700 SrcNumSigBits <= DestNumSigBits)
1701 return true;
1702 }
1703
1704 // TODO:
1705 // Try harder to find if the source integer type has fewer significant bits.
1706 // For example, compute number of sign bits.
1707 KnownBits SrcKnown = IC.computeKnownBits(Src, 0, &I);
1708 int SigBits = (int)SrcTy->getScalarSizeInBits() -
1709 SrcKnown.countMinLeadingZeros() -
1710 SrcKnown.countMinTrailingZeros();
1711 if (SigBits <= DestNumSigBits)
1712 return true;
1713
1714 return false;
1715}
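// Illustrative annotation (not from the upstream source): sitofp i16 %x to
// double is always exact (15 value bits vs. a 53-bit significand), while
// uitofp i64 %x to float generally is not (64 bits vs. a 24-bit significand).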
1716
1717Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1718 if (Instruction *I = commonCastTransforms(FPT))
1719 return I;
1720
1721 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1722 // simplify this expression to avoid one or more of the trunc/extend
1723 // operations if we can do so without changing the numerical results.
1724 //
1725 // The exact manner in which the widths of the operands interact to limit
1726 // what we can and cannot do safely varies from operation to operation, and
1727 // is explained below in the various case statements.
1728 Type *Ty = FPT.getType();
1729 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1730 if (BO && BO->hasOneUse()) {
1731 Type *LHSMinType =
1732 getMinimumFPType(BO->getOperand(0), /*PreferBFloat=*/Ty->isBFloatTy());
1733 Type *RHSMinType =
1734 getMinimumFPType(BO->getOperand(1), /*PreferBFloat=*/Ty->isBFloatTy());
1735 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1736 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1737 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1738 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1739 unsigned DstWidth = Ty->getFPMantissaWidth();
1740 switch (BO->getOpcode()) {
1741 default: break;
1742 case Instruction::FAdd:
1743 case Instruction::FSub:
1744 // For addition and subtraction, the infinitely precise result can
1745 // essentially be arbitrarily wide; proving that double rounding
1746 // will not occur because the result of OpI is exact (as we will for
1747 // FMul, for example) is hopeless. However, we *can* nonetheless
1748 // frequently know that double rounding cannot occur (or that it is
1749 // innocuous) by taking advantage of the specific structure of
1750 // infinitely-precise results that admit double rounding.
1751 //
1752 // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1753 // to represent both sources, we can guarantee that the double
1754 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1755 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1756 // for proof of this fact).
1757 //
1758 // Note: Figueroa does not consider the case where DstFormat !=
1759 // SrcFormat. It's possible (likely even!) that this analysis
1760 // could be tightened for those cases, but they are rare (the main
1761 // case of interest here is (float)((double)float + float)).
1762 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1763 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1764 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1765 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1766 RI->copyFastMathFlags(BO);
1767 return RI;
1768 }
1769 break;
1770 case Instruction::FMul:
1771 // For multiplication, the infinitely precise result has at most
1772 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1773 // that such a value can be exactly represented, then no double
1774 // rounding can possibly occur; we can safely perform the operation
1775 // in the destination format if it can represent both sources.
1776 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1777 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1778 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1779 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1780 }
1781 break;
1782 case Instruction::FDiv:
1783 // For division, we again use the bound from Figueroa's
1784 // dissertation. I am entirely certain that this bound can be
1785 // tightened in the unbalanced operand case by an analysis based on
1786 // the diophantine rational approximation bound, but the well-known
1787 // condition used here is a good conservative first pass.
1788 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1789 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1790 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1791 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1792 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1793 }
1794 break;
1795 case Instruction::FRem: {
1796 // Remainder is straightforward. Remainder is always exact, so the
1797 // type of OpI doesn't enter into things at all. We simply evaluate
1798 // in whichever source type is larger, then convert to the
1799 // destination type.
1800 if (SrcWidth == OpWidth)
1801 break;
1802 Value *LHS, *RHS;
1803 if (LHSWidth == SrcWidth) {
1804 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1805 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1806 } else {
1807 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1808 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1809 }
1810
1811 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1812 return CastInst::CreateFPCast(ExactResult, Ty);
1813 }
1814 }
1815 }
1816
1817 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1818 Value *X;
1819 Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1820 if (Op && Op->hasOneUse()) {
1821 // FIXME: The FMF should propagate from the fptrunc, not the source op.
1822 IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1823 if (isa<FPMathOperator>(Op))
1824 Builder.setFastMathFlags(Op->getFastMathFlags());
1825
1826 if (match(Op, m_FNeg(m_Value(X)))) {
1827 Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1828
1829 return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1830 }
1831
1832 // If we are truncating a select that has an extended operand, we can
1833 // narrow the other operand and do the select as a narrow op.
1834 Value *Cond, *X, *Y;
1835 if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1836 X->getType() == Ty) {
1837 // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1838 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1839 Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1840 return replaceInstUsesWith(FPT, Sel);
1841 }
1842 if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1843 X->getType() == Ty) {
1844 // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1845 Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1846 Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1847 return replaceInstUsesWith(FPT, Sel);
1848 }
1849 }
1850
1851 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1852 switch (II->getIntrinsicID()) {
1853 default: break;
1854 case Intrinsic::ceil:
1855 case Intrinsic::fabs:
1856 case Intrinsic::floor:
1857 case Intrinsic::nearbyint:
1858 case Intrinsic::rint:
1859 case Intrinsic::round:
1860 case Intrinsic::roundeven:
1861 case Intrinsic::trunc: {
1862 Value *Src = II->getArgOperand(0);
1863 if (!Src->hasOneUse())
1864 break;
1865
1866 // Except for fabs, this transformation requires the input of the unary FP
1867 // operation to be itself an fpext from the type to which we're
1868 // truncating.
1869 if (II->getIntrinsicID() != Intrinsic::fabs) {
1870 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1871 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1872 break;
1873 }
1874
1875 // Do unary FP operation on smaller type.
1876 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1877 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1878 Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
1879 II->getIntrinsicID(), Ty);
1880 SmallVector<OperandBundleDef, 1> OpBundles;
1881 II->getOperandBundlesAsDefs(OpBundles);
1882 CallInst *NewCI =
1883 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1884 NewCI->copyFastMathFlags(II);
1885 return NewCI;
1886 }
1887 }
1888 }
1889
1890 if (Instruction *I = shrinkInsertElt(FPT, Builder))
1891 return I;
1892
1893 Value *Src = FPT.getOperand(0);
1894 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1895 auto *FPCast = cast<CastInst>(Src);
1896 if (isKnownExactCastIntToFP(*FPCast, *this))
1897 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1898 }
1899
1900 return nullptr;
1901}
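// Illustrative annotation (not from the upstream source): for float operands,
// double satisfies the Figueroa bound above (53 >= 2*24+1), so
//   %e = fpext float %a to double
//   %f = fpext float %b to double
//   %s = fadd double %e, %f
//   %t = fptrunc double %s to float
// can be shrunk to a single
//   %t = fadd float %a, %b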
1902
1903Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1904 // If the source operand is a cast from integer to FP and known exact, then
1905 // cast the integer operand directly to the destination type.
1906 Type *Ty = FPExt.getType();
1907 Value *Src = FPExt.getOperand(0);
1908 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1909 auto *FPCast = cast<CastInst>(Src);
1910 if (isKnownExactCastIntToFP(*FPCast, *this))
1911 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1912 }
1913
1914 return commonCastTransforms(FPExt);
1915}
1916
1917/// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1918/// This is safe if the intermediate type has enough bits in its mantissa to
1919/// accurately represent all values of X. For example, this won't work with
1920/// i64 -> float -> i64.
1921Instruction *InstCombinerImpl::foldItoFPtoI(Instruction &FI) {
1922 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1923 return nullptr;
1924
1925 auto *OpI = cast<CastInst>(FI.getOperand(0));
1926 Value *X = OpI->getOperand(0);
1927 Type *XType = X->getType();
1928 Type *DestType = FI.getType();
1929 bool IsOutputSigned = isa<FPToSIInst>(FI);
1930
1931 // Since we can assume the conversion won't overflow, our decision as to
1932 // whether the input will fit in the float should depend on the minimum
1933 // of the input range and output range.
1934
1935 // This means this is also safe for a signed input and unsigned output, since
1936 // a negative input would lead to undefined behavior.
1937 if (!isKnownExactCastIntToFP(*OpI, *this)) {
1938 // The first cast may not round exactly based on the source integer width
1939 // and FP width, but the overflow UB rules can still allow this to fold.
1940 // If the destination type is narrow, that means the intermediate FP value
1941 // must be large enough to hold the source value exactly.
1942 // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
1943 int OutputSize = (int)DestType->getScalarSizeInBits();
1944 if (OutputSize > OpI->getType()->getFPMantissaWidth())
1945 return nullptr;
1946 }
1947
1948 if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1949 bool IsInputSigned = isa<SIToFPInst>(OpI);
1950 if (IsInputSigned && IsOutputSigned)
1951 return new SExtInst(X, DestType);
1952 return new ZExtInst(X, DestType);
1953 }
1954 if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1955 return new TruncInst(X, DestType);
1956
1957 assert(XType == DestType && "Unexpected types for int to FP to int casts");
1958 return replaceInstUsesWith(FI, X);
1959}
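// Illustrative annotation (not from the upstream source): the i16 -> float
// step below is exact (16 bits fit in a 24-bit significand), so the
// round-trip reduces to a sign extension:
//   fptosi (sitofp i16 %x to float) to i32 --> sext i16 %x to i32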
1960
1961static Instruction *foldFPtoI(Instruction &FI, InstCombiner &IC) {
1962 // fpto{u/s}i non-norm --> 0
1963 FPClassTest Mask =
1964 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
1965 KnownFPClass FPClass =
1966 computeKnownFPClass(FI.getOperand(0), Mask, /*Depth=*/0,
1967 IC.getSimplifyQuery().getWithInstruction(&FI));
1968 if (FPClass.isKnownNever(Mask))
1969 return IC.replaceInstUsesWith(FI, ConstantInt::getNullValue(FI.getType()));
1970
1971 return nullptr;
1972}
1973
1974Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
1975 if (Instruction *I = foldItoFPtoI(FI))
1976 return I;
1977
1978 if (Instruction *I = foldFPtoI(FI, *this))
1979 return I;
1980
1981 return commonCastTransforms(FI);
1982}
1983
1984Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
1985 if (Instruction *I = foldItoFPtoI(FI))
1986 return I;
1987
1988 if (Instruction *I = foldFPtoI(FI, *this))
1989 return I;
1990
1991 return commonCastTransforms(FI);
1992}
1993
1994Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
1995 if (Instruction *R = commonCastTransforms(CI))
1996 return R;
1997 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
1998 CI.setNonNeg();
1999 return &CI;
2000 }
2001 return nullptr;
2002}
2003
2004Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
2005 if (Instruction *R = commonCastTransforms(CI))
2006 return R;
2007 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2008 auto *UI =
2009 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2010 UI->setNonNeg(true);
2011 return UI;
2012 }
2013 return nullptr;
2014}
2015
2016Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2017 // If the source integer type is not the intptr_t type for this target, do a
2018 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2019 // cast to be exposed to other transforms.
2020 unsigned AS = CI.getAddressSpace();
2021 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2022 DL.getPointerSizeInBits(AS)) {
2023 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2024 DL.getIntPtrType(CI.getContext(), AS));
2025 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2026 return new IntToPtrInst(P, CI.getType());
2027 }
2028
2029 if (Instruction *I = commonCastTransforms(CI))
2030 return I;
2031
2032 return nullptr;
2033}
2034
2035Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2036 // If the destination integer type is not the intptr_t type for this target,
2037 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2038 // to be exposed to other transforms.
2039 Value *SrcOp = CI.getPointerOperand();
2040 Type *SrcTy = SrcOp->getType();
2041 Type *Ty = CI.getType();
2042 unsigned AS = CI.getPointerAddressSpace();
2043 unsigned TySize = Ty->getScalarSizeInBits();
2044 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2045 if (TySize != PtrSize) {
2046 Type *IntPtrTy =
2047 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2048 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2049 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2050 }
2051
2052 // (ptrtoint (ptrmask P, M))
2053 // -> (and (ptrtoint P), M)
2054 // This is generally beneficial as `and` is better supported than `ptrmask`.
2055 Value *Ptr, *Mask;
2056 if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
2057 m_Value(Mask)))) &&
2058 Mask->getType() == Ty)
2059 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2060
2061 if (auto *GEP = dyn_cast<GEPOperator>(SrcOp)) {
2062 // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
2063 // While this can increase the number of instructions, it doesn't actually
2064 // increase the overall complexity since the arithmetic is just part of
2065 // the GEP otherwise.
2066 if (GEP->hasOneUse() &&
2067 isa<ConstantPointerNull>(GEP->getPointerOperand())) {
2068 return replaceInstUsesWith(CI,
2069 Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
2070 /*isSigned=*/false));
2071 }
2072
2073 // (ptrtoint (gep (inttoptr Base), ...)) -> Base + Offset
2074 Value *Base;
2075 if (GEP->hasOneUse() &&
2076 match(GEP->getPointerOperand(), m_OneUse(m_IntToPtr(m_Value(Base)))) &&
2077 Base->getType() == Ty) {
2078 Value *Offset = EmitGEPOffset(GEP);
2079 auto *NewOp = BinaryOperator::CreateAdd(Base, Offset);
2080 if (GEP->hasNoUnsignedWrap() ||
2081 (GEP->hasNoUnsignedSignedWrap() &&
2082 isKnownNonNegative(Offset, SQ.getWithInstruction(&CI))))
2083 NewOp->setHasNoUnsignedWrap(true);
2084 return NewOp;
2085 }
2086 }
2087
2088 Value *Vec, *Scalar, *Index;
2089 if (match(SrcOp, m_OneUse(m_InsertElt(m_OneUse(m_IntToPtr(m_Value(Vec))),
2090 m_Value(Scalar), m_Value(Index)))) &&
2091 Vec->getType() == Ty) {
2092 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2093 // Convert the scalar to int followed by insert to eliminate one cast:
2094 // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2095 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2096 return InsertElementInst::Create(Vec, NewCast, Index);
2097 }
2098
2099 return commonCastTransforms(CI);
2100}
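// Illustrative annotation (not from the upstream source): the ptrmask fold
// above, with 64-bit pointers:
//   %p = call ptr @llvm.ptrmask.p0.i64(ptr %q, i64 -16)
//   %i = ptrtoint ptr %p to i64
// becomes
//   %qi = ptrtoint ptr %q to i64
//   %i = and i64 %qi, -16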
2101
2102/// This input value (which is known to have vector type) is being zero extended
2103/// or truncated to the specified vector type. Since the zext/trunc is done
2104/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2105/// endianness will impact which end of the vector that is extended or
2106/// truncated.
2107///
2108/// A vector is always stored with index 0 at the lowest address, which
2109/// corresponds to the most significant bits for a big endian stored integer and
2110/// the least significant bits for little endian. A trunc/zext of an integer
2111/// impacts the big end of the integer. Thus, we need to add/remove elements at
2112/// the front of the vector for big endian targets, and the back of the vector
2113/// for little endian targets.
2114///
2115/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2116///
2117/// The source and destination vector types may have different element types.
2118static Instruction *
2119optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2120 InstCombinerImpl &IC) {
2121 // We can only do this optimization if the output is a multiple of the input
2122 // element size, or the input is a multiple of the output element size.
2123 // Convert the input type to have the same element type as the output.
2124 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2125
2126 if (SrcTy->getElementType() != DestTy->getElementType()) {
2127 // The input types don't need to be identical, but for now they must be the
2128 // same size. There is no specific reason we couldn't handle things like
2129 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but we haven't gotten
2130 // there yet.
2131 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2132 DestTy->getElementType()->getPrimitiveSizeInBits())
2133 return nullptr;
2134
2135 SrcTy =
2136 FixedVectorType::get(DestTy->getElementType(),
2137 cast<FixedVectorType>(SrcTy)->getNumElements());
2138 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2139 }
2140
2141 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2142 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2143 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2144
2145 assert(SrcElts != DestElts && "Element counts should be different.");
2146
2147 // Now that the element types match, get the shuffle mask and RHS of the
2148 // shuffle to use, which depends on whether we're increasing or decreasing the
2149 // size of the input.
2150 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2151 ArrayRef<int> ShuffleMask;
2152 Value *V2;
2153
2154 if (SrcElts > DestElts) {
2155 // If we're shrinking the number of elements (rewriting an integer
2156 // truncate), just shuffle in the elements corresponding to the least
2157 // significant bits from the input and use poison as the second shuffle
2158 // input.
2159 V2 = PoisonValue::get(SrcTy);
2160 // Make sure the shuffle mask selects the "least significant bits" by
2161 // keeping elements from the back of the src vector for big endian, and from the
2162 // front for little endian.
2163 ShuffleMask = ShuffleMaskStorage;
2164 if (IsBigEndian)
2165 ShuffleMask = ShuffleMask.take_back(DestElts);
2166 else
2167 ShuffleMask = ShuffleMask.take_front(DestElts);
2168 } else {
2169 // If we're increasing the number of elements (rewriting an integer zext),
2170 // shuffle in all of the elements from InVal. Fill the rest of the result
2171 // elements with zeros from a constant zero.
2172 V2 = Constant::getNullValue(SrcTy);
2173 // Use first elt from V2 when indicating zero in the shuffle mask.
2174 uint32_t NullElt = SrcElts;
2175 // Extend with null values in the "most significant bits" by adding elements
2176 // in front of the src vector for big endian, and at the back for little
2177 // endian.
2178 unsigned DeltaElts = DestElts - SrcElts;
2179 if (IsBigEndian)
2180 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2181 else
2182 ShuffleMaskStorage.append(DeltaElts, NullElt);
2183 ShuffleMask = ShuffleMaskStorage;
2184 }
2185
2186 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2187}
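// Illustrative annotation (not from the upstream source): widening <2 x i32>
// (a zext from i64 to i128 between bitcasts) to <4 x i32> on a little-endian
// target appends zero elements at the back:
//   shufflevector <2 x i32> %v, <2 x i32> zeroinitializer,
//                 <4 x i32> <i32 0, i32 1, i32 2, i32 2>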
2188
2189static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2190 return Value % Ty->getPrimitiveSizeInBits() == 0;
2191}
2192
2193static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2194 return Value / Ty->getPrimitiveSizeInBits();
2195}
2196
2197/// V is a value which is inserted into a vector of VecEltTy.
2198/// Look through the value to see if we can decompose it into
2199/// insertions into the vector. See the example in the comment for
2200/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2201/// The type of V is always a non-zero multiple of VecEltTy's size.
2202/// Shift is the number of bits between the lsb of V and the lsb of
2203/// the vector.
2204///
2205/// This returns false if the pattern can't be matched or true if it can,
2206/// filling in Elements with the elements found here.
2207static bool collectInsertionElements(Value *V, unsigned Shift,
2208 SmallVectorImpl<Value *> &Elements,
2209 Type *VecEltTy, bool isBigEndian) {
2210 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2211 "Shift should be a multiple of the element type size");
2212
2213 // Undef values never contribute useful bits to the result.
2214 if (isa<UndefValue>(V)) return true;
2215
2216 // If we got down to a value of the right type, we win; try inserting into the
2217 // right element.
2218 if (V->getType() == VecEltTy) {
2219 // Inserting null doesn't actually insert any elements.
2220 if (Constant *C = dyn_cast<Constant>(V))
2221 if (C->isNullValue())
2222 return true;
2223
2224 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2225 if (isBigEndian)
2226 ElementIndex = Elements.size() - ElementIndex - 1;
2227
2228 // Fail if multiple elements are inserted into this slot.
2229 if (Elements[ElementIndex])
2230 return false;
2231
2232 Elements[ElementIndex] = V;
2233 return true;
2234 }
2235
2236 if (Constant *C = dyn_cast<Constant>(V)) {
2237 // Figure out the # elements this provides, and bitcast it or slice it up
2238 // as required.
2239 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2240 VecEltTy);
2241 // If the constant is the size of a vector element, we just need to bitcast
2242 // it to the right type so it gets properly inserted.
2243 if (NumElts == 1)
2244 return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2245 Shift, Elements, VecEltTy, isBigEndian);
2246
2247 // Okay, this is a constant that covers multiple elements. Slice it up into
2248 // pieces and insert each element-sized piece into the vector.
2249 if (!isa<IntegerType>(C->getType()))
2250 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2251 C->getType()->getPrimitiveSizeInBits()));
2252 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2253 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2254
2255 for (unsigned i = 0; i != NumElts; ++i) {
2256 unsigned ShiftI = i * ElementSize;
2257 Constant *Piece = ConstantFoldBinaryInstruction(
2258 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2259 if (!Piece)
2260 return false;
2261
2262 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2263 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2264 isBigEndian))
2265 return false;
2266 }
2267 return true;
2268 }
2269
2270 if (!V->hasOneUse()) return false;
2271
2272 Instruction *I = dyn_cast<Instruction>(V);
2273 if (!I) return false;
2274 switch (I->getOpcode()) {
2275 default: return false; // Unhandled case.
2276 case Instruction::BitCast:
2277 if (I->getOperand(0)->getType()->isVectorTy())
2278 return false;
2279 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2280 isBigEndian);
2281 case Instruction::ZExt:
2282 if (!isMultipleOfTypeSize(
2283 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2284 VecEltTy))
2285 return false;
2286 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2287 isBigEndian);
2288 case Instruction::Or:
2289 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2290 isBigEndian) &&
2291 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2292 isBigEndian);
2293 case Instruction::Shl: {
2294 // Must be shifting by a constant that is a multiple of the element size.
2295 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2296 if (!CI) return false;
2297 Shift += CI->getZExtValue();
2298 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2299 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2300 isBigEndian);
2301 }
2302
2303 }
2304}
2305
2306
2307/// If the input is an 'or' instruction, we may be doing shifts and ors to
2308/// assemble the elements of the vector manually.
2309/// Try to rip the code out and replace it with insertelements. This is to
2310/// optimize code like this:
2311///
2312/// %tmp37 = bitcast float %inc to i32
2313/// %tmp38 = zext i32 %tmp37 to i64
2314/// %tmp31 = bitcast float %inc5 to i32
2315/// %tmp32 = zext i32 %tmp31 to i64
2316/// %tmp33 = shl i64 %tmp32, 32
2317/// %ins35 = or i64 %tmp33, %tmp38
2318/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2319///
2320/// Into two insertelements that do "buildvector{%inc, %inc5}".
2321static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2322 InstCombinerImpl &IC) {
2323 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2324 Value *IntInput = CI.getOperand(0);
2325
2326 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2327 if (!collectInsertionElements(IntInput, 0, Elements,
2328 DestVecTy->getElementType(),
2329 IC.getDataLayout().isBigEndian()))
2330 return nullptr;
2331
2332 // If we succeeded, we know that all of the elements are specified by Elements
2333 // or are zero if Elements has a null entry. Recast this as a set of
2334 // insertions.
2335 Value *Result = Constant::getNullValue(CI.getType());
2336 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2337 if (!Elements[i]) continue; // Unset element.
2338
2339 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2340 IC.Builder.getInt32(i));
2341 }
2342
2343 return Result;
2344}
2345
2346/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2347/// vector followed by extract element. The backend tends to handle bitcasts of
2348/// vectors better than bitcasts of scalars because vector registers are
2349/// usually not type-specific like scalar integer or scalar floating-point.
2350static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2351 InstCombinerImpl &IC) {
2352 Value *VecOp, *Index;
2353 if (!match(BitCast.getOperand(0),
2354 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2355 return nullptr;
2356
2357 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2358 // type to extract from.
2359 Type *DestType = BitCast.getType();
2360 VectorType *VecType = cast<VectorType>(VecOp->getType());
2361 if (VectorType::isValidElementType(DestType)) {
2362 auto *NewVecType = VectorType::get(DestType, VecType);
2363 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2364 return ExtractElementInst::Create(NewBC, Index);
2365 }
2366
2367 // Only handle a vector DestType here, to avoid the inverse transform in visitBitCast.
2368 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2369 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2370 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2371 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2372
2373 return nullptr;
2374}
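// Illustrative annotation (not from the upstream source): the
// canonicalization above turns
//   bitcast (extractelement <4 x float> %v, i64 1) to i32
// into
//   extractelement (bitcast <4 x float> %v to <4 x i32>), i64 1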
2375
2376/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2377static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2378 InstCombiner::BuilderTy &Builder) {
2379 Type *DestTy = BitCast.getType();
2380 BinaryOperator *BO;
2381
2382 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2383 !BO->isBitwiseLogicOp())
2384 return nullptr;
2385
2386 // FIXME: This transform is restricted to vector types to avoid backend
2387 // problems caused by creating potentially illegal operations. If a fix-up is
2388 // added to handle that situation, we can remove this check.
2389 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2390 return nullptr;
2391
2392 if (DestTy->isFPOrFPVectorTy()) {
2393 Value *X, *Y;
2394 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2395 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2396 match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
2397 if (X->getType()->isFPOrFPVectorTy() &&
2398 Y->getType()->isIntOrIntVectorTy()) {
2399 Value *CastedOp =
2400 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2401 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2402 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2403 }
2404 if (X->getType()->isIntOrIntVectorTy() &&
2405 Y->getType()->isFPOrFPVectorTy()) {
2406 Value *CastedOp =
2407 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2408 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2409 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2410 }
2411 }
2412 return nullptr;
2413 }
2414
2415 if (!DestTy->isIntOrIntVectorTy())
2416 return nullptr;
2417
2418 Value *X;
2419 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2420 X->getType() == DestTy && !isa<Constant>(X)) {
2421 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2422 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2423 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2424 }
2425
2426 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2427 X->getType() == DestTy && !isa<Constant>(X)) {
2428 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2429 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2430 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2431 }
2432
2433 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2434 // constant. This eases recognition of special constants for later ops.
2435 // Example:
2436 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2437 Constant *C;
2438 if (match(BO->getOperand(1), m_Constant(C))) {
2439 // bitcast (logic X, C) --> logic (bitcast X, C')
2440 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2441 Value *CastedC = Builder.CreateBitCast(C, DestTy);
2442 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2443 }
2444
2445 return nullptr;
2446}
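// Illustrative annotation (not from the upstream source): the logic fold
// above turns
//   %bc = bitcast <4 x i32> %x to <2 x i64>
//   %a  = and <2 x i64> %bc, %y
//   %r  = bitcast <2 x i64> %a to <4 x i32>
// into
//   %yb = bitcast <2 x i64> %y to <4 x i32>
//   %r  = and <4 x i32> %x, %yb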
2447
2448/// Change the type of a select if we can eliminate a bitcast.
2449static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2450 InstCombiner::BuilderTy &Builder) {
2451 Value *Cond, *TVal, *FVal;
2452 if (!match(BitCast.getOperand(0),
2453 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2454 return nullptr;
2455
2456 // A vector select must maintain the same number of elements in its operands.
2457 Type *CondTy = Cond->getType();
2458 Type *DestTy = BitCast.getType();
2459 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2460 if (!DestTy->isVectorTy() ||
2461 CondVTy->getElementCount() !=
2462 cast<VectorType>(DestTy)->getElementCount())
2463 return nullptr;
2464
2465 // FIXME: This transform is restricted from changing the select between
2466 // scalars and vectors to avoid backend problems caused by creating
2467 // potentially illegal operations. If a fix-up is added to handle that
2468 // situation, we can remove this check.
2469 if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2470 return nullptr;
2471
2472 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2473 Value *X;
2474 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2475 !isa<Constant>(X)) {
2476 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2477 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2478 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2479 }
2480
2481 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2482 !isa<Constant>(X)) {
2483 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2484 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2485 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2486 }
2487
2488 return nullptr;
2489}
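// Illustrative annotation (not from the upstream source): the select fold
// above turns
//   %t = bitcast <4 x i32> %x to <2 x i64>
//   %s = select i1 %c, <2 x i64> %t, <2 x i64> %y
//   %r = bitcast <2 x i64> %s to <4 x i32>
// into
//   %yb = bitcast <2 x i64> %y to <4 x i32>
//   %r  = select i1 %c, <4 x i32> %x, <4 x i32> %yb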
2490
2491/// Check if all users of CI are StoreInsts.
2492static bool hasStoreUsersOnly(CastInst &CI) {
2493 for (User *U : CI.users()) {
2494 if (!isa<StoreInst>(U))
2495 return false;
2496 }
2497 return true;
2498}
2499
2500/// This function handles the following case:
2501///
2502/// A -> B cast
2503/// PHI
2504/// B -> A cast
2505///
2506/// All the related PHI nodes can be replaced by new PHI nodes with type A.
2507/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2508Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2509 PHINode *PN) {
2510 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2511 if (hasStoreUsersOnly(CI))
2512 return nullptr;
2513
2514 Value *Src = CI.getOperand(0);
2515 Type *SrcTy = Src->getType(); // Type B
2516 Type *DestTy = CI.getType(); // Type A
2517
2518 SmallVector<PHINode *, 4> PhiWorklist;
2519 SmallSetVector<PHINode *, 4> OldPhiNodes;
2520
2521 // Find all of the A->B casts and PHI nodes.
2522 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2523 // OldPhiNodes is used to track all known PHI nodes, before adding a new
2524 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2525 PhiWorklist.push_back(PN);
2526 OldPhiNodes.insert(PN);
2527 while (!PhiWorklist.empty()) {
2528 auto *OldPN = PhiWorklist.pop_back_val();
2529 for (Value *IncValue : OldPN->incoming_values()) {
2530 if (isa<Constant>(IncValue))
2531 continue;
2532
2533 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2534 // If there is a sequence of one or more load instructions, where each
2535 // loaded value is used as the address of a later load, a bitcast is
2536 // necessary to change the value type; don't optimize it. For
2537 // simplicity we give up if the load address comes from another load.
2538 Value *Addr = LI->getOperand(0);
2539 if (Addr == &CI || isa<LoadInst>(Addr))
2540 return nullptr;
2541 // Don't transform "load <256 x i32>, <256 x i32>*" to
2542 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2543 // TODO: Remove this check when bitcast between vector and x86_amx
2544 // is replaced with a specific intrinsic.
2545 if (DestTy->isX86_AMXTy())
2546 return nullptr;
2547 if (LI->hasOneUse() && LI->isSimple())
2548 continue;
2549 // If a LoadInst has more than one use, changing the type of loaded
2550 // value may create another bitcast.
2551 return nullptr;
2552 }
2553
2554 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2555 if (OldPhiNodes.insert(PNode))
2556 PhiWorklist.push_back(PNode);
2557 continue;
2558 }
2559
2560 auto *BCI = dyn_cast<BitCastInst>(IncValue);
2561 // We can't handle other instructions.
2562 if (!BCI)
2563 return nullptr;
2564
2565 // Verify it's an A->B cast.
2566 Type *TyA = BCI->getOperand(0)->getType();
2567 Type *TyB = BCI->getType();
2568 if (TyA != DestTy || TyB != SrcTy)
2569 return nullptr;
2570 }
2571 }
2572
2573 // Check that each user of each old PHI node is something that we can
2574 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2575 for (auto *OldPN : OldPhiNodes) {
2576 for (User *V : OldPN->users()) {
2577 if (auto *SI = dyn_cast<StoreInst>(V)) {
2578 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2579 return nullptr;
2580 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2581 // Verify it's a B->A cast.
2582 Type *TyB = BCI->getOperand(0)->getType();
2583 Type *TyA = BCI->getType();
2584 if (TyA != DestTy || TyB != SrcTy)
2585 return nullptr;
2586 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2587 // As long as the user is another old PHI node, then even if we don't
2588 // rewrite it, the PHI web we're considering won't have any users
2589 // outside itself, so it'll be dead.
2590 if (!OldPhiNodes.contains(PHI))
2591 return nullptr;
2592 } else {
2593 return nullptr;
2594 }
2595 }
2596 }
2597
2598 // For each old PHI node, create a corresponding new PHI node with a type A.
2599 SmallDenseMap<PHINode *, PHINode *, 4> NewPNodes;
2600 for (auto *OldPN : OldPhiNodes) {
2601 Builder.SetInsertPoint(OldPN);
2602 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2603 NewPNodes[OldPN] = NewPN;
2604 }
2605
2606 // Fill in the operands of new PHI nodes.
2607 for (auto *OldPN : OldPhiNodes) {
2608 PHINode *NewPN = NewPNodes[OldPN];
2609 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2610 Value *V = OldPN->getOperand(j);
2611 Value *NewV = nullptr;
2612 if (auto *C = dyn_cast<Constant>(V)) {
2613 NewV = ConstantExpr::getBitCast(C, DestTy);
2614 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2615 // Explicitly perform load combine to make sure no opposing transform
2616 // can remove the bitcast in the meantime and trigger an infinite loop.
2617 Builder.SetInsertPoint(LI);
2618 NewV = combineLoadToNewType(*LI, DestTy);
2619 // Remove the old load and its use in the old phi, which itself becomes
2620 // dead once the whole transform finishes.
2621 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2622 eraseInstFromFunction(*LI);
2623 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2624 NewV = BCI->getOperand(0);
2625 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2626 NewV = NewPNodes[PrevPN];
2627 }
2628 assert(NewV);
2629 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2630 }
2631 }
2632
2633 // Traverse all accumulated PHI nodes and process their users,
2634 // which are Stores and BitCasts. Without this processing,
2635 // NewPHI nodes could be replicated and could lead to extra
2636 // moves generated after DeSSA.
2637 // If there is a store with type B, change it to type A.
2638
2639
2640 // Replace users of BitCast B->A with NewPHI. These will help
2641 // later to get rid of a closure formed by OldPHI nodes.
2642 Instruction *RetVal = nullptr;
2643 for (auto *OldPN : OldPhiNodes) {
2644 PHINode *NewPN = NewPNodes[OldPN];
2645 for (User *V : make_early_inc_range(OldPN->users())) {
2646 if (auto *SI = dyn_cast<StoreInst>(V)) {
2647 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2648 Builder.SetInsertPoint(SI);
2649 auto *NewBC =
2650 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2651 SI->setOperand(0, NewBC);
2652 Worklist.push(SI);
2653 assert(hasStoreUsersOnly(*NewBC));
2654 }
2655 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2656 Type *TyB = BCI->getOperand(0)->getType();
2657 Type *TyA = BCI->getType();
2658 assert(TyA == DestTy && TyB == SrcTy);
2659 (void) TyA;
2660 (void) TyB;
2661 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2662 if (BCI == &CI)
2663 RetVal = I;
2664 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2665 assert(OldPhiNodes.contains(PHI));
2666 (void) PHI;
2667 } else {
2668 llvm_unreachable("all uses should be handled");
2669 }
2670 }
2671 }
2672
2673 return RetVal;
2674}
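// Illustrative annotation (not from the upstream source): the PHI rewrite
// above, with A = <2 x float> and B = double:
//   %b = bitcast <2 x float> %a to double
//   %phi = phi double [ %b, %bb1 ], [ %b2, %bb2 ]
//   %a2 = bitcast double %phi to <2 x float>
// The PHI is recreated with type <2 x float> so both bitcasts become dead.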
2675
2676/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
2677/// copysign((bitcast Y to fp), X)
2678static Value *foldCopySignIdioms(BitCastInst &CI,
2679 InstCombiner::BuilderTy &Builder,
2680 const SimplifyQuery &SQ) {
2681 Value *X, *Y;
2682 Type *FTy = CI.getType();
2683 if (!FTy->isFPOrFPVectorTy())
2684 return nullptr;
2685 if (!match(&CI, m_ElementWiseBitCast(m_c_Or(
2686 m_And(m_ElementWiseBitCast(m_Value(X)), m_SignMask()),
2687 m_Value(Y)))))
2688 return nullptr;
2689 if (X->getType() != FTy)
2690 return nullptr;
2691 if (!isKnownNonNegative(Y, SQ))
2692 return nullptr;
2693
2694 return Builder.CreateCopySign(Builder.CreateBitCast(Y, FTy), X);
2695}
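// Illustrative annotation (not from the upstream source): with %y known
// non-negative as an integer (its sign bit clear), the idiom above
//   %xi = bitcast float %x to i32
//   %s  = and i32 %xi, -2147483648
//   %o  = or i32 %s, %y
//   %f  = bitcast i32 %o to float
// becomes
//   %yf = bitcast i32 %y to float
//   %f  = call float @llvm.copysign.f32(float %yf, float %x)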
2696
2697Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2698 // If the operands are integer typed then apply the integer transforms,
2699 // otherwise just apply the common ones.
2700 Value *Src = CI.getOperand(0);
2701 Type *SrcTy = Src->getType();
2702 Type *DestTy = CI.getType();
2703
2704 // Get rid of casts from one type to the same type. These are useless and can
2705 // be replaced by the operand.
2706 if (DestTy == Src->getType())
2707 return replaceInstUsesWith(CI, Src);
2708
2709 if (isa<FixedVectorType>(DestTy)) {
2710 if (isa<IntegerType>(SrcTy)) {
2711 // If this is a cast from an integer to vector, check to see if the input
2712 // is a trunc or zext of a bitcast from vector. If so, we can replace all
2713 // the casts with a shuffle and (potentially) a bitcast.
2714 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2715 CastInst *SrcCast = cast<CastInst>(Src);
2716 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2717 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2718 if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2719 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2720 return I;
2721 }
2722
2723 // If the input is an 'or' instruction, we may be doing shifts and ors to
2724 // assemble the elements of the vector manually. Try to rip the code out
2725 // and replace it with insertelements.
2726 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2727 return replaceInstUsesWith(CI, V);
2728 }
2729 }
2730
2731 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2732 if (SrcVTy->getNumElements() == 1) {
2733 // If our destination is not a vector, then make this a straight
2734 // scalar-scalar cast.
2735 if (!DestTy->isVectorTy()) {
2736 Value *Elem =
2737 Builder.CreateExtractElement(Src,
2738 Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2739 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2740 }
2741
2742 // Otherwise, see if our source is an insert. If so, then use the scalar
2743 // component directly:
2744 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2745 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2746 return new BitCastInst(InsElt->getOperand(1), DestTy);
2747 }
2748
2749 // Convert an artificial vector insert into more analyzable bitwise logic.
2750 unsigned BitWidth = DestTy->getScalarSizeInBits();
2751 Value *X, *Y;
2752 uint64_t IndexC;
2753 if (match(Src, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(X))),
2754 m_Value(Y), m_ConstantInt(IndexC)))) &&
2755 DestTy->isIntegerTy() && X->getType() == DestTy &&
2756 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2757 // Adjust for big endian - the LSBs are at the high index.
2758 if (DL.isBigEndian())
2759 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2760
2761 // We only handle (endian-normalized) insert to index 0. Any other insert
2762 // would require a left-shift, so that is an extra instruction.
2763 if (IndexC == 0) {
2764 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2765 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2766 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2767 Value *AndX = Builder.CreateAnd(X, MaskC);
2768 Value *ZextY = Builder.CreateZExt(Y, DestTy);
2769 return BinaryOperator::CreateOr(AndX, ZextY);
2770 }
2771 }
2772 }
2773
2774 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2775 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2776 // a bitcast to a vector with the same # elts.
2777 Value *ShufOp0 = Shuf->getOperand(0);
2778 Value *ShufOp1 = Shuf->getOperand(1);
2779 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2780 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2781 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2782 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2783 ShufElts == SrcVecElts) {
2784 BitCastInst *Tmp;
2785 // If either of the operands is a cast from CI.getType(), then
2786 // evaluating the shuffle in the casted destination's type will allow
2787 // us to eliminate at least one cast.
2788 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2789 Tmp->getOperand(0)->getType() == DestTy) ||
2790 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2791 Tmp->getOperand(0)->getType() == DestTy)) {
2792 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2793 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2794 // Return a new shuffle vector. Use the same element ID's, as we
2795 // know the vector types match #elts.
2796 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2797 }
2798 }
2799
2800 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
2801 // as a byte/bit swap:
2802 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
2803 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
2804 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
2805 Shuf->hasOneUse() && Shuf->isReverse()) {
2806 unsigned IntrinsicNum = 0;
2807 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2808 SrcTy->getScalarSizeInBits() == 8) {
2809 IntrinsicNum = Intrinsic::bswap;
2810 } else if (SrcTy->getScalarSizeInBits() == 1) {
2811 IntrinsicNum = Intrinsic::bitreverse;
2812 }
2813 if (IntrinsicNum != 0) {
2814 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2815 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2816 Function *BswapOrBitreverse =
2817 Intrinsic::getDeclaration(CI.getModule(), IntrinsicNum, DestTy);
2818 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2819 return CallInst::Create(BswapOrBitreverse, {ScalarX});
2820 }
2821 }
2822 }
2823
2824 // Handle the A->B->A cast, and there is an intervening PHI node.
2825 if (PHINode *PN = dyn_cast<PHINode>(Src))
2826 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2827 return I;
2828
2829 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2830 return I;
2831
2832 if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
2833 return I;
2834
2835 if (Instruction *I = foldBitCastSelect(CI, Builder))
2836 return I;
2837
2838 if (Value *V = foldCopySignIdioms(CI, Builder, SQ))
2839 return replaceInstUsesWith(CI, V);
2840
2841 return commonCastTransforms(CI);
2842}
2843
2844Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2845 return commonCastTransforms(CI);
2846}
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:699
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:694
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, Type *DstIntPtrTy)
Determine how a pair of casts can be eliminated, if they can be at all.
static CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:701
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:783
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2618
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2295
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2253
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
const APFloat & getValueAPF() const
Definition: Constants.h:312
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
Definition: Constants.cpp:792
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
Definition: Constants.cpp:298
This class represents an Operation in the Expression.
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:367
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:217
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:846
bool isBigEndian() const
Definition: DataLayout.h:196
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:214
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:769
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateVScale(Constant *Scaling, const Twine &Name="")
Create a call to llvm.vscale, multiplied by Scaling.
Definition: IRBuilder.cpp:89
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2492
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2480
Value * CreateFPTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2121
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2059
Value * CreateFRemFMF(Value *L, Value *R, Instruction *FMFSource, const Twine &Name="")
Copy fast-math-flags from an instruction rather than using the builder's default FMF.
Definition: IRBuilder.h:1671
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:933
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1454
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:308
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:483
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2417
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1766
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2147
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1433
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2041
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1492
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2137
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2027
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1514
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1683
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2181
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2216
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1473
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1536
Value * CreateCopySign(Value *LHS, Value *RHS, Instruction *FMFSource=nullptr, const Twine &Name="")
Create call to the copysign intrinsic.
Definition: IRBuilder.h:1031
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * foldItoFPtoI(CastInst &FI)
fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate ty...
Instruction * visitFPTrunc(FPTruncInst &CI)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
Definition: InstCombiner.h:47
SimplifyQuery SQ
Definition: InstCombiner.h:76
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:338
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
Definition: InstCombiner.h:383
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Definition: InstCombiner.h:64
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
Definition: InstCombiner.h:372
const DataLayout & DL
Definition: InstCombiner.h:75
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:449
DominatorTree & DT
Definition: InstCombiner.h:74
void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth, const Instruction *CxtI) const
Definition: InstCombiner.h:428
BuilderTy & Builder
Definition: InstCombiner.h:60
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:444
const SimplifyQuery & getSimplifyQuery() const
Definition: InstCombiner.h:339
unsigned ComputeMaxSignificantBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombiner.h:454
void push(Instruction *I)
Push the instruction onto the worklist stack.
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:323
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
bool hasNonNeg() const LLVM_READONLY
Determine whether the the nneg flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:266
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This instruction constructs a fixed permutation of two input vectors.
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
const fltSemantics & getFltSemantics() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
static Type * getBFloatTy(LLVMContext &C)
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:230
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition: Type.h:145
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:258
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:200
static IntegerType * getInt32Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:212
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
static Type * getPPC_FP128Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition: InstrTypes.h:164
'undef' values are things that do not have specified contents.
Definition: Constants.h:1398
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1833
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
iterator_range< user_iterator > users()
Definition: Value.h:421
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:671
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:664
This class represents zero extension of integer types.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1539
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
Definition: PatternMatch.h:664
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
Definition: PatternMatch.h:619
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
Definition: PatternMatch.h:980
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
CastInst_match< OpTy, FPToUIInst > m_FPToUI(const OpTy &Op)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:893
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:854
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
VScaleVal_match m_VScale()
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
Definition: PatternMatch.h:698
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition: MathExtras.h:359
Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
@ SPF_UNKNOWN
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2749
@ And
Bitwise or logical AND of integers.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition: STLExtras.h:2070
KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, unsigned Depth, const SimplifyQuery &SQ)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:281
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:253
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:282
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:279
static const fltSemantics & BFloat() LLVM_READNONE
Definition: APFloat.cpp:280
static unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition: APFloat.cpp:343
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:231
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:237
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
SimplifyQuery getWithInstruction(const Instruction *I) const