LLVM 23.0.0git
InstCombineCasts.cpp
Go to the documentation of this file.
1//===- InstCombineCasts.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for cast operations.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/DebugInfo.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Type.h"
26#include "llvm/IR/Value.h"
29#include <iterator>
30#include <optional>
31
32using namespace llvm;
33using namespace PatternMatch;
34
35#define DEBUG_TYPE "instcombine"
36
38
41 EvaluatedMap &Processed) {
42 // Since we cover transformation of instructions with multiple users, we might
43 // come to the same node via multiple paths. We should not create a
44 // replacement for every single one of them though.
45 if (Value *Result = Processed.lookup(V))
46 return Result;
47
50
51 // Otherwise, it must be an instruction.
53 Instruction *Res = nullptr;
54 unsigned Opc = I->getOpcode();
55 switch (Opc) {
56 case Instruction::Add:
57 case Instruction::Sub:
58 case Instruction::Mul:
59 case Instruction::And:
60 case Instruction::Or:
61 case Instruction::Xor:
62 case Instruction::AShr:
63 case Instruction::LShr:
64 case Instruction::Shl:
65 case Instruction::UDiv:
66 case Instruction::URem: {
67 Value *LHS = EvaluateInDifferentTypeImpl(I->getOperand(0), Ty, isSigned, IC,
68 Processed);
69 Value *RHS = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned, IC,
70 Processed);
72 if (Opc == Instruction::LShr || Opc == Instruction::AShr)
73 Res->setIsExact(I->isExact());
74 break;
75 }
76 case Instruction::Trunc:
77 case Instruction::ZExt:
78 case Instruction::SExt:
79 // If the source type of the cast is the type we're trying for then we can
80 // just return the source. There's no need to insert it because it is not
81 // new.
82 if (I->getOperand(0)->getType() == Ty)
83 return I->getOperand(0);
84
85 // Otherwise, must be the same type of cast, so just reinsert a new one.
86 // This also handles the case of zext(trunc(x)) -> zext(x).
87 Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
88 Opc == Instruction::SExt);
89 break;
90 case Instruction::Select: {
91 Value *True = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned,
92 IC, Processed);
93 Value *False = EvaluateInDifferentTypeImpl(I->getOperand(2), Ty, isSigned,
94 IC, Processed);
95 Res = SelectInst::Create(I->getOperand(0), True, False);
96 break;
97 }
98 case Instruction::PHI: {
99 PHINode *OPN = cast<PHINode>(I);
101 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
103 isSigned, IC, Processed);
104 NPN->addIncoming(V, OPN->getIncomingBlock(i));
105 }
106 Res = NPN;
107 break;
108 }
109 case Instruction::FPToUI:
110 case Instruction::FPToSI:
111 Res = CastInst::Create(static_cast<Instruction::CastOps>(Opc),
112 I->getOperand(0), Ty);
113 break;
114 case Instruction::Call:
116 switch (II->getIntrinsicID()) {
117 default:
118 llvm_unreachable("Unsupported call!");
119 case Intrinsic::vscale: {
121 I->getModule(), Intrinsic::vscale, {Ty});
122 Res = CallInst::Create(Fn->getFunctionType(), Fn);
123 break;
124 }
125 case Intrinsic::umin:
126 case Intrinsic::umax:
127 case Intrinsic::smin:
128 case Intrinsic::smax: {
129 Value *Op0 = EvaluateInDifferentTypeImpl(II->getArgOperand(0), Ty,
130 isSigned, IC, Processed);
131 Value *Op1 = EvaluateInDifferentTypeImpl(II->getArgOperand(1), Ty,
132 isSigned, IC, Processed);
134 I->getModule(), II->getIntrinsicID(), {Ty});
135 Res = CallInst::Create(Fn->getFunctionType(), Fn, {Op0, Op1});
136 break;
137 }
138 case Intrinsic::abs: {
139 Value *Arg = EvaluateInDifferentTypeImpl(II->getArgOperand(0), Ty,
140 isSigned, IC, Processed);
142 I->getModule(), II->getIntrinsicID(), {Ty});
143 Res = CallInst::Create(Fn->getFunctionType(), Fn,
144 {Arg, ConstantInt::getFalse(I->getContext())});
145 break;
146 }
147 }
148 }
149 break;
150 case Instruction::ShuffleVector: {
151 auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
152 auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
153 auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
154 Value *Op0 = EvaluateInDifferentTypeImpl(I->getOperand(0), FixedTy,
155 isSigned, IC, Processed);
156 Value *Op1 = EvaluateInDifferentTypeImpl(I->getOperand(1), FixedTy,
157 isSigned, IC, Processed);
158 Res = new ShuffleVectorInst(Op0, Op1,
159 cast<ShuffleVectorInst>(I)->getShuffleMask());
160 break;
161 }
162 default:
163 // TODO: Can handle more cases here.
164 llvm_unreachable("Unreachable!");
165 }
166
167 Res->takeName(I);
168 Value *Result = IC.InsertNewInstWith(Res, I->getIterator());
169 // There is no need in keeping track of the old value/new value relationship
170 // when we have only one user, we came have here from that user and no-one
171 // else cares.
172 if (!V->hasOneUse())
173 Processed[V] = Result;
174
175 return Result;
176}
177
178/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
179/// true for, actually insert the code to evaluate the expression.
181 bool isSigned) {
182 EvaluatedMap Processed;
183 return EvaluateInDifferentTypeImpl(V, Ty, isSigned, *this, Processed);
184}
185
187InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
188 const CastInst *CI2) {
189 Type *SrcTy = CI1->getSrcTy();
190 Type *MidTy = CI1->getDestTy();
191 Type *DstTy = CI2->getDestTy();
192
193 Instruction::CastOps firstOp = CI1->getOpcode();
194 Instruction::CastOps secondOp = CI2->getOpcode();
195 Type *SrcIntPtrTy =
196 SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
197 Type *DstIntPtrTy =
198 DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
199 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
200 DstTy, &DL);
201
202 // We don't want to form an inttoptr or ptrtoint that converts to an integer
203 // type that differs from the pointer size.
204 if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
205 (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
206 Res = 0;
207
208 return Instruction::CastOps(Res);
209}
210
211/// Implement the transforms common to all CastInst visitors.
213 Value *Src = CI.getOperand(0);
214 Type *Ty = CI.getType();
215
216 if (Value *Res =
217 simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
218 return replaceInstUsesWith(CI, Res);
219
220 // Try to eliminate a cast of a cast.
221 if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
222 if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
223 // The first cast (CSrc) is eliminable so we need to fix up or replace
224 // the second cast (CI). CSrc will then have a good chance of being dead.
225 auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
226 // Point debug users of the dying cast to the new one.
227 if (CSrc->hasOneUse())
228 replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
229 return Res;
230 }
231 }
232
233 if (auto *Sel = dyn_cast<SelectInst>(Src)) {
234 // We are casting a select. Try to fold the cast into the select if the
235 // select does not have a compare instruction with matching operand types
236 // or the select is likely better done in a narrow type.
237 // Creating a select with operands that are different sizes than its
238 // condition may inhibit other folds and lead to worse codegen.
239 auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
240 if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
241 (CI.getOpcode() == Instruction::Trunc &&
242 shouldChangeType(CI.getSrcTy(), CI.getType()))) {
243
244 // If it's a bitcast involving vectors, make sure it has the same number
245 // of elements on both sides.
246 if (CI.getOpcode() != Instruction::BitCast ||
248 if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
249 replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
250 return NV;
251 }
252 }
253 }
254 }
255
256 // If we are casting a PHI, then fold the cast into the PHI.
257 if (auto *PN = dyn_cast<PHINode>(Src)) {
258 // Don't do this if it would create a PHI node with an illegal type from a
259 // legal type.
260 if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
261 shouldChangeType(CI.getSrcTy(), CI.getType()))
262 if (Instruction *NV = foldOpIntoPhi(CI, PN))
263 return NV;
264 }
265
266 // Canonicalize a unary shuffle after the cast if neither operation changes
267 // the size or element size of the input vector.
268 // TODO: We could allow size-changing ops if that doesn't harm codegen.
269 // cast (shuffle X, Mask) --> shuffle (cast X), Mask
270 Value *X;
271 ArrayRef<int> Mask;
272 if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))) {
273 // TODO: Allow scalable vectors?
274 auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
275 auto *DestTy = dyn_cast<FixedVectorType>(Ty);
276 if (SrcTy && DestTy &&
277 SrcTy->getNumElements() == DestTy->getNumElements() &&
278 SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
279 Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
280 return new ShuffleVectorInst(CastX, Mask);
281 }
282 }
283
284 return nullptr;
285}
286
287namespace {
288
289/// Helper class for evaluating whether a value can be computed in a different
290/// type without changing its value. Used by cast simplification transforms.
291class TypeEvaluationHelper {
292public:
293 /// Return true if we can evaluate the specified expression tree as type Ty
294 /// instead of its larger type, and arrive with the same value.
295 /// This is used by code that tries to eliminate truncates.
296 [[nodiscard]] static bool canEvaluateTruncated(Value *V, Type *Ty,
298 Instruction *CxtI);
299
300 /// Determine if the specified value can be computed in the specified wider
301 /// type and produce the same low bits. If not, return false.
302 [[nodiscard]] static bool canEvaluateZExtd(Value *V, Type *Ty,
303 unsigned &BitsToClear,
305 Instruction *CxtI);
306
307 /// Return true if we can take the specified value and return it as type Ty
308 /// without inserting any new casts and without changing the value of the
309 /// common low bits.
310 [[nodiscard]] static bool canEvaluateSExtd(Value *V, Type *Ty);
311
312private:
313 /// Constants and extensions/truncates from the destination type are always
314 /// free to be evaluated in that type.
315 [[nodiscard]] static bool canAlwaysEvaluateInType(Value *V, Type *Ty);
316
317 /// Check if we traversed all the users of the multi-use values we've seen.
318 [[nodiscard]] bool allPendingVisited() const {
319 return llvm::all_of(Pending,
320 [this](Value *V) { return Visited.contains(V); });
321 }
322
323 /// A generic wrapper for canEvaluate* recursions to inject visitation
324 /// tracking and enforce correct multi-use value evaluations.
325 [[nodiscard]] bool
326 canEvaluate(Value *V, Type *Ty,
327 llvm::function_ref<bool(Value *, Type *Type)> Pred) {
328 if (canAlwaysEvaluateInType(V, Ty))
329 return true;
330
331 auto *I = dyn_cast<Instruction>(V);
332
333 if (I == nullptr)
334 return false;
335
336 // We insert false by default to return false when we encounter user loops.
337 const auto [It, Inserted] = Visited.insert({V, false});
338
339 // There are three possible cases for us having information on this value
340 // in the Visited map:
341 // 1. We properly checked it and concluded that we can evaluate it (true)
342 // 2. We properly checked it and concluded that we can't (false)
343 // 3. We started to check it, but during the recursive traversal we came
344 // back to it.
345 //
346 // For cases 1 and 2, we can safely return the stored result. For case 3, we
347 // can potentially have a situation where we can evaluate recursive user
348 // chains, but that can be quite tricky to do properly and isntead, we
349 // return false.
350 //
351 // In any case, we should return whatever was there in the map to begin
352 // with.
353 if (!Inserted)
354 return It->getSecond();
355
356 // We can easily make a decision about single-user values whether they can
357 // be evaluated in a different type or not, we came from that user. This is
358 // not as simple for multi-user values.
359 //
360 // In general, we have the following case (inverted control-flow, users are
361 // at the top):
362 //
363 // Cast %A
364 // ____|
365 // /
366 // %A = Use %B, %C
367 // ________| |
368 // / |
369 // %B = Use %D |
370 // ________| |
371 // / |
372 // %D = Use %C |
373 // ________|___|
374 // /
375 // %C = ...
376 //
377 // In this case, when we check %A, %B and %D, we are confident that we can
378 // make the decision here and now, since we came from their only users.
379 //
380 // For %C, it is harder. We come there twice, and when we come the first
381 // time, it's hard to tell if we will visit the second user (technically
382 // it's not hard, but we might need a lot of repetitive checks with non-zero
383 // cost).
384 //
385 // In the case above, we are allowed to evaluate %C in different type
386 // because all of it users were part of the traversal.
387 //
388 // In the following case, however, we can't make this conclusion:
389 //
390 // Cast %A
391 // ____|
392 // /
393 // %A = Use %B, %C
394 // ________| |
395 // / |
396 // %B = Use %D |
397 // ________| |
398 // / |
399 // %D = Use %C |
400 // | |
401 // foo(%C) | | <- never traversing foo(%C)
402 // ________|___|
403 // /
404 // %C = ...
405 //
406 // In this case, we still can evaluate %C in a different type, but we'd need
407 // to create a copy of the original %C to be used in foo(%C). Such
408 // duplication might be not profitable.
409 //
410 // For this reason, we collect all users of the mult-user values and mark
411 // them as "pending" and defer this decision to the very end. When we are
412 // done and and ready to have a positive verdict, we should double-check all
413 // of the pending users and ensure that we visited them. allPendingVisited
414 // predicate checks exactly that.
415 if (!I->hasOneUse()) {
416 for (Use &U : I->uses()) {
417 // For most instructions, evaluating them in a different type will
418 // change the type of all operands. This is not the case for select
419 // conditions. Make sure we don't retain an extra use via the select
420 // condition.
421 if (isa<SelectInst>(U.getUser()) && U.getOperandNo() == 0)
422 return false;
423
424 Pending.push_back(U.getUser());
425 }
426 }
427
428 const bool Result = Pred(V, Ty);
429 // We have to set result this way and not via It because Pred is recursive
430 // and it is very likely that we grew Visited and invalidated It.
431 Visited[V] = Result;
432 return Result;
433 }
434
435 /// Filter out values that we can not evaluate in the destination type for
436 /// free.
437 [[nodiscard]] bool canNotEvaluateInType(Value *V, Type *Ty);
438
439 [[nodiscard]] bool canEvaluateTruncatedImpl(Value *V, Type *Ty,
440 InstCombinerImpl &IC,
441 Instruction *CxtI);
442 [[nodiscard]] bool canEvaluateTruncatedPred(Value *V, Type *Ty,
443 InstCombinerImpl &IC,
444 Instruction *CxtI);
445 [[nodiscard]] bool canEvaluateZExtdImpl(Value *V, Type *Ty,
446 unsigned &BitsToClear,
447 InstCombinerImpl &IC,
448 Instruction *CxtI);
449 [[nodiscard]] bool canEvaluateSExtdImpl(Value *V, Type *Ty);
450 [[nodiscard]] bool canEvaluateSExtdPred(Value *V, Type *Ty);
451
452 /// A bookkeeping map to memorize an already made decision for a traversed
453 /// value.
454 SmallDenseMap<Value *, bool, 8> Visited;
455
456 /// A list of pending values to check in the end.
457 SmallVector<Value *, 8> Pending;
458};
459
460} // anonymous namespace
461
462/// Constants and extensions/truncates from the destination type are always
463/// free to be evaluated in that type. This is a helper for canEvaluate*.
464bool TypeEvaluationHelper::canAlwaysEvaluateInType(Value *V, Type *Ty) {
465 if (isa<Constant>(V))
466 return match(V, m_ImmConstant());
467
468 Value *X;
469 if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
470 X->getType() == Ty)
471 return true;
472
473 return false;
474}
475
476/// Filter out values that we can not evaluate in the destination type for free.
477/// This is a helper for canEvaluate*.
478bool TypeEvaluationHelper::canNotEvaluateInType(Value *V, Type *Ty) {
479 if (!isa<Instruction>(V))
480 return true;
481 // We don't extend or shrink something that has multiple uses -- doing so
482 // would require duplicating the instruction which isn't profitable.
483 if (!V->hasOneUse())
484 return true;
485
486 return false;
487}
488
489/// Return true if we can evaluate the specified expression tree as type Ty
490/// instead of its larger type, and arrive with the same value.
491/// This is used by code that tries to eliminate truncates.
492///
493/// Ty will always be a type smaller than V. We should return true if trunc(V)
494/// can be computed by computing V in the smaller type. If V is an instruction,
495/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
496/// makes sense if x and y can be efficiently truncated.
497///
498/// This function works on both vectors and scalars.
499///
500bool TypeEvaluationHelper::canEvaluateTruncated(Value *V, Type *Ty,
502 Instruction *CxtI) {
503 TypeEvaluationHelper TYH;
504 return TYH.canEvaluateTruncatedImpl(V, Ty, IC, CxtI) &&
505 // We need to check whether we visited all users of multi-user values,
506 // and we have to do it at the very end, outside of the recursion.
507 TYH.allPendingVisited();
508}
509
510bool TypeEvaluationHelper::canEvaluateTruncatedImpl(Value *V, Type *Ty,
512 Instruction *CxtI) {
513 return canEvaluate(V, Ty, [this, &IC, CxtI](Value *V, Type *Ty) {
514 return canEvaluateTruncatedPred(V, Ty, IC, CxtI);
515 });
516}
517
518bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value *V, Type *Ty,
520 Instruction *CxtI) {
521 auto *I = cast<Instruction>(V);
522 Type *OrigTy = V->getType();
523 switch (I->getOpcode()) {
524 case Instruction::Add:
525 case Instruction::Sub:
526 case Instruction::Mul:
527 case Instruction::And:
528 case Instruction::Or:
529 case Instruction::Xor:
530 // These operators can all arbitrarily be extended or truncated.
531 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
532 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
533
534 case Instruction::UDiv:
535 case Instruction::URem: {
536 // UDiv and URem can be truncated if all the truncated bits are zero.
537 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
538 uint32_t BitWidth = Ty->getScalarSizeInBits();
539 assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
540 APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
541 // Do not preserve the original context instruction. Simplifying div/rem
542 // based on later context may introduce a trap.
543 if (IC.MaskedValueIsZero(I->getOperand(0), Mask, I) &&
544 IC.MaskedValueIsZero(I->getOperand(1), Mask, I)) {
545 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
546 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
547 }
548 break;
549 }
550 case Instruction::Shl: {
551 // If we are truncating the result of this SHL, and if it's a shift of an
552 // inrange amount, we can always perform a SHL in a smaller type.
553 uint32_t BitWidth = Ty->getScalarSizeInBits();
554 KnownBits AmtKnownBits =
555 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
556 if (AmtKnownBits.getMaxValue().ult(BitWidth))
557 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
558 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
559 break;
560 }
561 case Instruction::LShr: {
562 // If this is a truncate of a logical shr, we can truncate it to a smaller
563 // lshr iff we know that the bits we would otherwise be shifting in are
564 // already zeros.
565 // TODO: It is enough to check that the bits we would be shifting in are
566 // zero - use AmtKnownBits.getMaxValue().
567 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
568 uint32_t BitWidth = Ty->getScalarSizeInBits();
569 KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), CxtI);
570 APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
571 APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
572 if (MaxShiftAmt.ult(BitWidth)) {
573 // If the only user is a trunc then we can narrow the shift if any new
574 // MSBs are not going to be used.
575 if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
576 auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
577 if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
578 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
579 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
580 }
581 if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, CxtI))
582 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
583 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
584 }
585 break;
586 }
587 case Instruction::AShr: {
588 // If this is a truncate of an arithmetic shr, we can truncate it to a
589 // smaller ashr iff we know that all the bits from the sign bit of the
590 // original type and the sign bit of the truncate type are similar.
591 // TODO: It is enough to check that the bits we would be shifting in are
592 // similar to sign bit of the truncate type.
593 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
594 uint32_t BitWidth = Ty->getScalarSizeInBits();
595 KnownBits AmtKnownBits =
596 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
597 unsigned ShiftedBits = OrigBitWidth - BitWidth;
598 if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
599 ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), CxtI))
600 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
601 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
602 break;
603 }
604 case Instruction::Trunc:
605 // trunc(trunc(x)) -> trunc(x)
606 return true;
607 case Instruction::ZExt:
608 case Instruction::SExt:
609 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
610 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
611 return true;
612 case Instruction::Select: {
614 return canEvaluateTruncatedImpl(SI->getTrueValue(), Ty, IC, CxtI) &&
615 canEvaluateTruncatedImpl(SI->getFalseValue(), Ty, IC, CxtI);
616 }
617 case Instruction::PHI: {
618 // We can change a phi if we can change all operands. Note that we never
619 // get into trouble with cyclic PHIs here because canEvaluate handles use
620 // chain loops.
621 PHINode *PN = cast<PHINode>(I);
622 return llvm::all_of(
623 PN->incoming_values(), [this, Ty, &IC, CxtI](Value *IncValue) {
624 return canEvaluateTruncatedImpl(IncValue, Ty, IC, CxtI);
625 });
626 }
627 case Instruction::FPToUI:
628 case Instruction::FPToSI: {
629 // If the integer type can hold the max FP value, it is safe to cast
630 // directly to that type. Otherwise, we may create poison via overflow
631 // that did not exist in the original code.
632 Type *InputTy = I->getOperand(0)->getType()->getScalarType();
633 const fltSemantics &Semantics = InputTy->getFltSemantics();
634 uint32_t MinBitWidth = APFloatBase::semanticsIntSizeInBits(
635 Semantics, I->getOpcode() == Instruction::FPToSI);
636 return Ty->getScalarSizeInBits() >= MinBitWidth;
637 }
638 case Instruction::ShuffleVector:
639 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
640 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
641
642 case Instruction::Call: {
643 Value *AbsOp;
645 if (IC.ComputeMaxSignificantBits(AbsOp, CxtI) > Ty->getScalarSizeInBits())
646 return false;
647 return canEvaluateTruncatedImpl(AbsOp, Ty, IC, CxtI);
648 }
649 auto *MM = dyn_cast<MinMaxIntrinsic>(I);
650 if (!MM)
651 return false;
652 // The min/max can be performed in the narrow type when each operand has
653 // zero high bits (for umin/umax) or enough sign bits (for smin/smax).
654 Value *Op0 = MM->getLHS();
655 Value *Op1 = MM->getRHS();
656 uint32_t BitWidth = Ty->getScalarSizeInBits();
657 if (MM->isSigned()) {
658 if (IC.ComputeMaxSignificantBits(Op0, CxtI) > BitWidth ||
659 IC.ComputeMaxSignificantBits(Op1, CxtI) > BitWidth)
660 break;
661 } else {
662 APInt Mask =
664 if (!IC.MaskedValueIsZero(Op0, Mask, CxtI) ||
665 !IC.MaskedValueIsZero(Op1, Mask, CxtI))
666 break;
667 }
668 return canEvaluateTruncatedImpl(Op0, Ty, IC, CxtI) &&
669 canEvaluateTruncatedImpl(Op1, Ty, IC, CxtI);
670 }
671 default:
672 // TODO: Can handle more cases here.
673 break;
674 }
675
676 return false;
677}
678
679/// Given a vector that is bitcast to an integer, optionally logically
680/// right-shifted, and truncated, convert it to an extractelement.
681/// Example (big endian):
682/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
683/// --->
684/// extractelement <4 x i32> %X, 1
686 InstCombinerImpl &IC) {
687 Value *TruncOp = Trunc.getOperand(0);
688 Type *DestType = Trunc.getType();
689 if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
690 return nullptr;
691
692 Value *VecInput = nullptr;
693 ConstantInt *ShiftVal = nullptr;
694 if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
695 m_LShr(m_BitCast(m_Value(VecInput)),
696 m_ConstantInt(ShiftVal)))) ||
697 !isa<VectorType>(VecInput->getType()))
698 return nullptr;
699
700 VectorType *VecType = cast<VectorType>(VecInput->getType());
701 unsigned VecWidth = VecType->getPrimitiveSizeInBits();
702 unsigned DestWidth = DestType->getPrimitiveSizeInBits();
703 unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
704
705 if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
706 return nullptr;
707
708 // If the element type of the vector doesn't match the result type,
709 // bitcast it to a vector type that we can extract from.
710 unsigned NumVecElts = VecWidth / DestWidth;
711 if (VecType->getElementType() != DestType) {
712 VecType = FixedVectorType::get(DestType, NumVecElts);
713 VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
714 }
715
716 unsigned Elt = ShiftAmount / DestWidth;
717 if (IC.getDataLayout().isBigEndian())
718 Elt = NumVecElts - 1 - Elt;
719
720 return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
721}
722
723/// Whenever an element is extracted from a vector, optionally shifted down, and
724/// then truncated, canonicalize by converting it to a bitcast followed by an
725/// extractelement.
726///
727/// Examples (little endian):
728/// trunc (extractelement <4 x i64> %X, 0) to i32
729/// --->
730/// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
731///
732/// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
733/// --->
734/// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
736 InstCombinerImpl &IC) {
737 Value *Src = Trunc.getOperand(0);
738 Type *SrcType = Src->getType();
739 Type *DstType = Trunc.getType();
740
741 // Only attempt this if we have simple aliasing of the vector elements.
742 // A badly fit destination size would result in an invalid cast.
743 unsigned SrcBits = SrcType->getScalarSizeInBits();
744 unsigned DstBits = DstType->getScalarSizeInBits();
745 unsigned TruncRatio = SrcBits / DstBits;
746 if ((SrcBits % DstBits) != 0)
747 return nullptr;
748
749 Value *VecOp;
750 ConstantInt *Cst;
751 const APInt *ShiftAmount = nullptr;
752 if (!match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)))) &&
753 !match(Src,
755 m_APInt(ShiftAmount)))))
756 return nullptr;
757
758 auto *VecOpTy = cast<VectorType>(VecOp->getType());
759 auto VecElts = VecOpTy->getElementCount();
760
761 uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
762 // Make sure we don't overflow in the calculation of the new index.
763 // (VecOpIdx + 1) * TruncRatio should not overflow.
764 if (Cst->uge(std::numeric_limits<uint64_t>::max() / TruncRatio))
765 return nullptr;
766 uint64_t VecOpIdx = Cst->getZExtValue();
767 uint64_t NewIdx = IC.getDataLayout().isBigEndian()
768 ? (VecOpIdx + 1) * TruncRatio - 1
769 : VecOpIdx * TruncRatio;
770
771 // Adjust index by the whole number of truncated elements.
772 if (ShiftAmount) {
773 // Check shift amount is in range and shifts a whole number of truncated
774 // elements.
775 if (ShiftAmount->uge(SrcBits) || ShiftAmount->urem(DstBits) != 0)
776 return nullptr;
777
778 uint64_t IdxOfs = ShiftAmount->udiv(DstBits).getZExtValue();
779 // IdxOfs is guaranteed to be less than TruncRatio, so we won't overflow in
780 // the adjustment.
781 assert(IdxOfs < TruncRatio &&
782 "IdxOfs is expected to be less than TruncRatio.");
783 NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
784 : (NewIdx + IdxOfs);
785 }
786
787 assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
788 "overflow 32-bits");
789
790 auto *BitCastTo =
791 VectorType::get(DstType, BitCastNumElts, VecElts.isScalable());
792 Value *BitCast = IC.Builder.CreateBitCast(VecOp, BitCastTo);
793 return ExtractElementInst::Create(BitCast, IC.Builder.getInt64(NewIdx));
794}
795
796/// Funnel/Rotate left/right may occur in a wider type than necessary because of
797/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
798Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
799 assert((isa<VectorType>(Trunc.getSrcTy()) ||
800 shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
801 "Don't narrow to an illegal scalar type");
802
803 // Bail out on strange types. It is possible to handle some of these patterns
804 // even with non-power-of-2 sizes, but it is not a likely scenario.
805 Type *DestTy = Trunc.getType();
806 unsigned NarrowWidth = DestTy->getScalarSizeInBits();
807 unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
808 if (!isPowerOf2_32(NarrowWidth))
809 return nullptr;
810
811 // First, find an or'd pair of opposite shifts:
812 // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
813 BinaryOperator *Or0, *Or1;
814 if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
815 return nullptr;
816
817 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
818 if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
819 !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
820 Or0->getOpcode() == Or1->getOpcode())
821 return nullptr;
822
823 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
824 if (Or0->getOpcode() == BinaryOperator::LShr) {
825 std::swap(Or0, Or1);
826 std::swap(ShVal0, ShVal1);
827 std::swap(ShAmt0, ShAmt1);
828 }
829 assert(Or0->getOpcode() == BinaryOperator::Shl &&
830 Or1->getOpcode() == BinaryOperator::LShr &&
831 "Illegal or(shift,shift) pair");
832
833 // Match the shift amount operands for a funnel/rotate pattern. This always
834 // matches a subtraction on the R operand.
835 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
836 // The shift amounts may add up to the narrow bit width:
837 // (shl ShVal0, L) | (lshr ShVal1, Width - L)
838 // If this is a funnel shift (different operands are shifted), then the
839 // shift amount can not over-shift (create poison) in the narrow type.
840 unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
841 APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
842 if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
843 if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
844 return L;
845
846 // The following patterns currently only work for rotation patterns.
847 // TODO: Add more general funnel-shift compatible patterns.
848 if (ShVal0 != ShVal1)
849 return nullptr;
850
851 // The shift amount may be masked with negation:
852 // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
853 Value *X;
854 unsigned Mask = Width - 1;
855 if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
857 return X;
858
859 // Same as above, but the shift amount may be extended after masking:
860 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
862 return X;
863
864 return nullptr;
865 };
866
867 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
868 bool IsFshl = true; // Sub on LSHR.
869 if (!ShAmt) {
870 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
871 IsFshl = false; // Sub on SHL.
872 }
873 if (!ShAmt)
874 return nullptr;
875
876 // The right-shifted value must have high zeros in the wide type (for example
877 // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
878 // truncated, so those do not matter.
879 APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
880 if (!MaskedValueIsZero(ShVal1, HiBitMask, &Trunc))
881 return nullptr;
882
883 // Adjust the width of ShAmt for narrowed funnel shift operation:
884 // - Zero-extend if ShAmt is narrower than the destination type.
885 // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
886 // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
887 // zext/trunc(ShAmt)).
888 Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);
889
890 Value *X, *Y;
891 X = Y = Builder.CreateTrunc(ShVal0, DestTy);
892 if (ShVal0 != ShVal1)
893 Y = Builder.CreateTrunc(ShVal1, DestTy);
894 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
895 Function *F =
896 Intrinsic::getOrInsertDeclaration(Trunc.getModule(), IID, DestTy);
897 return CallInst::Create(F, {X, Y, NarrowShAmt});
898}
899
900/// Try to narrow the width of math or bitwise logic instructions by pulling a
901/// truncate ahead of binary operators.
902Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
903 Type *SrcTy = Trunc.getSrcTy();
904 Type *DestTy = Trunc.getType();
905 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
906 unsigned DestWidth = DestTy->getScalarSizeInBits();
907
908 if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
909 return nullptr;
910
911 BinaryOperator *BinOp;
912 if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
913 return nullptr;
914
915 Value *BinOp0 = BinOp->getOperand(0);
916 Value *BinOp1 = BinOp->getOperand(1);
917 switch (BinOp->getOpcode()) {
918 case Instruction::And:
919 case Instruction::Or:
920 case Instruction::Xor:
921 case Instruction::Add:
922 case Instruction::Sub:
923 case Instruction::Mul: {
924 Constant *C;
925 if (match(BinOp0, m_Constant(C))) {
926 // trunc (binop C, X) --> binop (trunc C', X)
927 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
928 Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
929 return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
930 }
931 if (match(BinOp1, m_Constant(C))) {
932 // trunc (binop X, C) --> binop (trunc X, C')
933 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
934 Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
935 return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
936 }
937 Value *X;
938 if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
939 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
940 Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
941 return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
942 }
943 if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
944 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
945 Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
946 return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
947 }
948 break;
949 }
950 case Instruction::LShr:
951 case Instruction::AShr: {
952 // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
953 Value *A;
954 Constant *C;
955 if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
956 unsigned MaxShiftAmt = SrcWidth - DestWidth;
957 // If the shift is small enough, all zero/sign bits created by the shift
958 // are removed by the trunc.
960 APInt(SrcWidth, MaxShiftAmt)))) {
961 auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
962 bool IsExact = OldShift->isExact();
963 if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
964 /*IsSigned*/ true, DL)) {
965 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
966 Value *Shift =
967 OldShift->getOpcode() == Instruction::AShr
968 ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
969 : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
970 return CastInst::CreateTruncOrBitCast(Shift, DestTy);
971 }
972 }
973 }
974 break;
975 }
976 default: break;
977 }
978
979 if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
980 return NarrowOr;
981
982 return nullptr;
983}
984
985/// Try to narrow the width of a splat shuffle. This could be generalized to any
986/// shuffle with a constant operand, but we limit the transform to avoid
987/// creating a shuffle type that targets may not be able to lower effectively.
989 InstCombiner::BuilderTy &Builder) {
990 Value *Shuf = Trunc.getOperand(0), *ShufVec;
991 ArrayRef<int> SplatMask;
992 if (match(Shuf, m_OneUse(m_Shuffle(m_Value(ShufVec), m_Poison(),
993 m_Mask(SplatMask)))) &&
994 match(SplatMask, m_SplatMask()) &&
996 cast<VectorType>(Shuf->getType())->getElementCount(),
997 cast<VectorType>(ShufVec->getType())->getElementCount())) {
998 // trunc (shuf X, poison, SplatMask) --> shuf (trunc X), poison, SplatMask
999 Type *NewTruncTy =
1000 ShufVec->getType()->getWithNewType(Trunc.getType()->getScalarType());
1001 Value *NarrowOp = Builder.CreateTrunc(ShufVec, NewTruncTy);
1002 return new ShuffleVectorInst(NarrowOp, SplatMask);
1003 }
1004
1005 return nullptr;
1006}
1007
1008/// Try to narrow the width of an insert element. This could be generalized for
1009/// any vector constant, but we limit the transform to insertion into poison to
1010/// avoid potential backend problems from unsupported insertion widths. This
1011/// could also be extended to handle the case of inserting a scalar constant
1012/// into a vector variable.
1014 InstCombiner::BuilderTy &Builder) {
1015 Instruction::CastOps Opcode = Trunc.getOpcode();
1016 assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1017 "Unexpected instruction for shrinking");
1018
1019 Value *Elt, *Index;
1020 if (match(Trunc.getOperand(0),
1021 m_OneUse(m_InsertElt(m_Poison(), m_Value(Elt), m_Value(Index))))) {
1022 // trunc (inselt poison, X, Index) --> inselt poison, (trunc X), Index
1023 // fptrunc (inselt poison, X, Index) --> inselt poison, (fptrunc X), Index
1024 auto *NarrowPoison = PoisonValue::get(Trunc.getType());
1025 Value *NarrowOp =
1026 Builder.CreateCast(Opcode, Elt, Trunc.getType()->getScalarType());
1027 return InsertElementInst::Create(NarrowPoison, NarrowOp, Index);
1028 }
1029
1030 return nullptr;
1031}
1032
1034 if (Instruction *Result = commonCastTransforms(Trunc))
1035 return Result;
1036
1037 Value *Src = Trunc.getOperand(0);
1038 Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
1039 unsigned DestWidth = DestTy->getScalarSizeInBits();
1040 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
1041
1042 // Attempt to truncate the entire input expression tree to the destination
1043 // type. Only do this if the dest type is a simple type, don't convert the
1044 // expression tree to something weird like i93 unless the source is also
1045 // strange.
1046 if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
1047 TypeEvaluationHelper::canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
1048
1049 // If this cast is a truncate, evaluting in a different type always
1050 // eliminates the cast, so it is always a win.
1051 LLVM_DEBUG(
1052 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1053 " to avoid cast: "
1054 << Trunc << '\n');
1055 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1056 assert(Res->getType() == DestTy);
1057 return replaceInstUsesWith(Trunc, Res);
1058 }
1059
1060 // For integer types, check if we can shorten the entire input expression to
1061 // DestWidth * 2, which won't allow removing the truncate, but reducing the
1062 // width may enable further optimizations, e.g. allowing for larger
1063 // vectorization factors.
1064 if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
1065 if (DestWidth * 2 < SrcWidth) {
1066 auto *NewDestTy = DestITy->getExtendedType();
1067 if (shouldChangeType(SrcTy, NewDestTy) &&
1068 TypeEvaluationHelper::canEvaluateTruncated(Src, NewDestTy, *this,
1069 &Trunc)) {
1070 LLVM_DEBUG(
1071 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1072 " to reduce the width of operand of"
1073 << Trunc << '\n');
1074 Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
1075 return new TruncInst(Res, DestTy);
1076 }
1077 }
1078 }
1079
1080 // See if we can simplify any instructions used by the input whose sole
1081 // purpose is to compute bits we don't care about.
1083 return &Trunc;
1084
1085 if (DestWidth == 1) {
1086 Value *Zero = Constant::getNullValue(SrcTy);
1087
1088 Value *X;
1089 const APInt *C1;
1090 Constant *C2;
1091 if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
1092 m_ImmConstant(C2))))) {
1093 // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
1094 Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
1095 Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
1096 return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
1097 }
1098
1099 if (match(Src, m_Shr(m_Value(X), m_SpecificInt(SrcWidth - 1)))) {
1100 // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
1101 // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
1102 return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero);
1103 }
1104
1105 Constant *C;
1106 if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
1107 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
1108 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1109 Value *MaskC = Builder.CreateShl(One, C);
1110 Value *And = Builder.CreateAnd(X, MaskC);
1111 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1112 }
1114 m_Deferred(X))))) {
1115 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
1116 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1117 Value *MaskC = Builder.CreateShl(One, C);
1118 Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
1119 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1120 }
1121
1122 {
1123 const APInt *C;
1124 if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
1125 // trunc (C << X) to i1 --> X == 0, where C is odd
1126 return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
1127 }
1128 }
1129
1130 if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
1131 Value *X, *Y;
1132 if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
1133 return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
1134 }
1135
1136 if (match(Src,
1138 return new ICmpInst(ICmpInst::ICMP_EQ, X,
1140 }
1141
1142 Value *A, *B;
1143 Constant *C;
1144
1145 // trunc(u/smin(zext(a) + zext(b), MAX)) --> uadd.sat(a, b)
1146 if (match(Src,
1149 m_SpecificInt(APInt::getMaxValue(DestWidth))),
1151 m_SpecificInt(APInt::getMaxValue(DestWidth)))))) &&
1152 A->getType() == DestTy && B->getType() == DestTy) {
1153 return replaceInstUsesWith(
1154 Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, A, B));
1155 }
1156
1157 // trunc(smax(zext(a) - zext(b), 0)) --> usub.sat(a, b)
1158 if (match(Src, m_OneUse(m_SMax(
1160 m_Zero()))) &&
1161 A->getType() == DestTy && B->getType() == DestTy) {
1162 return replaceInstUsesWith(
1163 Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B));
1164 }
1165
1166 if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
1167 unsigned AWidth = A->getType()->getScalarSizeInBits();
1168 unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
1169 auto *OldSh = cast<Instruction>(Src);
1170 bool IsExact = OldSh->isExact();
1171
1172 // If the shift is small enough, all zero bits created by the shift are
1173 // removed by the trunc.
1175 APInt(SrcWidth, MaxShiftAmt)))) {
1176 auto GetNewShAmt = [&](unsigned Width) {
1177 Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
1178 Constant *Cmp =
1180 Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
1181 return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
1182 DL);
1183 };
1184
1185 // trunc (lshr (sext A), C) --> ashr A, C
1186 if (A->getType() == DestTy) {
1187 Constant *ShAmt = GetNewShAmt(DestWidth);
1188 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
1189 return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
1190 : BinaryOperator::CreateAShr(A, ShAmt);
1191 }
1192 // The types are mismatched, so create a cast after shifting:
1193 // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
1194 if (Src->hasOneUse()) {
1195 Constant *ShAmt = GetNewShAmt(AWidth);
1196 Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
1197 return CastInst::CreateIntegerCast(Shift, DestTy, true);
1198 }
1199 }
1200 // TODO: Mask high bits with 'and'.
1201 }
1202
1203 if (Instruction *I = narrowBinOp(Trunc))
1204 return I;
1205
1206 if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
1207 return I;
1208
1209 if (Instruction *I = shrinkInsertElt(Trunc, Builder))
1210 return I;
1211
1212 if (Src->hasOneUse() &&
1213 (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
1214 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
1215 // dest type is native and cst < dest size.
1216 if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
1217 !match(A, m_Shr(m_Value(), m_Constant()))) {
1218 // Skip shifts of shift by constants. It undoes a combine in
1219 // FoldShiftByConstant and is the extend in reg pattern.
1220 APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
1221 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
1222 Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
1223 return BinaryOperator::Create(Instruction::Shl, NewTrunc,
1224 ConstantExpr::getTrunc(C, DestTy));
1225 }
1226 }
1227 }
1228
1229 // trunc (select(icmp_ult(A, DestTy_umax+1), A, sext(icmp_sgt(A, 0)))) -->
1230 // trunc (smin(smax(0, A), DestTy_umax))
1231 if (SrcTy->isIntegerTy() && isPowerOf2_64(SrcTy->getPrimitiveSizeInBits()) &&
1233 match(Src, m_OneUse(m_Select(
1235 m_Constant(C))),
1236 m_Deferred(A),
1238 ICmpInst::ICMP_SGT, m_Deferred(A), m_Zero())))))))) {
1239 APInt UpperBound = C->getUniqueInteger();
1240 APInt TruncatedMax = APInt::getAllOnes(DestTy->getIntegerBitWidth());
1241 TruncatedMax = TruncatedMax.zext(UpperBound.getBitWidth());
1242 if (!UpperBound.isZero() && UpperBound - 1 == TruncatedMax) {
1243 Value *SMax = Builder.CreateIntrinsic(Intrinsic::smax, {SrcTy},
1244 {ConstantInt::get(SrcTy, 0), A});
1245 Value *SMin = Builder.CreateIntrinsic(
1246 Intrinsic::smin, {SrcTy},
1247 {SMax, ConstantInt::get(SrcTy, TruncatedMax)});
1248 return new TruncInst(SMin, DestTy);
1249 }
1250 }
1251
1252 if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
1253 return I;
1254
1255 if (Instruction *I = foldVecExtTruncToExtElt(Trunc, *this))
1256 return I;
1257
1258 // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
1259 if (match(Src, m_OneUse(m_Ctlz(m_ZExt(m_Value(A)), m_Value(B))))) {
1260 unsigned AWidth = A->getType()->getScalarSizeInBits();
1261 if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
1262 Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
1263 Value *NarrowCtlz =
1264 Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
1265 return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
1266 }
1267 }
1268
1269 if (match(Src, m_VScale())) {
1270 if (Trunc.getFunction() &&
1271 Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1272 Attribute Attr =
1273 Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
1274 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1275 if (Log2_32(*MaxVScale) < DestWidth)
1276 return replaceInstUsesWith(Trunc, Builder.CreateVScale(DestTy));
1277 }
1278 }
1279
1280 if (DestWidth == 1 &&
1281 (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) &&
1282 isKnownNonZero(Src, SQ.getWithInstruction(&Trunc)))
1283 return replaceInstUsesWith(Trunc, ConstantInt::getTrue(DestTy));
1284
1285 bool Changed = false;
1286 if (!Trunc.hasNoSignedWrap() &&
1287 ComputeMaxSignificantBits(Src, &Trunc) <= DestWidth) {
1288 Trunc.setHasNoSignedWrap(true);
1289 Changed = true;
1290 }
1291 if (!Trunc.hasNoUnsignedWrap() &&
1292 MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
1293 &Trunc)) {
1294 Trunc.setHasNoUnsignedWrap(true);
1295 Changed = true;
1296 }
1297
1298 const APInt *C1;
1299 Value *V1;
1300 // OP = { lshr, ashr }
1301 // trunc ( OP i8 C1, V1) to i1 -> icmp eq V1, log_2(C1) iff C1 is power of 2
1302 if (DestWidth == 1 && match(Src, m_Shr(m_Power2(C1), m_Value(V1)))) {
1303 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1304 return new ICmpInst(ICmpInst::ICMP_EQ, V1, Right);
1305 }
1306
1307 // OP = { lshr, ashr }
1308 // trunc ( OP i8 C1, V1) to i1 -> icmp ult V1, log_2(C1 + 1) iff (C1 + 1) is
1309 // power of 2
1310 if (DestWidth == 1 && match(Src, m_Shr(m_LowBitMask(C1), m_Value(V1)))) {
1311 Value *Right = ConstantInt::get(V1->getType(), C1->countr_one());
1312 return new ICmpInst(ICmpInst::ICMP_ULT, V1, Right);
1313 }
1314
1315 // OP = { lshr, ashr }
1316 // trunc ( OP i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is
1317 // negative power of 2
1318 if (DestWidth == 1 && match(Src, m_Shr(m_NegatedPower2(C1), m_Value(V1)))) {
1319 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1320 return new ICmpInst(ICmpInst::ICMP_UGE, V1, Right);
1321 }
1322
1323 return Changed ? &Trunc : nullptr;
1324}
1325
1326Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
1327 ZExtInst &Zext) {
1328 // If we are just checking for a icmp eq of a single bit and zext'ing it
1329 // to an integer, then shift the bit to the appropriate place and then
1330 // cast to integer to avoid the comparison.
1331
1332 // FIXME: This set of transforms does not check for extra uses and/or creates
1333 // an extra instruction (an optional final cast is not included
1334 // in the transform comments). We may also want to favor icmp over
1335 // shifts in cases of equal instructions because icmp has better
1336 // analysis in general (invert the transform).
1337
1338 const APInt *Op1CV;
1339 if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
1340
1341 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
1342 if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
1343 Value *In = Cmp->getOperand(0);
1344 Value *Sh = ConstantInt::get(In->getType(),
1345 In->getType()->getScalarSizeInBits() - 1);
1346 In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
1347 if (In->getType() != Zext.getType())
1348 In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
1349
1350 return replaceInstUsesWith(Zext, In);
1351 }
1352
1353 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
1354 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1355 // zext (X != 0) to i32 --> X iff X has only the low bit set.
1356 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1357
1358 if (Op1CV->isZero() && Cmp->isEquality()) {
1359 // Exactly 1 possible 1? But not the high-bit because that is
1360 // canonicalized to this form.
1361 KnownBits Known = computeKnownBits(Cmp->getOperand(0), &Zext);
1362 APInt KnownZeroMask(~Known.Zero);
1363 uint32_t ShAmt = KnownZeroMask.logBase2();
1364 bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
1365 (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
1366 if (IsExpectShAmt &&
1367 (Cmp->getOperand(0)->getType() == Zext.getType() ||
1368 Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
1369 Value *In = Cmp->getOperand(0);
1370 if (ShAmt) {
1371 // Perform a logical shr by shiftamt.
1372 // Insert the shift to put the result in the low bit.
1373 In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
1374 In->getName() + ".lobit");
1375 }
1376
1377 // Toggle the low bit for "X == 0".
1378 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1379 In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));
1380
1381 if (Zext.getType() == In->getType())
1382 return replaceInstUsesWith(Zext, In);
1383
1384 Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
1385 return replaceInstUsesWith(Zext, IntCast);
1386 }
1387 }
1388 }
1389
1390 if (Cmp->isEquality()) {
1391 // Test if a bit is clear/set using a shifted-one mask:
1392 // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1393 // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1394 Value *X, *ShAmt;
1395 if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
1396 match(Cmp->getOperand(0),
1397 m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
1398 auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
1399 Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
1400 if (Zext.getType() == And->getType() ||
1401 Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
1402 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1403 X = Builder.CreateNot(X);
1404 Value *Lshr = Builder.CreateLShr(X, ShAmt);
1405 Value *And1 =
1406 Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
1407 return replaceInstUsesWith(
1408 Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
1409 }
1410 }
1411 }
1412
1413 return nullptr;
1414}
1415
1416/// Determine if the specified value can be computed in the specified wider type
1417/// and produce the same low bits. If not, return false.
1418///
1419/// If this function returns true, it can also return a non-zero number of bits
1420/// (in BitsToClear) which indicates that the value it computes is correct for
1421/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1422/// out. For example, to promote something like:
1423///
1424/// %B = trunc i64 %A to i32
1425/// %C = lshr i32 %B, 8
1426/// %E = zext i32 %C to i64
1427///
1428/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1429/// set to 8 to indicate that the promoted value needs to have bits 24-31
1430/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1431/// clear the top bits anyway, doing this has no extra cost.
1432///
1433/// This function works on both vectors and scalars.
1434bool TypeEvaluationHelper::canEvaluateZExtd(Value *V, Type *Ty,
1435 unsigned &BitsToClear,
1436 InstCombinerImpl &IC,
1437 Instruction *CxtI) {
1438 TypeEvaluationHelper TYH;
1439 return TYH.canEvaluateZExtdImpl(V, Ty, BitsToClear, IC, CxtI);
1440}
1441bool TypeEvaluationHelper::canEvaluateZExtdImpl(Value *V, Type *Ty,
1442 unsigned &BitsToClear,
1443 InstCombinerImpl &IC,
1444 Instruction *CxtI) {
1445 BitsToClear = 0;
1446 if (canAlwaysEvaluateInType(V, Ty))
1447 return true;
1448 // We stick to the one-user limit for the ZExt transform due to the fact
1449 // that this predicate returns two values: predicate result and BitsToClear.
1450 if (canNotEvaluateInType(V, Ty))
1451 return false;
1452
1453 auto *I = cast<Instruction>(V);
1454 unsigned Tmp;
1455 switch (I->getOpcode()) {
1456 case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1457 case Instruction::SExt: // zext(sext(x)) -> sext(x).
1458 case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1459 return true;
1460 case Instruction::And:
1461 case Instruction::Or:
1462 case Instruction::Xor:
1463 case Instruction::Add:
1464 case Instruction::Sub:
1465 case Instruction::Mul:
1466 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
1467 !canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI))
1468 return false;
1469 // These can all be promoted if neither operand has 'bits to clear'.
1470 if (BitsToClear == 0 && Tmp == 0)
1471 return true;
1472
1473 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1474 // other side, BitsToClear is ok.
1475 if (Tmp == 0 && I->isBitwiseLogicOp()) {
1476 // We use MaskedValueIsZero here for generality, but the case we care
1477 // about the most is constant RHS.
1478 unsigned VSize = V->getType()->getScalarSizeInBits();
1479 if (IC.MaskedValueIsZero(I->getOperand(1),
1480 APInt::getHighBitsSet(VSize, BitsToClear),
1481 CxtI)) {
1482 // If this is an And instruction and all of the BitsToClear are
1483 // known to be zero we can reset BitsToClear.
1484 if (I->getOpcode() == Instruction::And)
1485 BitsToClear = 0;
1486 return true;
1487 }
1488 }
1489
1490 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1491 return false;
1492
1493 case Instruction::Shl: {
1494 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1495 // upper bits we can reduce BitsToClear by the shift amount.
1496 uint64_t ShiftAmt;
1497 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1498 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1499 return false;
1500 BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1501 return true;
1502 }
1503 return false;
1504 }
1505 case Instruction::LShr: {
1506 // We can promote lshr(x, cst) if we can promote x. This requires the
1507 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1508 uint64_t ShiftAmt;
1509 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1510 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1511 return false;
1512 BitsToClear += ShiftAmt;
1513 if (BitsToClear > V->getType()->getScalarSizeInBits())
1514 BitsToClear = V->getType()->getScalarSizeInBits();
1515 return true;
1516 }
1517 // Cannot promote variable LSHR.
1518 return false;
1519 }
1520 case Instruction::Select:
1521 if (!canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
1522 !canEvaluateZExtdImpl(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
1523 // TODO: If important, we could handle the case when the BitsToClear are
1524 // known zero in the disagreeing side.
1525 Tmp != BitsToClear)
1526 return false;
1527 return true;
1528
1529 case Instruction::PHI: {
1530 // We can change a phi if we can change all operands. Note that we never
1531 // get into trouble with cyclic PHIs here because we only consider
1532 // instructions with a single use.
1533 PHINode *PN = cast<PHINode>(I);
1534 if (!canEvaluateZExtdImpl(PN->getIncomingValue(0), Ty, BitsToClear, IC,
1535 CxtI))
1536 return false;
1537 for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1538 if (!canEvaluateZExtdImpl(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
1539 // TODO: If important, we could handle the case when the BitsToClear
1540 // are known zero in the disagreeing input.
1541 Tmp != BitsToClear)
1542 return false;
1543 return true;
1544 }
1545 case Instruction::Call:
1546 // llvm.vscale() can always be executed in larger type, because the
1547 // value is automatically zero-extended.
1549 if (II->getIntrinsicID() == Intrinsic::vscale)
1550 return true;
1551 return false;
1552 default:
1553 // TODO: Can handle more cases here.
1554 return false;
1555 }
1556}
1557
1559 // If this zero extend is only used by a truncate, let the truncate be
1560 // eliminated before we try to optimize this zext.
1561 if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
1562 !isa<Constant>(Zext.getOperand(0)))
1563 return nullptr;
1564
1565 // If one of the common conversion will work, do it.
1566 if (Instruction *Result = commonCastTransforms(Zext))
1567 return Result;
1568
1569 if (auto *NewI = foldExtractionOfVectorDeinterleave(Zext))
1570 return NewI;
1571
1572 Value *Src = Zext.getOperand(0);
1573 Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
1574
1575 // zext nneg bool x -> 0
1576 if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
1578
1579 // Try to extend the entire expression tree to the wide destination type.
1580 unsigned BitsToClear;
1581 if (shouldChangeType(SrcTy, DestTy) &&
1582 TypeEvaluationHelper::canEvaluateZExtd(Src, DestTy, BitsToClear, *this,
1583 &Zext)) {
1584 assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1585 "Can't clear more bits than in SrcTy");
1586
1587 // Okay, we can transform this! Insert the new expression now.
1588 LLVM_DEBUG(
1589 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1590 " to avoid zero extend: "
1591 << Zext << '\n');
1592 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1593 assert(Res->getType() == DestTy);
1594
1595 // Preserve debug values referring to Src if the zext is its last use.
1596 if (auto *SrcOp = dyn_cast<Instruction>(Src))
1597 if (SrcOp->hasOneUse())
1598 replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
1599
1600 uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1601 uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1602
1603 // If the high bits are already filled with zeros, just replace this
1604 // cast with the result.
1606 Res, APInt::getHighBitsSet(DestBitSize, DestBitSize - SrcBitsKept),
1607 &Zext))
1608 return replaceInstUsesWith(Zext, Res);
1609
1610 // We need to emit an AND to clear the high bits.
1611 Constant *C = ConstantInt::get(Res->getType(),
1612 APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
1613 return BinaryOperator::CreateAnd(Res, C);
1614 }
1615
1616 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1617 // types and if the sizes are just right we can convert this into a logical
1618 // 'and' which will be much cheaper than the pair of casts.
1619 if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
1620 // TODO: Subsume this into EvaluateInDifferentType.
1621
1622 // Get the sizes of the types involved. We know that the intermediate type
1623 // will be smaller than A or C, but don't know the relation between A and C.
1624 Value *A = CSrc->getOperand(0);
1625 unsigned SrcSize = A->getType()->getScalarSizeInBits();
1626 unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1627 unsigned DstSize = DestTy->getScalarSizeInBits();
1628 // If we're actually extending zero bits, then if
1629 // SrcSize < DstSize: zext(a & mask)
1630 // SrcSize == DstSize: a & mask
1631 // SrcSize > DstSize: trunc(a) & mask
1632 if (SrcSize < DstSize) {
1633 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1634 Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
1635 Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
1636 return new ZExtInst(And, DestTy);
1637 }
1638
1639 if (SrcSize == DstSize) {
1640 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1641 return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
1642 AndValue));
1643 }
1644 if (SrcSize > DstSize) {
1645 Value *Trunc = Builder.CreateTrunc(A, DestTy);
1646 APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
1647 return BinaryOperator::CreateAnd(Trunc,
1648 ConstantInt::get(Trunc->getType(),
1649 AndValue));
1650 }
1651 }
1652
1653 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1654 return transformZExtICmp(Cmp, Zext);
1655
1656 // zext(trunc(X) & C) -> (X & zext(C)).
1657 Constant *C;
1658 Value *X;
1659 if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
1660 X->getType() == DestTy)
1661 return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));
1662
1663 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1664 Value *And;
1665 if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
1667 X->getType() == DestTy) {
1668 Value *ZC = Builder.CreateZExt(C, DestTy);
1669 return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
1670 }
1671
1672 // If we are truncating, masking, and then zexting back to the original type,
1673 // that's just a mask. This is not handled by canEvaluateZextd if the
1674 // intermediate values have extra uses. This could be generalized further for
1675 // a non-constant mask operand.
1676 // zext (and (trunc X), C) --> and X, (zext C)
1677 if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
1678 X->getType() == DestTy) {
1679 Value *ZextC = Builder.CreateZExt(C, DestTy);
1680 return BinaryOperator::CreateAnd(X, ZextC);
1681 }
1682
1683 if (match(Src, m_VScale())) {
1684 if (Zext.getFunction() &&
1685 Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1686 Attribute Attr =
1687 Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1688 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1689 unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1690 if (Log2_32(*MaxVScale) < TypeWidth)
1691 return replaceInstUsesWith(Zext, Builder.CreateVScale(DestTy));
1692 }
1693 }
1694 }
1695
1696 if (!Zext.hasNonNeg()) {
1697 // If this zero extend is only used by a shift, add nneg flag.
1698 if (Zext.hasOneUse() &&
1699 SrcTy->getScalarSizeInBits() >
1700 Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
1701 match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
1702 Zext.setNonNeg();
1703 return &Zext;
1704 }
1705
1706 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
1707 Zext.setNonNeg();
1708 return &Zext;
1709 }
1710 }
1711
1712 return nullptr;
1713}
1714
1715/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1716Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
1717 SExtInst &Sext) {
1718 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1719 ICmpInst::Predicate Pred = Cmp->getPredicate();
1720
1721 // Don't bother if Op1 isn't of vector or integer type.
1722 if (!Op1->getType()->isIntOrIntVectorTy())
1723 return nullptr;
1724
1725 if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
1726 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1727 Value *Sh = ConstantInt::get(Op0->getType(),
1728 Op0->getType()->getScalarSizeInBits() - 1);
1729 Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1730 if (In->getType() != Sext.getType())
1731 In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
1732
1733 return replaceInstUsesWith(Sext, In);
1734 }
1735
1736 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1737 // If we know that only one bit of the LHS of the icmp can be set and we
1738 // have an equality comparison with zero or a power of 2, we can transform
1739 // the icmp and sext into bitwise/integer operations.
1740 if (Cmp->hasOneUse() &&
1741 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1742 KnownBits Known = computeKnownBits(Op0, &Sext);
1743
1744 APInt KnownZeroMask(~Known.Zero);
1745 if (KnownZeroMask.isPowerOf2()) {
1746 Value *In = Cmp->getOperand(0);
1747
1748 // If the icmp tests for a known zero bit we can constant fold it.
1749 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1750 Value *V = Pred == ICmpInst::ICMP_NE ?
1752 ConstantInt::getNullValue(Sext.getType());
1753 return replaceInstUsesWith(Sext, V);
1754 }
1755
1756 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1757 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1758 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1759 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1760 // Perform a right shift to place the desired bit in the LSB.
1761 if (ShiftAmt)
1762 In = Builder.CreateLShr(In,
1763 ConstantInt::get(In->getType(), ShiftAmt));
1764
1765 // At this point "In" is either 1 or 0. Subtract 1 to turn
1766 // {1, 0} -> {0, -1}.
1767 In = Builder.CreateAdd(In,
1768 ConstantInt::getAllOnesValue(In->getType()),
1769 "sext");
1770 } else {
1771 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1772 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1773 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1774 // Perform a left shift to place the desired bit in the MSB.
1775 if (ShiftAmt)
1776 In = Builder.CreateShl(In,
1777 ConstantInt::get(In->getType(), ShiftAmt));
1778
1779 // Distribute the bit over the whole bit width.
1780 In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1781 KnownZeroMask.getBitWidth() - 1), "sext");
1782 }
1783
1784 if (Sext.getType() == In->getType())
1785 return replaceInstUsesWith(Sext, In);
1786 return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
1787 }
1788 }
1789 }
1790
1791 return nullptr;
1792}
1793
1794/// Return true if we can take the specified value and return it as type Ty
1795/// without inserting any new casts and without changing the value of the common
1796/// low bits. This is used by code that tries to promote integer operations to
1797/// a wider types will allow us to eliminate the extension.
1798///
1799/// This function works on both vectors and scalars.
1800///
1801bool TypeEvaluationHelper::canEvaluateSExtd(Value *V, Type *Ty) {
1802 TypeEvaluationHelper TYH;
1803 return TYH.canEvaluateSExtdImpl(V, Ty) && TYH.allPendingVisited();
1804}
1805
1806bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
1807 return canEvaluate(V, Ty, [this](Value *V, Type *Ty) {
1808 return canEvaluateSExtdPred(V, Ty);
1809 });
1810}
1811
1812bool TypeEvaluationHelper::canEvaluateSExtdPred(Value *V, Type *Ty) {
1813 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1814 "Can't sign extend type to a smaller type");
1815
1816 auto *I = cast<Instruction>(V);
1817 switch (I->getOpcode()) {
1818 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1819 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1820 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1821 return true;
1822 case Instruction::And:
1823 case Instruction::Or:
1824 case Instruction::Xor:
1825 case Instruction::Add:
1826 case Instruction::Sub:
1827 case Instruction::Mul:
1828 // These operators can all arbitrarily be extended if their inputs can.
1829 return canEvaluateSExtdImpl(I->getOperand(0), Ty) &&
1830 canEvaluateSExtdImpl(I->getOperand(1), Ty);
1831
1832 // case Instruction::Shl: TODO
1833 // case Instruction::LShr: TODO
1834
1835 case Instruction::Select:
1836 return canEvaluateSExtdImpl(I->getOperand(1), Ty) &&
1837 canEvaluateSExtdImpl(I->getOperand(2), Ty);
1838
1839 case Instruction::PHI: {
1840 // We can change a phi if we can change all operands. Note that we never
1841 // get into trouble with cyclic PHIs here because canEvaluate handles use
1842 // chain loops.
1843 PHINode *PN = cast<PHINode>(I);
1844 for (Value *IncValue : PN->incoming_values())
1845 if (!canEvaluateSExtdImpl(IncValue, Ty))
1846 return false;
1847 return true;
1848 }
1849 default:
1850 // TODO: Can handle more cases here.
1851 break;
1852 }
1853
1854 return false;
1855}
1856
1858 // If this sign extend is only used by a truncate, let the truncate be
1859 // eliminated before we try to optimize this sext.
1860 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1861 return nullptr;
1862
1863 if (Instruction *I = commonCastTransforms(Sext))
1864 return I;
1865
1866 Value *Src = Sext.getOperand(0);
1867 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1868 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1869 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1870
1871 // If the value being extended is zero or positive, use a zext instead.
1872 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1873 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1874 CI->setNonNeg(true);
1875 return CI;
1876 }
1877
1878 // Try to extend the entire expression tree to the wide destination type.
1879 bool ShouldExtendExpression = true;
1880 Value *TruncSrc = nullptr;
1881 // It is not desirable to extend expression in the trunc + sext pattern when
1882 // destination type is narrower than original (pre-trunc) type.
1883 if (match(Src, m_Trunc(m_Value(TruncSrc))))
1884 if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
1885 ShouldExtendExpression = false;
1886 if (ShouldExtendExpression && shouldChangeType(SrcTy, DestTy) &&
1887 TypeEvaluationHelper::canEvaluateSExtd(Src, DestTy)) {
1888 // Okay, we can transform this! Insert the new expression now.
1889 LLVM_DEBUG(
1890 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1891 " to avoid sign extend: "
1892 << Sext << '\n');
1893 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1894 assert(Res->getType() == DestTy);
1895
1896 // If the high bits are already filled with sign bit, just replace this
1897 // cast with the result.
1898 if (ComputeNumSignBits(Res, &Sext) > DestBitSize - SrcBitSize)
1899 return replaceInstUsesWith(Sext, Res);
1900
1901 // We need to emit a shl + ashr to do the sign extend.
1902 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1903 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1904 ShAmt);
1905 }
1906
1907 Value *X = TruncSrc;
1908 if (X) {
1909 // If the input has more sign bits than bits truncated, then convert
1910 // directly to final type.
1911 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1912 bool HasNSW = cast<TruncInst>(Src)->hasNoSignedWrap();
1913 if (HasNSW || (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)) {
1914 auto *Res = CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1915 if (auto *ResTrunc = dyn_cast<TruncInst>(Res); ResTrunc && HasNSW)
1916 ResTrunc->setHasNoSignedWrap(true);
1917 return Res;
1918 }
1919
1920 // If input is a trunc from the destination type, then convert into shifts.
1921 if (Src->hasOneUse() && X->getType() == DestTy) {
1922 // sext (trunc X) --> ashr (shl X, C), C
1923 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1924 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1925 }
1926
1927 // If we are replacing shifted-in high zero bits with sign bits, convert
1928 // the logic shift to arithmetic shift and eliminate the cast to
1929 // intermediate type:
1930 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1931 Value *Y;
1932 if (Src->hasOneUse() &&
1934 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1935 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1936 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1937 }
1938 }
1939
1940 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1941 return transformSExtICmp(Cmp, Sext);
1942
1943 // If the input is a shl/ashr pair of a same constant, then this is a sign
1944 // extension from a smaller value. If we could trust arbitrary bitwidth
1945 // integers, we could turn this into a truncate to the smaller bit and then
1946 // use a sext for the whole extension. Since we don't, look deeper and check
1947 // for a truncate. If the source and dest are the same type, eliminate the
1948 // trunc and extend and just do shifts. For example, turn:
1949 // %a = trunc i32 %i to i8
1950 // %b = shl i8 %a, C
1951 // %c = ashr i8 %b, C
1952 // %d = sext i8 %c to i32
1953 // into:
1954 // %a = shl i32 %i, 32-(8-C)
1955 // %d = ashr i32 %a, 32-(8-C)
1956 Value *A = nullptr;
1957 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1958 Constant *BA = nullptr, *CA = nullptr;
1959 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1960 m_ImmConstant(CA))) &&
1961 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1962 Constant *WideCurrShAmt =
1963 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1964 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1965 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1966 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1967 Constant *NewShAmt = ConstantExpr::getSub(
1968 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1969 NumLowbitsLeft);
1970 NewShAmt =
1972 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1973 return BinaryOperator::CreateAShr(A, NewShAmt);
1974 }
1975
1976 // Splatting a bit of constant-index across a value:
1977 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1978 // If the dest type is different, use a cast (adjust use check).
1979 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1980 m_SpecificInt(SrcBitSize - 1))))) {
1981 Type *XTy = X->getType();
1982 unsigned XBitSize = XTy->getScalarSizeInBits();
1983 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1984 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1985 if (XTy == DestTy)
1986 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1987 AshrAmtC);
1988 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1989 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1990 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1991 }
1992 }
1993
1994 if (match(Src, m_VScale())) {
1995 if (Sext.getFunction() &&
1996 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1997 Attribute Attr =
1998 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1999 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
2000 if (Log2_32(*MaxVScale) < (SrcBitSize - 1))
2001 return replaceInstUsesWith(Sext, Builder.CreateVScale(DestTy));
2002 }
2003 }
2004
2005 // sext(scmp(x, y)) -> scmp(x, y) with a wider result type.
2006 // sext(ucmp(x, y)) -> ucmp(x, y) with a wider result type.
2007 // scmp/ucmp return only -1, 0, or 1, which sign-extend correctly to any
2008 // wider integer type, so we can sink the extension into the intrinsic.
2009 if (auto *II = dyn_cast<IntrinsicInst>(Src)) {
2010 Intrinsic::ID IID = II->getIntrinsicID();
2011 if ((IID == Intrinsic::scmp || IID == Intrinsic::ucmp) && II->hasOneUse())
2012 return replaceInstUsesWith(
2013 Sext, Builder.CreateIntrinsic(
2014 DestTy, IID, {II->getArgOperand(0), II->getArgOperand(1)}));
2015 }
2016
2017 return nullptr;
2018}
2019
2020/// Return a Constant* for the specified floating-point constant if it fits
2021/// in the specified FP type without changing its value.
2022static bool fitsInFPType(APFloat F, const fltSemantics &Sem) {
2023 bool losesInfo;
2024 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
2025 return !losesInfo;
2026}
2027
2029 bool PreferBFloat) {
2030 // See if the value can be truncated to bfloat and then reextended.
2031 if (PreferBFloat && fitsInFPType(F, APFloat::BFloat()))
2032 return Type::getBFloatTy(Ctx);
2033 // See if the value can be truncated to half and then reextended.
2034 if (!PreferBFloat && fitsInFPType(F, APFloat::IEEEhalf()))
2035 return Type::getHalfTy(Ctx);
2036 // See if the value can be truncated to float and then reextended.
2038 return Type::getFloatTy(Ctx);
2039 if (&F.getSemantics() == &APFloat::IEEEdouble())
2040 return nullptr; // Won't shrink.
2041 // See if the value can be truncated to double and then reextended.
2043 return Type::getDoubleTy(Ctx);
2044 // Don't try to shrink to various long double types.
2045 return nullptr;
2046}
2047
2048static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
2049 Type *Ty = CFP->getType();
2050 if (Ty->getScalarType()->isPPC_FP128Ty())
2051 return nullptr; // No constant folding of this.
2052
2053 Type *ShrinkTy =
2054 shrinkFPConstant(CFP->getContext(), CFP->getValueAPF(), PreferBFloat);
2055 if (ShrinkTy)
2056 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2057 ShrinkTy = VectorType::get(ShrinkTy, VecTy);
2058
2059 return ShrinkTy;
2060}
2061
2062// Determine if this is a vector of ConstantFPs and if so, return the minimal
2063// type we can safely truncate all elements to.
2064static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
2065 auto *CV = dyn_cast<Constant>(V);
2066 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
2067 if (!CV || !CVVTy)
2068 return nullptr;
2069
2070 Type *MinType = nullptr;
2071
2072 unsigned NumElts = CVVTy->getNumElements();
2073
2074 // For fixed-width vectors we find the minimal type by looking
2075 // through the constant values of the vector.
2076 for (unsigned I = 0; I != NumElts; ++I) {
2077 if (match(CV->getAggregateElement(I), m_Poison()))
2078 continue;
2079
2080 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(I));
2081 if (!CFP)
2082 return nullptr;
2083
2084 Type *T = shrinkFPConstant(CFP, PreferBFloat);
2085 if (!T)
2086 return nullptr;
2087
2088 // If we haven't found a type yet or this type has a larger mantissa than
2089 // our previous type, this is our new minimal type.
2090 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
2091 MinType = T;
2092 }
2093
2094 // Make a vector type from the minimal type.
2095 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
2096}
2097
2098/// Find the minimum FP type we can safely truncate to.
2099static Type *getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC) {
2100 if (auto *FPExt = dyn_cast<FPExtInst>(V))
2101 return FPExt->getOperand(0)->getType();
2102
2103 Value *Src;
2104 if (match(V, m_IToFP(m_Value(Src))) &&
2105 IC.canBeCastedExactlyIntToFP(Src, PreferredTy, isa<SIToFPInst>(V),
2107 return PreferredTy;
2108
2109 bool PreferBFloat = PreferredTy->getScalarType()->isBFloatTy();
2110 // If this value is a constant, return the constant in the smallest FP type
2111 // that can accurately represent it. This allows us to turn
2112 // (float)((double)X+2.0) into x+2.0f.
2113 if (auto *CFP = dyn_cast<ConstantFP>(V))
2114 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
2115 return T;
2116
2117 // Try to shrink scalable and fixed splat vectors.
2118 if (auto *FPC = dyn_cast<Constant>(V))
2119 if (auto *VTy = dyn_cast<VectorType>(V->getType()))
2120 if (auto *Splat = dyn_cast_or_null<ConstantFP>(FPC->getSplatValue()))
2121 if (Type *T = shrinkFPConstant(Splat, PreferBFloat))
2122 return VectorType::get(T, VTy);
2123
2124 // Try to shrink a vector of FP constants. This returns nullptr on scalable
2125 // vectors
2126 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
2127 return T;
2128
2129 return V->getType();
2130}
2131
2133 bool IsSigned,
2134 const Instruction *CxtI) const {
2135 Type *SrcTy = V->getType();
2136 assert(SrcTy->isIntOrIntVectorTy() && "Expected an integer type");
2137 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
2138 int DestNumSigBits = FPTy->getFPMantissaWidth();
2139
2140 // Easy case - if the source integer type has less bits than the FP mantissa,
2141 // then the cast must be exact.
2142 if (SrcSize <= DestNumSigBits)
2143 return true;
2144
2145 // Cast from FP to integer and back to FP is independent of the intermediate
2146 // integer width because of poison on overflow.
2147 Value *F;
2148 if (match(V, m_FPToI(m_Value(F)))) {
2149 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
2150 // potential rounding of negative FP input values.
2151 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
2152 if (!IsSigned && match(V, m_FPToSI(m_Value())))
2153 SrcNumSigBits++;
2154
2155 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
2156 // significant bits than the destination (and make sure neither type is
2157 // weird -- ppc_fp128).
2158 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
2159 SrcNumSigBits <= DestNumSigBits)
2160 return true;
2161 }
2162
2163 // Try harder to find if the source integer type has less significant bits.
2164 // Compute number of sign bits or determine trailing zeros.
2165 KnownBits SrcKnown = computeKnownBits(V, CxtI);
2166 int SigBits = (int)SrcTy->getScalarSizeInBits() -
2167 SrcKnown.countMinLeadingZeros() -
2168 SrcKnown.countMinTrailingZeros();
2169 if (SigBits <= DestNumSigBits)
2170 return true;
2171
2172 // For sitofp, the sign maps to the FP sign bit, so only magnitude bits
2173 // (BitWidth - NumSignBits) consume mantissa.
2174 if (IsSigned) {
2175 SigBits = (int)SrcTy->getScalarSizeInBits() - ComputeNumSignBits(V, CxtI);
2176 if (SigBits <= DestNumSigBits)
2177 return true;
2178 }
2179
2180 return false;
2181}
2182
2184 CastInst::CastOps Opcode = I.getOpcode();
2185 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
2186 "Unexpected cast");
2187 Value *Src = I.getOperand(0);
2188 Type *FPTy = I.getType();
2189 return canBeCastedExactlyIntToFP(Src, FPTy, Opcode == CastInst::SIToFP, &I);
2190}
2191
2194 return I;
2195
2196 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
2197 // simplify this expression to avoid one or more of the trunc/extend
2198 // operations if we can do so without changing the numerical results.
2199 //
2200 // The exact manner in which the widths of the operands interact to limit
2201 // what we can and cannot do safely varies from operation to operation, and
2202 // is explained below in the various case statements.
2203 Type *Ty = FPT.getType();
2204 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
2205 if (BO && BO->hasOneUse()) {
2206 Type *LHSMinType = getMinimumFPType(BO->getOperand(0), Ty, *this);
2207 Type *RHSMinType = getMinimumFPType(BO->getOperand(1), Ty, *this);
2208 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
2209 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
2210 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
2211 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
2212 unsigned DstWidth = Ty->getFPMantissaWidth();
2213 switch (BO->getOpcode()) {
2214 default: break;
2215 case Instruction::FAdd:
2216 case Instruction::FSub:
2217 // For addition and subtraction, the infinitely precise result can
2218 // essentially be arbitrarily wide; proving that double rounding
2219 // will not occur because the result of OpI is exact (as we will for
2220 // FMul, for example) is hopeless. However, we *can* nonetheless
2221 // frequently know that double rounding cannot occur (or that it is
2222 // innocuous) by taking advantage of the specific structure of
2223 // infinitely-precise results that admit double rounding.
2224 //
2225 // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
2226 // to represent both sources, we can guarantee that the double
2227 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
2228 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
2229 // for proof of this fact).
2230 //
2231 // Note: Figueroa does not consider the case where DstFormat !=
2232 // SrcFormat. It's possible (likely even!) that this analysis
2233 // could be tightened for those cases, but they are rare (the main
2234 // case of interest here is (float)((double)float + float)).
2235 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
2236 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2237 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2238 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
2239 RI->copyFastMathFlags(BO);
2240 return RI;
2241 }
2242 break;
2243 case Instruction::FMul:
2244 // For multiplication, the infinitely precise result has at most
2245 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
2246 // that such a value can be exactly represented, then no double
2247 // rounding can possibly occur; we can safely perform the operation
2248 // in the destination format if it can represent both sources.
2249 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
2250 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2251 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2252 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
2253 }
2254 break;
2255 case Instruction::FDiv:
2256 // For division, we use again use the bound from Figueroa's
2257 // dissertation. I am entirely certain that this bound can be
2258 // tightened in the unbalanced operand case by an analysis based on
2259 // the diophantine rational approximation bound, but the well-known
2260 // condition used here is a good conservative first pass.
2261 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
2262 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
2263 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2264 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2265 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
2266 }
2267 break;
2268 case Instruction::FRem: {
2269 // Remainder is straightforward. Remainder is always exact, so the
2270 // type of OpI doesn't enter into things at all. We simply evaluate
2271 // in whichever source type is larger, then convert to the
2272 // destination type.
2273 if (SrcWidth == OpWidth)
2274 break;
2275 Value *LHS, *RHS;
2276 if (LHSWidth == SrcWidth) {
2277 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
2278 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
2279 } else {
2280 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
2281 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
2282 }
2283
2284 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
2285 return CastInst::CreateFPCast(ExactResult, Ty);
2286 }
2287 }
2288 }
2289
2290 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
2291 Value *X;
2293 if (Op && Op->hasOneUse()) {
2294 FastMathFlags FMF = FPT.getFastMathFlags();
2295 if (auto *FPMO = dyn_cast<FPMathOperator>(Op))
2296 FMF &= FPMO->getFastMathFlags();
2297
2298 if (match(Op, m_FNeg(m_Value(X)))) {
2299 Value *InnerTrunc = Builder.CreateFPTruncFMF(X, Ty, FMF);
2300 Value *Neg = Builder.CreateFNegFMF(InnerTrunc, FMF);
2301 return replaceInstUsesWith(FPT, Neg);
2302 }
2303
2304 // If we are truncating a select that has an extended operand, we can
2305 // narrow the other operand and do the select as a narrow op.
2306 Value *Cond, *X, *Y;
2308 X->getType() == Ty) {
2309 // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
2310 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2311 Value *Sel =
2312 Builder.CreateSelectFMF(Cond, X, NarrowY, FMF, "narrow.sel", Op);
2313 return replaceInstUsesWith(FPT, Sel);
2314 }
2316 X->getType() == Ty) {
2317 // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
2318 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2319 Value *Sel =
2320 Builder.CreateSelectFMF(Cond, NarrowY, X, FMF, "narrow.sel", Op);
2321 return replaceInstUsesWith(FPT, Sel);
2322 }
2323 }
2324
2325 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
2326 switch (II->getIntrinsicID()) {
2327 default: break;
2328 case Intrinsic::ceil:
2329 case Intrinsic::fabs:
2330 case Intrinsic::floor:
2331 case Intrinsic::nearbyint:
2332 case Intrinsic::rint:
2333 case Intrinsic::round:
2334 case Intrinsic::roundeven:
2335 case Intrinsic::trunc: {
2336 Value *Src = II->getArgOperand(0);
2337 if (!Src->hasOneUse())
2338 break;
2339
2340 // Except for fabs, this transformation requires the input of the unary FP
2341 // operation to be itself an fpext from the type to which we're
2342 // truncating.
2343 if (II->getIntrinsicID() != Intrinsic::fabs) {
2344 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
2345 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
2346 break;
2347 }
2348
2349 // Do unary FP operation on smaller type.
2350 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
2351 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
2353 FPT.getModule(), II->getIntrinsicID(), Ty);
2355 II->getOperandBundlesAsDefs(OpBundles);
2356 CallInst *NewCI =
2357 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
2358 // A normal value may be converted to an infinity. It means that we cannot
2359 // propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
2360 NewCI->copyFastMathFlags(&FPT);
2361 return NewCI;
2362 }
2363 }
2364 }
2365
2366 if (Instruction *I = shrinkInsertElt(FPT, Builder))
2367 return I;
2368
2369 Value *Src = FPT.getOperand(0);
2370 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2371 auto *FPCast = cast<CastInst>(Src);
2372 if (isKnownExactCastIntToFP(*FPCast))
2373 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2374 }
2375
2376 return nullptr;
2377}
2378
2380 // If the source operand is a cast from integer to FP and known exact, then
2381 // cast the integer operand directly to the destination type.
2382 Type *Ty = FPExt.getType();
2383 Value *Src = FPExt.getOperand(0);
2384 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2385 auto *FPCast = cast<CastInst>(Src);
2386 if (isKnownExactCastIntToFP(*FPCast))
2387 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2388 }
2389
2390 return commonCastTransforms(FPExt);
2391}
2392
2393/// fpto{s/u}i[.sat]({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
2394/// This is safe if the intermediate type has enough bits in its mantissa to
2395/// accurately represent all values of X. For example, this won't work with
2396/// i64 -> float -> i64.
2397template <typename FPToIntTy>
2399 constexpr bool IsSaturating = std::is_same_v<FPToIntTy, IntrinsicInst>;
2400
2401 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
2402 return nullptr;
2403
2404 auto *OpI = cast<CastInst>(FI.getOperand(0));
2405 Value *X = OpI->getOperand(0);
2406 Type *XType = X->getType();
2407 Type *DestType = FI.getType();
2408 bool IsInputSigned = isa<SIToFPInst>(OpI);
2409
2410 bool IsOutputSigned;
2411 if constexpr (IsSaturating)
2412 IsOutputSigned = FI.getIntrinsicID() == Intrinsic::fptosi_sat;
2413 else
2414 IsOutputSigned = isa<FPToSIInst>(FI);
2415
2416 // Since we can assume the conversion won't overflow, our decision as to
2417 // whether the input will fit in the float should depend on the minimum
2418 // of the input range and output range.
2419
2420 // This means this is also safe for a signed input and unsigned output, since
2421 // a negative input would lead to undefined behavior.
2422 if (!isKnownExactCastIntToFP(*OpI)) {
2423 if constexpr (!IsSaturating) {
2424 // The first cast may not round exactly based on the source integer width
2425 // and FP width, but the overflow UB rules can still allow this to fold.
2426 // If the destination type is narrow, that means the intermediate FP value
2427 // must be large enough to hold the source value exactly.
2428 //
2429 // For example, (uint8_t)((float)(uint32_t 16777217) is UB.
2430 int OutputSize = (int)DestType->getScalarSizeInBits();
2431 if (OutputSize > OpI->getType()->getFPMantissaWidth())
2432 return nullptr;
2433 } else {
2434 // Sat intrinsics produce a defined saturated value on overflow, so
2435 // the UB-based shortcut is invalid. Require exactness.
2436 return nullptr;
2437 }
2438 }
2439
2440 unsigned SrcWidth = XType->getScalarSizeInBits();
2441 unsigned DestWidth = DestType->getScalarSizeInBits();
2442
2443 if constexpr (IsSaturating) {
2444 // TODO: cross-sign and narrowing cases could be handled with range
2445 // analysis to prove the source fits in the destination.
2446 if (IsInputSigned != IsOutputSigned || DestWidth < SrcWidth)
2447 return nullptr;
2448 }
2449
2450 if (DestWidth > SrcWidth) {
2451 if (IsInputSigned && IsOutputSigned)
2452 return new SExtInst(X, DestType);
2453 return new ZExtInst(X, DestType);
2454 }
2455 if (DestWidth < SrcWidth)
2456 return new TruncInst(X, DestType);
2457
2458 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2459 return replaceInstUsesWith(FI, X);
2460}
2461
2463template Instruction *
2465
2467 // fpto{u/s}i non-norm --> 0
2468 FPClassTest Mask =
2469 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2471 FI.getOperand(0), Mask, IC.getSimplifyQuery().getWithInstruction(&FI));
2472 if (FPClass.isKnownNever(Mask))
2474
2475 return nullptr;
2476}
2477
2479 if (Instruction *I = foldItoFPtoI(FI))
2480 return I;
2481
2482 if (Instruction *I = foldFPtoI(FI, *this))
2483 return I;
2484
2485 return commonCastTransforms(FI);
2486}
2487
2489 if (Instruction *I = foldItoFPtoI(FI))
2490 return I;
2491
2492 if (Instruction *I = foldFPtoI(FI, *this))
2493 return I;
2494
2495 return commonCastTransforms(FI);
2496}
2497
2499 if (Instruction *R = commonCastTransforms(CI))
2500 return R;
2501 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
2502 CI.setNonNeg();
2503 return &CI;
2504 }
2505 return nullptr;
2506}
2507
2509 if (Instruction *R = commonCastTransforms(CI))
2510 return R;
2511 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2512 auto *UI =
2513 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2514 UI->setNonNeg(true);
2515 return UI;
2516 }
2517 return nullptr;
2518}
2519
2521 // If the source integer type is not the intptr_t type for this target, do a
2522 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2523 // cast to be exposed to other transforms.
2524 unsigned AS = CI.getAddressSpace();
2525 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2526 DL.getPointerSizeInBits(AS)) {
2527 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2528 DL.getIntPtrType(CI.getContext(), AS));
2529 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2530 return new IntToPtrInst(P, CI.getType());
2531 }
2532
2533 // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
2534 // (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
2535 // value.
2536 Value *Base;
2537 Value *Offset;
2538 auto UsesPointerAsInt = [](User *U) {
2540 return true;
2541 if (auto *P = dyn_cast<PHINode>(U))
2542 return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(*P->user_begin());
2543 return false;
2544 };
2545 if (match(CI.getOperand(0),
2547 m_Value(Offset)))) &&
2549 Base->getType()->getPointerAddressSpace() &&
2550 all_of(CI.users(), UsesPointerAsInt)) {
2551 return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
2552 }
2553
2555 return I;
2556
2557 return nullptr;
2558}
2559
2561 // Look through chain of one-use GEPs.
2562 Type *PtrTy = Ptr->getType();
2564 while (true) {
2565 auto *GEP = dyn_cast<GEPOperator>(Ptr);
2566 if (!GEP || !GEP->hasOneUse())
2567 break;
2568 GEPs.push_back(GEP);
2569 Ptr = GEP->getPointerOperand();
2570 }
2571
2572 // Don't handle case where GEP converts from pointer to vector.
2573 if (GEPs.empty() || PtrTy != Ptr->getType())
2574 return nullptr;
2575
2576 // Check whether we know the integer value of the base pointer.
2577 Value *Res;
2578 Type *IdxTy = DL.getIndexType(PtrTy);
2579 if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
2580 Res->getType() == IntTy && IntTy == IdxTy) {
2581 // pass
2582 } else if (isa<ConstantPointerNull>(Ptr)) {
2583 Res = Constant::getNullValue(IdxTy);
2584 } else {
2585 return nullptr;
2586 }
2587
2588 // Perform the entire operation on integers instead.
2589 for (GEPOperator *GEP : reverse(GEPs)) {
2590 Value *Offset = EmitGEPOffset(GEP);
2591 Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
2592 }
2593 return Builder.CreateZExtOrTrunc(Res, IntTy);
2594}
2595
2597 // If the destination integer type is not the intptr_t type for this target,
2598 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2599 // to be exposed to other transforms.
2601 Type *SrcTy = SrcOp->getType();
2602 Type *Ty = CI.getType();
2603 unsigned AS = CI.getPointerAddressSpace();
2604 unsigned TySize = Ty->getScalarSizeInBits();
2605 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2606 if (TySize != PtrSize) {
2607 Type *IntPtrTy =
2608 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2609 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2610 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2611 }
2612
2613 // (ptrtoint (ptrmask P, M))
2614 // -> (and (ptrtoint P), M)
2615 // This is generally beneficial as `and` is better supported than `ptrmask`.
2616 Value *Ptr, *Mask;
2618 m_Value(Mask)))) &&
2619 Mask->getType() == Ty)
2620 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2621
2622 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2623 return replaceInstUsesWith(CI, V);
2624
2625 Value *Vec, *Scalar, *Index;
2627 m_Value(Scalar), m_Value(Index)))) &&
2628 Vec->getType() == Ty) {
2629 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2630 // Convert the scalar to int followed by insert to eliminate one cast:
2631 // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2632 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2633 return InsertElementInst::Create(Vec, NewCast, Index);
2634 }
2635
2636 return commonCastTransforms(CI);
2637}
2638
2641 Type *Ty = CI.getType();
2642
2643 // (ptrtoaddr (ptrmask P, M))
2644 // -> (and (ptrtoaddr P), M)
2645 // This is generally beneficial as `and` is better supported than `ptrmask`.
2646 Value *Ptr, *Mask;
2648 m_Value(Mask)))) &&
2649 Mask->getType() == Ty)
2650 return BinaryOperator::CreateAnd(Builder.CreatePtrToAddr(Ptr), Mask);
2651
2652 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2653 return replaceInstUsesWith(CI, V);
2654
2655 // FIXME: Implement variants of ptrtoint folds.
2656 return commonCastTransforms(CI);
2657}
2658
2659/// This input value (which is known to have vector type) is being zero extended
2660/// or truncated to the specified vector type. Since the zext/trunc is done
2661/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2662/// endianness will impact which end of the vector that is extended or
2663/// truncated.
2664///
2665/// A vector is always stored with index 0 at the lowest address, which
2666/// corresponds to the most significant bits for a big endian stored integer and
2667/// the least significant bits for little endian. A trunc/zext of an integer
2668/// impacts the big end of the integer. Thus, we need to add/remove elements at
2669/// the front of the vector for big endian targets, and the back of the vector
2670/// for little endian targets.
2671///
2672/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2673///
2674/// The source and destination vector types may have different element types.
2675static Instruction *
2677 InstCombinerImpl &IC) {
2678 // We can only do this optimization if the output is a multiple of the input
2679 // element size, or the input is a multiple of the output element size.
2680 // Convert the input type to have the same element type as the output.
2681 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2682
2683 if (SrcTy->getElementType() != DestTy->getElementType()) {
2684 // The input types don't need to be identical, but for now they must be the
2685 // same size. There is no specific reason we couldn't handle things like
2686 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2687 // there yet.
2688 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2689 DestTy->getElementType()->getPrimitiveSizeInBits())
2690 return nullptr;
2691
2692 SrcTy =
2693 FixedVectorType::get(DestTy->getElementType(),
2694 cast<FixedVectorType>(SrcTy)->getNumElements());
2695 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2696 }
2697
2698 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2699 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2700 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2701
2702 assert(SrcElts != DestElts && "Element counts should be different.");
2703
2704 // Now that the element types match, get the shuffle mask and RHS of the
2705 // shuffle to use, which depends on whether we're increasing or decreasing the
2706 // size of the input.
2707 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2708 ArrayRef<int> ShuffleMask;
2709 Value *V2;
2710
2711 if (SrcElts > DestElts) {
2712 // If we're shrinking the number of elements (rewriting an integer
2713 // truncate), just shuffle in the elements corresponding to the least
2714 // significant bits from the input and use poison as the second shuffle
2715 // input.
2716 V2 = PoisonValue::get(SrcTy);
2717 // Make sure the shuffle mask selects the "least significant bits" by
2718 // keeping elements from back of the src vector for big endian, and from the
2719 // front for little endian.
2720 ShuffleMask = ShuffleMaskStorage;
2721 if (IsBigEndian)
2722 ShuffleMask = ShuffleMask.take_back(DestElts);
2723 else
2724 ShuffleMask = ShuffleMask.take_front(DestElts);
2725 } else {
2726 // If we're increasing the number of elements (rewriting an integer zext),
2727 // shuffle in all of the elements from InVal. Fill the rest of the result
2728 // elements with zeros from a constant zero.
2729 V2 = Constant::getNullValue(SrcTy);
2730 // Use first elt from V2 when indicating zero in the shuffle mask.
2731 uint32_t NullElt = SrcElts;
2732 // Extend with null values in the "most significant bits" by adding elements
2733 // in front of the src vector for big endian, and at the back for little
2734 // endian.
2735 unsigned DeltaElts = DestElts - SrcElts;
2736 if (IsBigEndian)
2737 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2738 else
2739 ShuffleMaskStorage.append(DeltaElts, NullElt);
2740 ShuffleMask = ShuffleMaskStorage;
2741 }
2742
2743 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2744}
2745
2746static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2747 return Value % Ty->getPrimitiveSizeInBits() == 0;
2748}
2749
2750static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2751 return Value / Ty->getPrimitiveSizeInBits();
2752}
2753
2754/// V is a value which is inserted into a vector of VecEltTy.
2755/// Look through the value to see if we can decompose it into
2756/// insertions into the vector. See the example in the comment for
2757/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2758/// The type of V is always a non-zero multiple of VecEltTy's size.
2759/// Shift is the number of bits between the lsb of V and the lsb of
2760/// the vector.
2761///
2762/// This returns false if the pattern can't be matched or true if it can,
2763/// filling in Elements with the elements found here.
2764static bool collectInsertionElements(Value *V, unsigned Shift,
2765 SmallVectorImpl<Value *> &Elements,
2766 Type *VecEltTy, bool isBigEndian) {
2767 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2768 "Shift should be a multiple of the element type size");
2769
2770 // Poison values never contribute useful bits to the result.
2771 if (match(V, m_Poison()))
2772 return true;
2773
2774 // If we got down to a value of the right type, we win, try inserting into the
2775 // right element.
2776 if (V->getType() == VecEltTy) {
2777 // Inserting null doesn't actually insert any elements.
2778 if (Constant *C = dyn_cast<Constant>(V))
2779 if (C->isNullValue())
2780 return true;
2781
2782 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2783 if (isBigEndian)
2784 ElementIndex = Elements.size() - ElementIndex - 1;
2785
2786 // Fail if multiple elements are inserted into this slot.
2787 if (Elements[ElementIndex])
2788 return false;
2789
2790 Elements[ElementIndex] = V;
2791 return true;
2792 }
2793
2794 if (Constant *C = dyn_cast<Constant>(V)) {
2795 // Figure out the # elements this provides, and bitcast it or slice it up
2796 // as required.
2797 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2798 VecEltTy);
2799 // If the constant is the size of a vector element, we just need to bitcast
2800 // it to the right type so it gets properly inserted.
2801 if (NumElts == 1)
2803 Shift, Elements, VecEltTy, isBigEndian);
2804
2805 // Okay, this is a constant that covers multiple elements. Slice it up into
2806 // pieces and insert each element-sized piece into the vector.
2807 if (!isa<IntegerType>(C->getType()))
2808 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2809 C->getType()->getPrimitiveSizeInBits()));
2810 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2811 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2812
2813 for (unsigned i = 0; i != NumElts; ++i) {
2814 unsigned ShiftI = i * ElementSize;
2816 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2817 if (!Piece)
2818 return false;
2819
2820 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2821 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2822 isBigEndian))
2823 return false;
2824 }
2825 return true;
2826 }
2827
2828 if (!V->hasOneUse()) return false;
2829
2831 if (!I) return false;
2832 switch (I->getOpcode()) {
2833 default: return false; // Unhandled case.
2834 case Instruction::BitCast:
2835 if (I->getOperand(0)->getType()->isVectorTy())
2836 return false;
2837 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2838 isBigEndian);
2839 case Instruction::ZExt:
2841 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2842 VecEltTy))
2843 return false;
2844 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2845 isBigEndian);
2846 case Instruction::Or:
2847 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2848 isBigEndian) &&
2849 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2850 isBigEndian);
2851 case Instruction::Shl: {
2852 // Must be shifting by a constant that is a multiple of the element size.
2853 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2854 if (!CI) return false;
2855 Shift += CI->getZExtValue();
2856 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2857 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2858 isBigEndian);
2859 }
2860
2861 }
2862}
2863
2864
2865/// If the input is an 'or' instruction, we may be doing shifts and ors to
2866/// assemble the elements of the vector manually.
2867/// Try to rip the code out and replace it with insertelements. This is to
2868/// optimize code like this:
2869///
2870/// %tmp37 = bitcast float %inc to i32
2871/// %tmp38 = zext i32 %tmp37 to i64
2872/// %tmp31 = bitcast float %inc5 to i32
2873/// %tmp32 = zext i32 %tmp31 to i64
2874/// %tmp33 = shl i64 %tmp32, 32
2875/// %ins35 = or i64 %tmp33, %tmp38
2876/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2877///
2878/// Into two insertelements that do "buildvector{%inc, %inc5}".
2880 InstCombinerImpl &IC) {
2881 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2882 Value *IntInput = CI.getOperand(0);
2883
2884 // if the int input is just an undef value do not try to optimize to vector
2885 // insertions as it will prevent undef propagation
2886 if (isa<UndefValue>(IntInput))
2887 return nullptr;
2888
2889 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2890 if (!collectInsertionElements(IntInput, 0, Elements,
2891 DestVecTy->getElementType(),
2892 IC.getDataLayout().isBigEndian()))
2893 return nullptr;
2894
2895 // If we succeeded, we know that all of the element are specified by Elements
2896 // or are zero if Elements has a null entry. Recast this as a set of
2897 // insertions.
2898 Value *Result = Constant::getNullValue(CI.getType());
2899 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2900 if (!Elements[i]) continue; // Unset element.
2901
2902 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2903 IC.Builder.getInt32(i));
2904 }
2905
2906 return Result;
2907}
2908
2909/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2910/// vector followed by extract element. The backend tends to handle bitcasts of
2911/// vectors better than bitcasts of scalars because vector registers are
2912/// usually not type-specific like scalar integer or scalar floating-point.
2914 InstCombinerImpl &IC) {
2915 Value *VecOp, *Index;
2916 if (!match(BitCast.getOperand(0),
2917 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2918 return nullptr;
2919
2920 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2921 // type to extract from.
2922 Type *DestType = BitCast.getType();
2923 VectorType *VecType = cast<VectorType>(VecOp->getType());
2924 if (VectorType::isValidElementType(DestType)) {
2925 auto *NewVecType = VectorType::get(DestType, VecType);
2926 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2927 return ExtractElementInst::Create(NewBC, Index);
2928 }
2929
2930 // Only solve DestType is vector to avoid inverse transform in visitBitCast.
2931 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2932 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2933 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2934 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2935
2936 return nullptr;
2937}
2938
2939/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2941 InstCombiner::BuilderTy &Builder) {
2942 Type *DestTy = BitCast.getType();
2943 BinaryOperator *BO;
2944
2945 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2946 !BO->isBitwiseLogicOp())
2947 return nullptr;
2948
2949 // FIXME: This transform is restricted to vector types to avoid backend
2950 // problems caused by creating potentially illegal operations. If a fix-up is
2951 // added to handle that situation, we can remove this check.
2952 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2953 return nullptr;
2954
2955 if (DestTy->isFPOrFPVectorTy()) {
2956 Value *X, *Y;
2957 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2958 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2960 if (X->getType()->isFPOrFPVectorTy() &&
2961 Y->getType()->isIntOrIntVectorTy()) {
2962 Value *CastedOp =
2963 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2964 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2965 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2966 }
2967 if (X->getType()->isIntOrIntVectorTy() &&
2968 Y->getType()->isFPOrFPVectorTy()) {
2969 Value *CastedOp =
2970 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2971 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2972 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2973 }
2974 }
2975 return nullptr;
2976 }
2977
2978 if (!DestTy->isIntOrIntVectorTy())
2979 return nullptr;
2980
2981 Value *X;
2982 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2983 X->getType() == DestTy && !isa<Constant>(X)) {
2984 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2985 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2986 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2987 }
2988
2989 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2990 X->getType() == DestTy && !isa<Constant>(X)) {
2991 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2992 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2993 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2994 }
2995
2996 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2997 // constant. This eases recognition of special constants for later ops.
2998 // Example:
2999 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
3000 Constant *C;
3001 if (match(BO->getOperand(1), m_Constant(C))) {
3002 // bitcast (logic X, C) --> logic (bitcast X, C')
3003 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
3004 Value *CastedC = Builder.CreateBitCast(C, DestTy);
3005 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
3006 }
3007
3008 return nullptr;
3009}
3010
3011/// Change the type of a select if we can eliminate a bitcast.
3013 InstCombiner::BuilderTy &Builder) {
3014 Value *Cond, *TVal, *FVal;
3015 if (!match(BitCast.getOperand(0),
3016 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
3017 return nullptr;
3018
3019 // A vector select must maintain the same number of elements in its operands.
3020 Type *CondTy = Cond->getType();
3021 Type *DestTy = BitCast.getType();
3022
3023 auto *DestVecTy = dyn_cast<VectorType>(DestTy);
3024
3025 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
3026 if (!DestVecTy ||
3027 CondVTy->getElementCount() != DestVecTy->getElementCount())
3028 return nullptr;
3029
3030 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
3031 auto *SrcVecTy = dyn_cast<VectorType>(TVal->getType());
3032
3033 if ((isa<Constant>(TVal) || isa<Constant>(FVal)) &&
3034 (!DestVecTy ||
3035 (SrcVecTy && ElementCount::isKnownLE(DestVecTy->getElementCount(),
3036 SrcVecTy->getElementCount())))) {
3037 // Avoid introducing select of vector (or select of vector with more
3038 // elements) until the backend can undo this transformation.
3039 Value *CastedTVal = Builder.CreateBitCast(TVal, DestTy);
3040 Value *CastedFVal = Builder.CreateBitCast(FVal, DestTy);
3041 return SelectInst::Create(Cond, CastedTVal, CastedFVal, "", nullptr, Sel);
3042 }
3043
3044 // FIXME: This transform is restricted from changing the select between
3045 // scalars and vectors to avoid backend problems caused by creating
3046 // potentially illegal operations. If a fix-up is added to handle that
3047 // situation, we can remove this check.
3048 if ((DestVecTy != nullptr) != (SrcVecTy != nullptr))
3049 return nullptr;
3050
3051 Value *X;
3052 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
3053 !isa<Constant>(X)) {
3054 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
3055 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
3056 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
3057 }
3058
3059 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
3060 !isa<Constant>(X)) {
3061 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
3062 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
3063 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
3064 }
3065
3066 return nullptr;
3067}
3068
3069/// Check if all users of CI are StoreInsts.
3070static bool hasStoreUsersOnly(CastInst &CI) {
3071 for (User *U : CI.users()) {
3072 if (!isa<StoreInst>(U))
3073 return false;
3074 }
3075 return true;
3076}
3077
3078/// This function handles following case
3079///
3080/// A -> B cast
3081/// PHI
3082/// B -> A cast
3083///
3084/// All the related PHI nodes can be replaced by new PHI nodes with type A.
3085/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
3086Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
3087 PHINode *PN) {
3088 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
3089 if (hasStoreUsersOnly(CI))
3090 return nullptr;
3091
3092 Value *Src = CI.getOperand(0);
3093 Type *SrcTy = Src->getType(); // Type B
3094 Type *DestTy = CI.getType(); // Type A
3095
3096 SmallVector<PHINode *, 4> PhiWorklist;
3097 SmallSetVector<PHINode *, 4> OldPhiNodes;
3098
3099 // Find all of the A->B casts and PHI nodes.
3100 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
3101 // OldPhiNodes is used to track all known PHI nodes, before adding a new
3102 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
3103 PhiWorklist.push_back(PN);
3104 OldPhiNodes.insert(PN);
3105 while (!PhiWorklist.empty()) {
3106 auto *OldPN = PhiWorklist.pop_back_val();
3107 for (Value *IncValue : OldPN->incoming_values()) {
3108 if (isa<Constant>(IncValue))
3109 continue;
3110
3111 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
3112 // If there is a sequence of one or more load instructions, each loaded
3113 // value is used as address of later load instruction, bitcast is
3114 // necessary to change the value type, don't optimize it. For
3115 // simplicity we give up if the load address comes from another load.
3116 Value *Addr = LI->getOperand(0);
3117 if (Addr == &CI || isa<LoadInst>(Addr))
3118 return nullptr;
3119 // Don't tranform "load <256 x i32>, <256 x i32>*" to
3120 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
3121 // TODO: Remove this check when bitcast between vector and x86_amx
3122 // is replaced with a specific intrinsic.
3123 if (DestTy->isX86_AMXTy())
3124 return nullptr;
3125 if (LI->hasOneUse() && LI->isSimple())
3126 continue;
3127 // If a LoadInst has more than one use, changing the type of loaded
3128 // value may create another bitcast.
3129 return nullptr;
3130 }
3131
3132 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
3133 if (OldPhiNodes.insert(PNode))
3134 PhiWorklist.push_back(PNode);
3135 continue;
3136 }
3137
3138 auto *BCI = dyn_cast<BitCastInst>(IncValue);
3139 // We can't handle other instructions.
3140 if (!BCI)
3141 return nullptr;
3142
3143 // Verify it's a A->B cast.
3144 Type *TyA = BCI->getOperand(0)->getType();
3145 Type *TyB = BCI->getType();
3146 if (TyA != DestTy || TyB != SrcTy)
3147 return nullptr;
3148 }
3149 }
3150
3151 // Check that each user of each old PHI node is something that we can
3152 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
3153 for (auto *OldPN : OldPhiNodes) {
3154 for (User *V : OldPN->users()) {
3155 if (auto *SI = dyn_cast<StoreInst>(V)) {
3156 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
3157 return nullptr;
3158 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3159 // Verify it's a B->A cast.
3160 Type *TyB = BCI->getOperand(0)->getType();
3161 Type *TyA = BCI->getType();
3162 if (TyA != DestTy || TyB != SrcTy)
3163 return nullptr;
3164 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3165 // As long as the user is another old PHI node, then even if we don't
3166 // rewrite it, the PHI web we're considering won't have any users
3167 // outside itself, so it'll be dead.
3168 if (!OldPhiNodes.contains(PHI))
3169 return nullptr;
3170 } else {
3171 return nullptr;
3172 }
3173 }
3174 }
3175
3176 // For each old PHI node, create a corresponding new PHI node with a type A.
3177 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
3178 for (auto *OldPN : OldPhiNodes) {
3179 Builder.SetInsertPoint(OldPN);
3180 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
3181 NewPNodes[OldPN] = NewPN;
3182 }
3183
3184 // Fill in the operands of new PHI nodes.
3185 for (auto *OldPN : OldPhiNodes) {
3186 PHINode *NewPN = NewPNodes[OldPN];
3187 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
3188 Value *V = OldPN->getOperand(j);
3189 Value *NewV = nullptr;
3190 if (auto *C = dyn_cast<Constant>(V)) {
3191 NewV = ConstantExpr::getBitCast(C, DestTy);
3192 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
3193 // Explicitly perform load combine to make sure no opposing transform
3194 // can remove the bitcast in the meantime and trigger an infinite loop.
3195 Builder.SetInsertPoint(LI);
3196 NewV = combineLoadToNewType(*LI, DestTy);
3197 // Remove the old load and its use in the old phi, which itself becomes
3198 // dead once the whole transform finishes.
3199 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
3201 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3202 NewV = BCI->getOperand(0);
3203 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
3204 NewV = NewPNodes[PrevPN];
3205 }
3206 assert(NewV);
3207 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
3208 }
3209 }
3210
3211 // Traverse all accumulated PHI nodes and process its users,
3212 // which are Stores and BitcCasts. Without this processing
3213 // NewPHI nodes could be replicated and could lead to extra
3214 // moves generated after DeSSA.
3215 // If there is a store with type B, change it to type A.
3216
3217
3218 // Replace users of BitCast B->A with NewPHI. These will help
3219 // later to get rid off a closure formed by OldPHI nodes.
3220 Instruction *RetVal = nullptr;
3221 for (auto *OldPN : OldPhiNodes) {
3222 PHINode *NewPN = NewPNodes[OldPN];
3223 for (User *V : make_early_inc_range(OldPN->users())) {
3224 if (auto *SI = dyn_cast<StoreInst>(V)) {
3225 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
3226 Builder.SetInsertPoint(SI);
3227 auto *NewBC =
3228 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
3229 SI->setOperand(0, NewBC);
3230 Worklist.push(SI);
3231 assert(hasStoreUsersOnly(*NewBC));
3232 }
3233 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3234 Type *TyB = BCI->getOperand(0)->getType();
3235 Type *TyA = BCI->getType();
3236 assert(TyA == DestTy && TyB == SrcTy);
3237 (void) TyA;
3238 (void) TyB;
3239 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
3240 if (BCI == &CI)
3241 RetVal = I;
3242 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3243 assert(OldPhiNodes.contains(PHI));
3244 (void) PHI;
3245 } else {
3246 llvm_unreachable("all uses should be handled");
3247 }
3248 }
3249 }
3250
3251 return RetVal;
3252}
3253
3254/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
3255/// copysign((bitcast Y to fp), X)
3257 InstCombiner::BuilderTy &Builder,
3258 const SimplifyQuery &SQ) {
3259 Value *X, *Y;
3260 Type *FTy = CI.getType();
3261 if (!FTy->isFPOrFPVectorTy())
3262 return nullptr;
3265 m_Value(Y)))))
3266 return nullptr;
3267 if (X->getType() != FTy)
3268 return nullptr;
3269 if (!isKnownNonNegative(Y, SQ))
3270 return nullptr;
3271
3272 return Builder.CreateCopySign(Builder.CreateBitCast(Y, FTy), X);
3273}
3274
3276 // If the operands are integer typed then apply the integer transforms,
3277 // otherwise just apply the common ones.
3278 Value *Src = CI.getOperand(0);
3279 Type *SrcTy = Src->getType();
3280 Type *DestTy = CI.getType();
3281
3282 // Get rid of casts from one type to the same type. These are useless and can
3283 // be replaced by the operand.
3284 if (DestTy == Src->getType())
3285 return replaceInstUsesWith(CI, Src);
3286
3287 if (isa<FixedVectorType>(DestTy)) {
3288 if (isa<IntegerType>(SrcTy)) {
3289 // If this is a cast from an integer to vector, check to see if the input
3290 // is a trunc or zext of a bitcast from vector. If so, we can replace all
3291 // the casts with a shuffle and (potentially) a bitcast.
3292 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
3293 CastInst *SrcCast = cast<CastInst>(Src);
3294 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
3295 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
3297 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
3298 return I;
3299 }
3300
3301 // If the input is an 'or' instruction, we may be doing shifts and ors to
3302 // assemble the elements of the vector manually. Try to rip the code out
3303 // and replace it with insertelements.
3304 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
3305 return replaceInstUsesWith(CI, V);
3306 }
3307 }
3308
3309 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
3310 if (SrcVTy->getNumElements() == 1) {
3311 // If our destination is not a vector, then make this a straight
3312 // scalar-scalar cast.
3313 if (!DestTy->isVectorTy()) {
3314 Value *Elem =
3315 Builder.CreateExtractElement(Src,
3317 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
3318 }
3319
3320 // Otherwise, see if our source is an insert. If so, then use the scalar
3321 // component directly:
3322 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
3323 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
3324 return new BitCastInst(InsElt->getOperand(1), DestTy);
3325 }
3326
3327 // Convert an artificial vector insert into more analyzable bitwise logic.
3328 unsigned BitWidth = DestTy->getScalarSizeInBits();
3329 Value *X, *Y;
3330 uint64_t IndexC;
3332 m_Value(Y), m_ConstantInt(IndexC)))) &&
3333 DestTy->isIntegerTy() && X->getType() == DestTy &&
3334 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
3335 // Adjust for big endian - the LSBs are at the high index.
3336 if (DL.isBigEndian())
3337 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
3338
3339 // We only handle (endian-normalized) insert to index 0. Any other insert
3340 // would require a left-shift, so that is an extra instruction.
3341 if (IndexC == 0) {
3342 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
3343 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
3344 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
3345 Value *AndX = Builder.CreateAnd(X, MaskC);
3346 Value *ZextY = Builder.CreateZExt(Y, DestTy);
3347 return BinaryOperator::CreateOr(AndX, ZextY);
3348 }
3349 }
3350 }
3351
3352 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
3353 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
3354 // a bitcast to a vector with the same # elts.
3355 Value *ShufOp0 = Shuf->getOperand(0);
3356 Value *ShufOp1 = Shuf->getOperand(1);
3357 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
3358 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
3359 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
3360 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
3361 ShufElts == SrcVecElts) {
3362 BitCastInst *Tmp;
3363 // If either of the operands is a cast from CI.getType(), then
3364 // evaluating the shuffle in the casted destination's type will allow
3365 // us to eliminate at least one cast.
3366 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
3367 Tmp->getOperand(0)->getType() == DestTy) ||
3368 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
3369 Tmp->getOperand(0)->getType() == DestTy)) {
3370 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
3371 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
3372 // Return a new shuffle vector. Use the same element ID's, as we
3373 // know the vector types match #elts.
3374 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
3375 }
3376 }
3377
3378 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
3379 // as a byte/bit swap:
3380 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
3381 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
3382 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
3383 Shuf->hasOneUse() && Shuf->isReverse()) {
3384 unsigned IntrinsicNum = 0;
3385 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
3386 SrcTy->getScalarSizeInBits() == 8) {
3387 IntrinsicNum = Intrinsic::bswap;
3388 } else if (SrcTy->getScalarSizeInBits() == 1) {
3389 IntrinsicNum = Intrinsic::bitreverse;
3390 }
3391 if (IntrinsicNum != 0) {
3392 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
3393 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
3394 Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
3395 CI.getModule(), IntrinsicNum, DestTy);
3396 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
3397 return CallInst::Create(BswapOrBitreverse, {ScalarX});
3398 }
3399 }
3400 }
3401
3402 // Handle the A->B->A cast, and there is an intervening PHI node.
3403 if (PHINode *PN = dyn_cast<PHINode>(Src))
3404 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
3405 return I;
3406
3407 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
3408 return I;
3409
3411 return I;
3412
3414 return I;
3415
3416 if (Value *V = foldCopySignIdioms(CI, Builder, SQ.getWithInstruction(&CI)))
3417 return replaceInstUsesWith(CI, V);
3418
3419 return commonCastTransforms(CI);
3420}
3421
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This file defines the DenseMap class.
static bool isSigned(unsigned Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static Value * foldCopySignIdioms(BitCastInst &CI, InstCombiner::BuilderTy &Builder, const SimplifyQuery &SQ)
Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to copysign((bitcast Y to fp),...
static Type * shrinkFPConstantVector(Value *V, bool PreferBFloat)
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Instruction * foldFPtoI(Instruction &FI, InstCombiner &IC)
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(APFloat F, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
SmallDenseMap< Value *, Value *, 8 > EvaluatedMap
static Type * getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC)
Find the minimum FP type we can safely truncate to.
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static Type * shrinkFPConstant(LLVMContext &Ctx, const APFloat &F, bool PreferBFloat)
static Instruction * foldVecExtTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Whenever an element is extracted from a vector, optionally shifted down, and then truncated,...
static Value * EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned, InstCombinerImpl &IC, EvaluatedMap &Processed)
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
Value * RHS
Value * LHS
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:241
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
int32_t exactLogBase2() const
Definition APInt.h:1806
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
This class represents a conversion between pointers from one address space to another.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
BinaryOps getOpcode() const
Definition InstrTypes.h:409
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Type * getSrcTy() const
Return the source type, as a convenience.
Definition InstrTypes.h:679
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:674
static LLVM_ABI unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, const DataLayout *DL)
Determine how a pair of casts can be eliminated, if they can be at all.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static LLVM_ABI CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Type * getDestTy() const
Return the destination type, as a convenience.
Definition InstrTypes.h:681
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValueAPF() const
Definition Constants.h:463
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
bool uge(uint64_t Num) const
This function will return true iff this constant represents a value with active bits bigger than 64 b...
Definition Constants.h:262
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:759
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2637
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition IRBuilder.h:534
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:529
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2252
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * foldExtractionOfVectorDeinterleave(ZExtInst &RootZExt)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate type has enou...
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitFPTrunc(FPTruncInst &CI)
Value * foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Instruction * visitPtrToAddr(PtrToAddrInst &CI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
LLVM_ABI bool canBeCastedExactlyIntToFP(Value *V, Type *FPTy, bool IsSigned, const Instruction *CxtI=nullptr) const
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
LLVM_ABI bool isKnownExactCastIntToFP(CastInst &I) const
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool hasNonNeg() const LLVM_READONLY
Determine whether the the nneg flag is set.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents a cast from a pointer to an address (non-capturing ptrtoint).
Value * getPointerOperand()
Gets the pointer operand.
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition Type.h:202
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:287
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:237
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:284
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
This class represents zero extension of integer types.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:237
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_lowbit_mask > m_LowBitMask()
Match an integer or vector with only the low bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrToIntSameSize_match< OpTy > m_PtrToIntSameSize(const DataLayout &DL, const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, UIToFPInst >, CastInst_match< OpTy, SIToFPInst > > m_IToFP(const OpTy &Op)
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_or< CastInst_match< OpTy, FPToUIInst >, CastInst_match< OpTy, FPToSIInst > > m_FPToI(const OpTy &Op)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match a integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_Ctlz(const Opnd0 &Op0, const Opnd1 &Op1)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition MathExtras.h:350
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI Value * simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q)
Given operands for a CastInst, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2440
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
Matching combinators.
SimplifyQuery getWithInstruction(const Instruction *I) const