LLVM 23.0.0git
InstCombineCasts.cpp
Go to the documentation of this file.
1//===- InstCombineCasts.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for cast operations.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/DebugInfo.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Type.h"
26#include "llvm/IR/Value.h"
29#include <iterator>
30#include <optional>
31
32using namespace llvm;
33using namespace PatternMatch;
34
35#define DEBUG_TYPE "instcombine"
36
38
41 EvaluatedMap &Processed) {
42 // Since we cover transformation of instructions with multiple users, we might
43 // come to the same node via multiple paths. We should not create a
44 // replacement for every single one of them though.
45 if (Value *Result = Processed.lookup(V))
46 return Result;
47
50
51 // Otherwise, it must be an instruction.
53 Instruction *Res = nullptr;
54 unsigned Opc = I->getOpcode();
55 switch (Opc) {
56 case Instruction::Add:
57 case Instruction::Sub:
58 case Instruction::Mul:
59 case Instruction::And:
60 case Instruction::Or:
61 case Instruction::Xor:
62 case Instruction::AShr:
63 case Instruction::LShr:
64 case Instruction::Shl:
65 case Instruction::UDiv:
66 case Instruction::URem: {
67 Value *LHS = EvaluateInDifferentTypeImpl(I->getOperand(0), Ty, isSigned, IC,
68 Processed);
69 Value *RHS = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned, IC,
70 Processed);
72 if (Opc == Instruction::LShr || Opc == Instruction::AShr)
73 Res->setIsExact(I->isExact());
74 break;
75 }
76 case Instruction::Trunc:
77 case Instruction::ZExt:
78 case Instruction::SExt:
79 // If the source type of the cast is the type we're trying for then we can
80 // just return the source. There's no need to insert it because it is not
81 // new.
82 if (I->getOperand(0)->getType() == Ty)
83 return I->getOperand(0);
84
85 // Otherwise, must be the same type of cast, so just reinsert a new one.
86 // This also handles the case of zext(trunc(x)) -> zext(x).
87 Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
88 Opc == Instruction::SExt);
89 break;
90 case Instruction::Select: {
91 Value *True = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned,
92 IC, Processed);
93 Value *False = EvaluateInDifferentTypeImpl(I->getOperand(2), Ty, isSigned,
94 IC, Processed);
95 Res = SelectInst::Create(I->getOperand(0), True, False);
96 break;
97 }
98 case Instruction::PHI: {
99 PHINode *OPN = cast<PHINode>(I);
101 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
103 isSigned, IC, Processed);
104 NPN->addIncoming(V, OPN->getIncomingBlock(i));
105 }
106 Res = NPN;
107 break;
108 }
109 case Instruction::FPToUI:
110 case Instruction::FPToSI:
111 Res = CastInst::Create(static_cast<Instruction::CastOps>(Opc),
112 I->getOperand(0), Ty);
113 break;
114 case Instruction::Call:
116 switch (II->getIntrinsicID()) {
117 default:
118 llvm_unreachable("Unsupported call!");
119 case Intrinsic::vscale: {
121 I->getModule(), Intrinsic::vscale, {Ty});
122 Res = CallInst::Create(Fn->getFunctionType(), Fn);
123 break;
124 }
125 case Intrinsic::umin:
126 case Intrinsic::umax:
127 case Intrinsic::smin:
128 case Intrinsic::smax: {
129 Value *Op0 = EvaluateInDifferentTypeImpl(II->getArgOperand(0), Ty,
130 isSigned, IC, Processed);
131 Value *Op1 = EvaluateInDifferentTypeImpl(II->getArgOperand(1), Ty,
132 isSigned, IC, Processed);
134 I->getModule(), II->getIntrinsicID(), {Ty});
135 Res = CallInst::Create(Fn->getFunctionType(), Fn, {Op0, Op1});
136 break;
137 }
138 case Intrinsic::abs: {
139 Value *Arg = EvaluateInDifferentTypeImpl(II->getArgOperand(0), Ty,
140 isSigned, IC, Processed);
142 I->getModule(), II->getIntrinsicID(), {Ty});
143 Res = CallInst::Create(Fn->getFunctionType(), Fn,
144 {Arg, ConstantInt::getFalse(I->getContext())});
145 break;
146 }
147 }
148 }
149 break;
150 case Instruction::ShuffleVector: {
151 auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
152 auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
153 auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
154 Value *Op0 = EvaluateInDifferentTypeImpl(I->getOperand(0), FixedTy,
155 isSigned, IC, Processed);
156 Value *Op1 = EvaluateInDifferentTypeImpl(I->getOperand(1), FixedTy,
157 isSigned, IC, Processed);
158 Res = new ShuffleVectorInst(Op0, Op1,
159 cast<ShuffleVectorInst>(I)->getShuffleMask());
160 break;
161 }
162 default:
163 // TODO: Can handle more cases here.
164 llvm_unreachable("Unreachable!");
165 }
166
167 Res->takeName(I);
168 Value *Result = IC.InsertNewInstWith(Res, I->getIterator());
169 // There is no need in keeping track of the old value/new value relationship
170 // when we have only one user, we came have here from that user and no-one
171 // else cares.
172 if (!V->hasOneUse())
173 Processed[V] = Result;
174
175 return Result;
176}
177
178/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
179/// true for, actually insert the code to evaluate the expression.
181 bool isSigned) {
182 EvaluatedMap Processed;
183 return EvaluateInDifferentTypeImpl(V, Ty, isSigned, *this, Processed);
184}
185
187InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
188 const CastInst *CI2) {
189 Type *SrcTy = CI1->getSrcTy();
190 Type *MidTy = CI1->getDestTy();
191 Type *DstTy = CI2->getDestTy();
192
193 Instruction::CastOps firstOp = CI1->getOpcode();
194 Instruction::CastOps secondOp = CI2->getOpcode();
195 Type *SrcIntPtrTy =
196 SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
197 Type *DstIntPtrTy =
198 DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
199 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
200 DstTy, &DL);
201
202 // We don't want to form an inttoptr or ptrtoint that converts to an integer
203 // type that differs from the pointer size.
204 if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
205 (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
206 Res = 0;
207
208 return Instruction::CastOps(Res);
209}
210
211/// Implement the transforms common to all CastInst visitors.
213 Value *Src = CI.getOperand(0);
214 Type *Ty = CI.getType();
215
216 if (Value *Res =
217 simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
218 return replaceInstUsesWith(CI, Res);
219
220 // Try to eliminate a cast of a cast.
221 if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
222 if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
223 // The first cast (CSrc) is eliminable so we need to fix up or replace
224 // the second cast (CI). CSrc will then have a good chance of being dead.
225 auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
226 // Point debug users of the dying cast to the new one.
227 if (CSrc->hasOneUse())
228 replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
229 return Res;
230 }
231 }
232
233 if (auto *Sel = dyn_cast<SelectInst>(Src)) {
234 // We are casting a select. Try to fold the cast into the select if the
235 // select does not have a compare instruction with matching operand types
236 // or the select is likely better done in a narrow type.
237 // Creating a select with operands that are different sizes than its
238 // condition may inhibit other folds and lead to worse codegen.
239 auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
240 if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
241 (CI.getOpcode() == Instruction::Trunc &&
242 shouldChangeType(CI.getSrcTy(), CI.getType()))) {
243
244 // If it's a bitcast involving vectors, make sure it has the same number
245 // of elements on both sides.
246 if (CI.getOpcode() != Instruction::BitCast ||
248 if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
249 replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
250 return NV;
251 }
252 }
253 }
254 }
255
256 // If we are casting a PHI, then fold the cast into the PHI.
257 if (auto *PN = dyn_cast<PHINode>(Src)) {
258 // Don't do this if it would create a PHI node with an illegal type from a
259 // legal type.
260 if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
261 shouldChangeType(CI.getSrcTy(), CI.getType()))
262 if (Instruction *NV = foldOpIntoPhi(CI, PN))
263 return NV;
264 }
265
266 // Canonicalize a unary shuffle after the cast if neither operation changes
267 // the size or element size of the input vector.
268 // TODO: We could allow size-changing ops if that doesn't harm codegen.
269 // cast (shuffle X, Mask) --> shuffle (cast X), Mask
270 Value *X;
271 ArrayRef<int> Mask;
272 if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
273 // TODO: Allow scalable vectors?
274 auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
275 auto *DestTy = dyn_cast<FixedVectorType>(Ty);
276 if (SrcTy && DestTy &&
277 SrcTy->getNumElements() == DestTy->getNumElements() &&
278 SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
279 Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
280 return new ShuffleVectorInst(CastX, Mask);
281 }
282 }
283
284 return nullptr;
285}
286
287namespace {
288
289/// Helper class for evaluating whether a value can be computed in a different
290/// type without changing its value. Used by cast simplification transforms.
291class TypeEvaluationHelper {
292public:
293 /// Return true if we can evaluate the specified expression tree as type Ty
294 /// instead of its larger type, and arrive with the same value.
295 /// This is used by code that tries to eliminate truncates.
296 [[nodiscard]] static bool canEvaluateTruncated(Value *V, Type *Ty,
298 Instruction *CxtI);
299
300 /// Determine if the specified value can be computed in the specified wider
301 /// type and produce the same low bits. If not, return false.
302 [[nodiscard]] static bool canEvaluateZExtd(Value *V, Type *Ty,
303 unsigned &BitsToClear,
305 Instruction *CxtI);
306
307 /// Return true if we can take the specified value and return it as type Ty
308 /// without inserting any new casts and without changing the value of the
309 /// common low bits.
310 [[nodiscard]] static bool canEvaluateSExtd(Value *V, Type *Ty);
311
312private:
313 /// Constants and extensions/truncates from the destination type are always
314 /// free to be evaluated in that type.
315 [[nodiscard]] static bool canAlwaysEvaluateInType(Value *V, Type *Ty);
316
317 /// Check if we traversed all the users of the multi-use values we've seen.
318 [[nodiscard]] bool allPendingVisited() const {
319 return llvm::all_of(Pending,
320 [this](Value *V) { return Visited.contains(V); });
321 }
322
323 /// A generic wrapper for canEvaluate* recursions to inject visitation
324 /// tracking and enforce correct multi-use value evaluations.
325 [[nodiscard]] bool
326 canEvaluate(Value *V, Type *Ty,
327 llvm::function_ref<bool(Value *, Type *Type)> Pred) {
328 if (canAlwaysEvaluateInType(V, Ty))
329 return true;
330
331 auto *I = dyn_cast<Instruction>(V);
332
333 if (I == nullptr)
334 return false;
335
336 // We insert false by default to return false when we encounter user loops.
337 const auto [It, Inserted] = Visited.insert({V, false});
338
339 // There are three possible cases for us having information on this value
340 // in the Visited map:
341 // 1. We properly checked it and concluded that we can evaluate it (true)
342 // 2. We properly checked it and concluded that we can't (false)
343 // 3. We started to check it, but during the recursive traversal we came
344 // back to it.
345 //
346 // For cases 1 and 2, we can safely return the stored result. For case 3, we
347 // can potentially have a situation where we can evaluate recursive user
348 // chains, but that can be quite tricky to do properly and isntead, we
349 // return false.
350 //
351 // In any case, we should return whatever was there in the map to begin
352 // with.
353 if (!Inserted)
354 return It->getSecond();
355
356 // We can easily make a decision about single-user values whether they can
357 // be evaluated in a different type or not, we came from that user. This is
358 // not as simple for multi-user values.
359 //
360 // In general, we have the following case (inverted control-flow, users are
361 // at the top):
362 //
363 // Cast %A
364 // ____|
365 // /
366 // %A = Use %B, %C
367 // ________| |
368 // / |
369 // %B = Use %D |
370 // ________| |
371 // / |
372 // %D = Use %C |
373 // ________|___|
374 // /
375 // %C = ...
376 //
377 // In this case, when we check %A, %B and %D, we are confident that we can
378 // make the decision here and now, since we came from their only users.
379 //
380 // For %C, it is harder. We come there twice, and when we come the first
381 // time, it's hard to tell if we will visit the second user (technically
382 // it's not hard, but we might need a lot of repetitive checks with non-zero
383 // cost).
384 //
385 // In the case above, we are allowed to evaluate %C in different type
386 // because all of it users were part of the traversal.
387 //
388 // In the following case, however, we can't make this conclusion:
389 //
390 // Cast %A
391 // ____|
392 // /
393 // %A = Use %B, %C
394 // ________| |
395 // / |
396 // %B = Use %D |
397 // ________| |
398 // / |
399 // %D = Use %C |
400 // | |
401 // foo(%C) | | <- never traversing foo(%C)
402 // ________|___|
403 // /
404 // %C = ...
405 //
406 // In this case, we still can evaluate %C in a different type, but we'd need
407 // to create a copy of the original %C to be used in foo(%C). Such
408 // duplication might be not profitable.
409 //
410 // For this reason, we collect all users of the mult-user values and mark
411 // them as "pending" and defer this decision to the very end. When we are
412 // done and and ready to have a positive verdict, we should double-check all
413 // of the pending users and ensure that we visited them. allPendingVisited
414 // predicate checks exactly that.
415 if (!I->hasOneUse())
416 llvm::append_range(Pending, I->users());
417
418 const bool Result = Pred(V, Ty);
419 // We have to set result this way and not via It because Pred is recursive
420 // and it is very likely that we grew Visited and invalidated It.
421 Visited[V] = Result;
422 return Result;
423 }
424
425 /// Filter out values that we can not evaluate in the destination type for
426 /// free.
427 [[nodiscard]] bool canNotEvaluateInType(Value *V, Type *Ty);
428
429 [[nodiscard]] bool canEvaluateTruncatedImpl(Value *V, Type *Ty,
430 InstCombinerImpl &IC,
431 Instruction *CxtI);
432 [[nodiscard]] bool canEvaluateTruncatedPred(Value *V, Type *Ty,
433 InstCombinerImpl &IC,
434 Instruction *CxtI);
435 [[nodiscard]] bool canEvaluateZExtdImpl(Value *V, Type *Ty,
436 unsigned &BitsToClear,
437 InstCombinerImpl &IC,
438 Instruction *CxtI);
439 [[nodiscard]] bool canEvaluateSExtdImpl(Value *V, Type *Ty);
440 [[nodiscard]] bool canEvaluateSExtdPred(Value *V, Type *Ty);
441
442 /// A bookkeeping map to memorize an already made decision for a traversed
443 /// value.
444 SmallDenseMap<Value *, bool, 8> Visited;
445
446 /// A list of pending values to check in the end.
447 SmallVector<Value *, 8> Pending;
448};
449
450} // anonymous namespace
451
452/// Constants and extensions/truncates from the destination type are always
453/// free to be evaluated in that type. This is a helper for canEvaluate*.
454bool TypeEvaluationHelper::canAlwaysEvaluateInType(Value *V, Type *Ty) {
455 if (isa<Constant>(V))
456 return match(V, m_ImmConstant());
457
458 Value *X;
459 if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
460 X->getType() == Ty)
461 return true;
462
463 return false;
464}
465
466/// Filter out values that we can not evaluate in the destination type for free.
467/// This is a helper for canEvaluate*.
468bool TypeEvaluationHelper::canNotEvaluateInType(Value *V, Type *Ty) {
469 if (!isa<Instruction>(V))
470 return true;
471 // We don't extend or shrink something that has multiple uses -- doing so
472 // would require duplicating the instruction which isn't profitable.
473 if (!V->hasOneUse())
474 return true;
475
476 return false;
477}
478
479/// Return true if we can evaluate the specified expression tree as type Ty
480/// instead of its larger type, and arrive with the same value.
481/// This is used by code that tries to eliminate truncates.
482///
483/// Ty will always be a type smaller than V. We should return true if trunc(V)
484/// can be computed by computing V in the smaller type. If V is an instruction,
485/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
486/// makes sense if x and y can be efficiently truncated.
487///
488/// This function works on both vectors and scalars.
489///
490bool TypeEvaluationHelper::canEvaluateTruncated(Value *V, Type *Ty,
492 Instruction *CxtI) {
493 TypeEvaluationHelper TYH;
494 return TYH.canEvaluateTruncatedImpl(V, Ty, IC, CxtI) &&
495 // We need to check whether we visited all users of multi-user values,
496 // and we have to do it at the very end, outside of the recursion.
497 TYH.allPendingVisited();
498}
499
500bool TypeEvaluationHelper::canEvaluateTruncatedImpl(Value *V, Type *Ty,
502 Instruction *CxtI) {
503 return canEvaluate(V, Ty, [this, &IC, CxtI](Value *V, Type *Ty) {
504 return canEvaluateTruncatedPred(V, Ty, IC, CxtI);
505 });
506}
507
508bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value *V, Type *Ty,
510 Instruction *CxtI) {
511 auto *I = cast<Instruction>(V);
512 Type *OrigTy = V->getType();
513 switch (I->getOpcode()) {
514 case Instruction::Add:
515 case Instruction::Sub:
516 case Instruction::Mul:
517 case Instruction::And:
518 case Instruction::Or:
519 case Instruction::Xor:
520 // These operators can all arbitrarily be extended or truncated.
521 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
522 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
523
524 case Instruction::UDiv:
525 case Instruction::URem: {
526 // UDiv and URem can be truncated if all the truncated bits are zero.
527 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
528 uint32_t BitWidth = Ty->getScalarSizeInBits();
529 assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
530 APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
531 // Do not preserve the original context instruction. Simplifying div/rem
532 // based on later context may introduce a trap.
533 if (IC.MaskedValueIsZero(I->getOperand(0), Mask, I) &&
534 IC.MaskedValueIsZero(I->getOperand(1), Mask, I)) {
535 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
536 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
537 }
538 break;
539 }
540 case Instruction::Shl: {
541 // If we are truncating the result of this SHL, and if it's a shift of an
542 // inrange amount, we can always perform a SHL in a smaller type.
543 uint32_t BitWidth = Ty->getScalarSizeInBits();
544 KnownBits AmtKnownBits =
545 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
546 if (AmtKnownBits.getMaxValue().ult(BitWidth))
547 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
548 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
549 break;
550 }
551 case Instruction::LShr: {
552 // If this is a truncate of a logical shr, we can truncate it to a smaller
553 // lshr iff we know that the bits we would otherwise be shifting in are
554 // already zeros.
555 // TODO: It is enough to check that the bits we would be shifting in are
556 // zero - use AmtKnownBits.getMaxValue().
557 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
558 uint32_t BitWidth = Ty->getScalarSizeInBits();
559 KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), CxtI);
560 APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
561 APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
562 if (MaxShiftAmt.ult(BitWidth)) {
563 // If the only user is a trunc then we can narrow the shift if any new
564 // MSBs are not going to be used.
565 if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
566 auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
567 if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
568 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
569 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
570 }
571 if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, CxtI))
572 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
573 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
574 }
575 break;
576 }
577 case Instruction::AShr: {
578 // If this is a truncate of an arithmetic shr, we can truncate it to a
579 // smaller ashr iff we know that all the bits from the sign bit of the
580 // original type and the sign bit of the truncate type are similar.
581 // TODO: It is enough to check that the bits we would be shifting in are
582 // similar to sign bit of the truncate type.
583 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
584 uint32_t BitWidth = Ty->getScalarSizeInBits();
585 KnownBits AmtKnownBits =
586 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
587 unsigned ShiftedBits = OrigBitWidth - BitWidth;
588 if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
589 ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), CxtI))
590 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
591 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
592 break;
593 }
594 case Instruction::Trunc:
595 // trunc(trunc(x)) -> trunc(x)
596 return true;
597 case Instruction::ZExt:
598 case Instruction::SExt:
599 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
600 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
601 return true;
602 case Instruction::Select: {
604 return canEvaluateTruncatedImpl(SI->getTrueValue(), Ty, IC, CxtI) &&
605 canEvaluateTruncatedImpl(SI->getFalseValue(), Ty, IC, CxtI);
606 }
607 case Instruction::PHI: {
608 // We can change a phi if we can change all operands. Note that we never
609 // get into trouble with cyclic PHIs here because canEvaluate handles use
610 // chain loops.
611 PHINode *PN = cast<PHINode>(I);
612 return llvm::all_of(
613 PN->incoming_values(), [this, Ty, &IC, CxtI](Value *IncValue) {
614 return canEvaluateTruncatedImpl(IncValue, Ty, IC, CxtI);
615 });
616 }
617 case Instruction::FPToUI:
618 case Instruction::FPToSI: {
619 // If the integer type can hold the max FP value, it is safe to cast
620 // directly to that type. Otherwise, we may create poison via overflow
621 // that did not exist in the original code.
622 Type *InputTy = I->getOperand(0)->getType()->getScalarType();
623 const fltSemantics &Semantics = InputTy->getFltSemantics();
624 uint32_t MinBitWidth = APFloatBase::semanticsIntSizeInBits(
625 Semantics, I->getOpcode() == Instruction::FPToSI);
626 return Ty->getScalarSizeInBits() >= MinBitWidth;
627 }
628 case Instruction::ShuffleVector:
629 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
630 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
631
632 case Instruction::Call: {
633 Value *AbsOp;
635 if (IC.ComputeMaxSignificantBits(AbsOp, CxtI) > Ty->getScalarSizeInBits())
636 return false;
637 return canEvaluateTruncatedImpl(AbsOp, Ty, IC, CxtI);
638 }
639 auto *MM = dyn_cast<MinMaxIntrinsic>(I);
640 if (!MM)
641 return false;
642 // The min/max can be performed in the narrow type when each operand has
643 // zero high bits (for umin/umax) or enough sign bits (for smin/smax).
644 Value *Op0 = MM->getLHS();
645 Value *Op1 = MM->getRHS();
646 uint32_t BitWidth = Ty->getScalarSizeInBits();
647 if (MM->isSigned()) {
648 if (IC.ComputeMaxSignificantBits(Op0, CxtI) > BitWidth ||
649 IC.ComputeMaxSignificantBits(Op1, CxtI) > BitWidth)
650 break;
651 } else {
652 APInt Mask =
654 if (!IC.MaskedValueIsZero(Op0, Mask, CxtI) ||
655 !IC.MaskedValueIsZero(Op1, Mask, CxtI))
656 break;
657 }
658 return canEvaluateTruncatedImpl(Op0, Ty, IC, CxtI) &&
659 canEvaluateTruncatedImpl(Op1, Ty, IC, CxtI);
660 }
661 default:
662 // TODO: Can handle more cases here.
663 break;
664 }
665
666 return false;
667}
668
669/// Given a vector that is bitcast to an integer, optionally logically
670/// right-shifted, and truncated, convert it to an extractelement.
671/// Example (big endian):
672/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
673/// --->
674/// extractelement <4 x i32> %X, 1
676 InstCombinerImpl &IC) {
677 Value *TruncOp = Trunc.getOperand(0);
678 Type *DestType = Trunc.getType();
679 if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
680 return nullptr;
681
682 Value *VecInput = nullptr;
683 ConstantInt *ShiftVal = nullptr;
684 if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
685 m_LShr(m_BitCast(m_Value(VecInput)),
686 m_ConstantInt(ShiftVal)))) ||
687 !isa<VectorType>(VecInput->getType()))
688 return nullptr;
689
690 VectorType *VecType = cast<VectorType>(VecInput->getType());
691 unsigned VecWidth = VecType->getPrimitiveSizeInBits();
692 unsigned DestWidth = DestType->getPrimitiveSizeInBits();
693 unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
694
695 if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
696 return nullptr;
697
698 // If the element type of the vector doesn't match the result type,
699 // bitcast it to a vector type that we can extract from.
700 unsigned NumVecElts = VecWidth / DestWidth;
701 if (VecType->getElementType() != DestType) {
702 VecType = FixedVectorType::get(DestType, NumVecElts);
703 VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
704 }
705
706 unsigned Elt = ShiftAmount / DestWidth;
707 if (IC.getDataLayout().isBigEndian())
708 Elt = NumVecElts - 1 - Elt;
709
710 return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
711}
712
713/// Whenever an element is extracted from a vector, optionally shifted down, and
714/// then truncated, canonicalize by converting it to a bitcast followed by an
715/// extractelement.
716///
717/// Examples (little endian):
718/// trunc (extractelement <4 x i64> %X, 0) to i32
719/// --->
720/// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
721///
722/// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
723/// --->
724/// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
726 InstCombinerImpl &IC) {
727 Value *Src = Trunc.getOperand(0);
728 Type *SrcType = Src->getType();
729 Type *DstType = Trunc.getType();
730
731 // Only attempt this if we have simple aliasing of the vector elements.
732 // A badly fit destination size would result in an invalid cast.
733 unsigned SrcBits = SrcType->getScalarSizeInBits();
734 unsigned DstBits = DstType->getScalarSizeInBits();
735 unsigned TruncRatio = SrcBits / DstBits;
736 if ((SrcBits % DstBits) != 0)
737 return nullptr;
738
739 Value *VecOp;
740 ConstantInt *Cst;
741 const APInt *ShiftAmount = nullptr;
742 if (!match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)))) &&
743 !match(Src,
745 m_APInt(ShiftAmount)))))
746 return nullptr;
747
748 auto *VecOpTy = cast<VectorType>(VecOp->getType());
749 auto VecElts = VecOpTy->getElementCount();
750
751 uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
752 // Make sure we don't overflow in the calculation of the new index.
753 // (VecOpIdx + 1) * TruncRatio should not overflow.
754 if (Cst->uge(std::numeric_limits<uint64_t>::max() / TruncRatio))
755 return nullptr;
756 uint64_t VecOpIdx = Cst->getZExtValue();
757 uint64_t NewIdx = IC.getDataLayout().isBigEndian()
758 ? (VecOpIdx + 1) * TruncRatio - 1
759 : VecOpIdx * TruncRatio;
760
761 // Adjust index by the whole number of truncated elements.
762 if (ShiftAmount) {
763 // Check shift amount is in range and shifts a whole number of truncated
764 // elements.
765 if (ShiftAmount->uge(SrcBits) || ShiftAmount->urem(DstBits) != 0)
766 return nullptr;
767
768 uint64_t IdxOfs = ShiftAmount->udiv(DstBits).getZExtValue();
769 // IdxOfs is guaranteed to be less than TruncRatio, so we won't overflow in
770 // the adjustment.
771 assert(IdxOfs < TruncRatio &&
772 "IdxOfs is expected to be less than TruncRatio.");
773 NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
774 : (NewIdx + IdxOfs);
775 }
776
777 assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
778 "overflow 32-bits");
779
780 auto *BitCastTo =
781 VectorType::get(DstType, BitCastNumElts, VecElts.isScalable());
782 Value *BitCast = IC.Builder.CreateBitCast(VecOp, BitCastTo);
783 return ExtractElementInst::Create(BitCast, IC.Builder.getInt64(NewIdx));
784}
785
786/// Funnel/Rotate left/right may occur in a wider type than necessary because of
787/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
788Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
789 assert((isa<VectorType>(Trunc.getSrcTy()) ||
790 shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
791 "Don't narrow to an illegal scalar type");
792
793 // Bail out on strange types. It is possible to handle some of these patterns
794 // even with non-power-of-2 sizes, but it is not a likely scenario.
795 Type *DestTy = Trunc.getType();
796 unsigned NarrowWidth = DestTy->getScalarSizeInBits();
797 unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
798 if (!isPowerOf2_32(NarrowWidth))
799 return nullptr;
800
801 // First, find an or'd pair of opposite shifts:
802 // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
803 BinaryOperator *Or0, *Or1;
804 if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
805 return nullptr;
806
807 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
808 if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
809 !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
810 Or0->getOpcode() == Or1->getOpcode())
811 return nullptr;
812
813 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
814 if (Or0->getOpcode() == BinaryOperator::LShr) {
815 std::swap(Or0, Or1);
816 std::swap(ShVal0, ShVal1);
817 std::swap(ShAmt0, ShAmt1);
818 }
819 assert(Or0->getOpcode() == BinaryOperator::Shl &&
820 Or1->getOpcode() == BinaryOperator::LShr &&
821 "Illegal or(shift,shift) pair");
822
823 // Match the shift amount operands for a funnel/rotate pattern. This always
824 // matches a subtraction on the R operand.
825 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
826 // The shift amounts may add up to the narrow bit width:
827 // (shl ShVal0, L) | (lshr ShVal1, Width - L)
828 // If this is a funnel shift (different operands are shifted), then the
829 // shift amount can not over-shift (create poison) in the narrow type.
830 unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
831 APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
832 if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
833 if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
834 return L;
835
836 // The following patterns currently only work for rotation patterns.
837 // TODO: Add more general funnel-shift compatible patterns.
838 if (ShVal0 != ShVal1)
839 return nullptr;
840
841 // The shift amount may be masked with negation:
842 // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
843 Value *X;
844 unsigned Mask = Width - 1;
845 if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
847 return X;
848
849 // Same as above, but the shift amount may be extended after masking:
850 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
852 return X;
853
854 return nullptr;
855 };
856
857 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
858 bool IsFshl = true; // Sub on LSHR.
859 if (!ShAmt) {
860 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
861 IsFshl = false; // Sub on SHL.
862 }
863 if (!ShAmt)
864 return nullptr;
865
866 // The right-shifted value must have high zeros in the wide type (for example
867 // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
868 // truncated, so those do not matter.
869 APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
870 if (!MaskedValueIsZero(ShVal1, HiBitMask, &Trunc))
871 return nullptr;
872
873 // Adjust the width of ShAmt for narrowed funnel shift operation:
874 // - Zero-extend if ShAmt is narrower than the destination type.
875 // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
876 // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
877 // zext/trunc(ShAmt)).
878 Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);
879
880 Value *X, *Y;
881 X = Y = Builder.CreateTrunc(ShVal0, DestTy);
882 if (ShVal0 != ShVal1)
883 Y = Builder.CreateTrunc(ShVal1, DestTy);
884 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
885 Function *F =
886 Intrinsic::getOrInsertDeclaration(Trunc.getModule(), IID, DestTy);
887 return CallInst::Create(F, {X, Y, NarrowShAmt});
888}
889
890/// Try to narrow the width of math or bitwise logic instructions by pulling a
891/// truncate ahead of binary operators.
892Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
893 Type *SrcTy = Trunc.getSrcTy();
894 Type *DestTy = Trunc.getType();
895 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
896 unsigned DestWidth = DestTy->getScalarSizeInBits();
897
898 if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
899 return nullptr;
900
901 BinaryOperator *BinOp;
902 if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
903 return nullptr;
904
905 Value *BinOp0 = BinOp->getOperand(0);
906 Value *BinOp1 = BinOp->getOperand(1);
907 switch (BinOp->getOpcode()) {
908 case Instruction::And:
909 case Instruction::Or:
910 case Instruction::Xor:
911 case Instruction::Add:
912 case Instruction::Sub:
913 case Instruction::Mul: {
914 Constant *C;
915 if (match(BinOp0, m_Constant(C))) {
916 // trunc (binop C, X) --> binop (trunc C', X)
917 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
918 Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
919 return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
920 }
921 if (match(BinOp1, m_Constant(C))) {
922 // trunc (binop X, C) --> binop (trunc X, C')
923 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
924 Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
925 return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
926 }
927 Value *X;
928 if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
929 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
930 Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
931 return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
932 }
933 if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
934 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
935 Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
936 return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
937 }
938 break;
939 }
940 case Instruction::LShr:
941 case Instruction::AShr: {
942 // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
943 Value *A;
944 Constant *C;
945 if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
946 unsigned MaxShiftAmt = SrcWidth - DestWidth;
947 // If the shift is small enough, all zero/sign bits created by the shift
948 // are removed by the trunc.
950 APInt(SrcWidth, MaxShiftAmt)))) {
951 auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
952 bool IsExact = OldShift->isExact();
953 if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
954 /*IsSigned*/ true, DL)) {
955 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
956 Value *Shift =
957 OldShift->getOpcode() == Instruction::AShr
958 ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
959 : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
960 return CastInst::CreateTruncOrBitCast(Shift, DestTy);
961 }
962 }
963 }
964 break;
965 }
966 default: break;
967 }
968
969 if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
970 return NarrowOr;
971
972 return nullptr;
973}
974
975/// Try to narrow the width of a splat shuffle. This could be generalized to any
976/// shuffle with a constant operand, but we limit the transform to avoid
977/// creating a shuffle type that targets may not be able to lower effectively.
979 InstCombiner::BuilderTy &Builder) {
980 auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
981 if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
982 all_equal(Shuf->getShuffleMask()) &&
983 ElementCount::isKnownGE(Shuf->getType()->getElementCount(),
984 cast<VectorType>(Shuf->getOperand(0)->getType())
985 ->getElementCount())) {
986 // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
987 // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
988 Type *NewTruncTy = Shuf->getOperand(0)->getType()->getWithNewType(
989 Trunc.getType()->getScalarType());
990 Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), NewTruncTy);
991 return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
992 }
993
994 return nullptr;
995}
996
997/// Try to narrow the width of an insert element. This could be generalized for
998/// any vector constant, but we limit the transform to insertion into undef to
999/// avoid potential backend problems from unsupported insertion widths. This
1000/// could also be extended to handle the case of inserting a scalar constant
1001/// into a vector variable.
1003 InstCombiner::BuilderTy &Builder) {
1004 Instruction::CastOps Opcode = Trunc.getOpcode();
1005 assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1006 "Unexpected instruction for shrinking");
1007
1008 auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
1009 if (!InsElt || !InsElt->hasOneUse())
1010 return nullptr;
1011
1012 Type *DestTy = Trunc.getType();
1013 Type *DestScalarTy = DestTy->getScalarType();
1014 Value *VecOp = InsElt->getOperand(0);
1015 Value *ScalarOp = InsElt->getOperand(1);
1016 Value *Index = InsElt->getOperand(2);
1017
1018 if (match(VecOp, m_Undef())) {
1019 // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
1020 // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
1021 UndefValue *NarrowUndef = UndefValue::get(DestTy);
1022 Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
1023 return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
1024 }
1025
1026 return nullptr;
1027}
1028
1030 if (Instruction *Result = commonCastTransforms(Trunc))
1031 return Result;
1032
1033 Value *Src = Trunc.getOperand(0);
1034 Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
1035 unsigned DestWidth = DestTy->getScalarSizeInBits();
1036 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
1037
1038 // Attempt to truncate the entire input expression tree to the destination
1039 // type. Only do this if the dest type is a simple type, don't convert the
1040 // expression tree to something weird like i93 unless the source is also
1041 // strange.
1042 if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
1043 TypeEvaluationHelper::canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
1044
1045 // If this cast is a truncate, evaluting in a different type always
1046 // eliminates the cast, so it is always a win.
1047 LLVM_DEBUG(
1048 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1049 " to avoid cast: "
1050 << Trunc << '\n');
1051 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1052 assert(Res->getType() == DestTy);
1053 return replaceInstUsesWith(Trunc, Res);
1054 }
1055
1056 // For integer types, check if we can shorten the entire input expression to
1057 // DestWidth * 2, which won't allow removing the truncate, but reducing the
1058 // width may enable further optimizations, e.g. allowing for larger
1059 // vectorization factors.
1060 if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
1061 if (DestWidth * 2 < SrcWidth) {
1062 auto *NewDestTy = DestITy->getExtendedType();
1063 if (shouldChangeType(SrcTy, NewDestTy) &&
1064 TypeEvaluationHelper::canEvaluateTruncated(Src, NewDestTy, *this,
1065 &Trunc)) {
1066 LLVM_DEBUG(
1067 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1068 " to reduce the width of operand of"
1069 << Trunc << '\n');
1070 Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
1071 return new TruncInst(Res, DestTy);
1072 }
1073 }
1074 }
1075
1076 // See if we can simplify any instructions used by the input whose sole
1077 // purpose is to compute bits we don't care about.
1079 return &Trunc;
1080
1081 if (DestWidth == 1) {
1082 Value *Zero = Constant::getNullValue(SrcTy);
1083
1084 Value *X;
1085 const APInt *C1;
1086 Constant *C2;
1087 if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
1088 m_ImmConstant(C2))))) {
1089 // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
1090 Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
1091 Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
1092 return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
1093 }
1094
1095 if (match(Src, m_Shr(m_Value(X), m_SpecificInt(SrcWidth - 1)))) {
1096 // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
1097 // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
1098 return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero);
1099 }
1100
1101 Constant *C;
1102 if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
1103 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
1104 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1105 Value *MaskC = Builder.CreateShl(One, C);
1106 Value *And = Builder.CreateAnd(X, MaskC);
1107 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1108 }
1110 m_Deferred(X))))) {
1111 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
1112 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1113 Value *MaskC = Builder.CreateShl(One, C);
1114 Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
1115 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1116 }
1117
1118 {
1119 const APInt *C;
1120 if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
1121 // trunc (C << X) to i1 --> X == 0, where C is odd
1122 return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
1123 }
1124 }
1125
1126 if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
1127 Value *X, *Y;
1128 if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
1129 return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
1130 }
1131
1132 if (match(Src,
1134 return new ICmpInst(ICmpInst::ICMP_EQ, X,
1136 }
1137
1138 Value *A, *B;
1139 Constant *C;
1140
1141 // trunc(u/smin(zext(a) + zext(b), MAX)) --> uadd.sat(a, b)
1142 if (match(Src,
1145 m_SpecificInt(APInt::getMaxValue(DestWidth))),
1147 m_SpecificInt(APInt::getMaxValue(DestWidth)))))) &&
1148 A->getType() == DestTy && B->getType() == DestTy) {
1149 return replaceInstUsesWith(
1150 Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, A, B));
1151 }
1152
1153 // trunc(smax(zext(a) - zext(b), 0)) --> usub.sat(a, b)
1154 if (match(Src, m_OneUse(m_SMax(
1156 m_Zero()))) &&
1157 A->getType() == DestTy && B->getType() == DestTy) {
1158 return replaceInstUsesWith(
1159 Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B));
1160 }
1161
1162 if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
1163 unsigned AWidth = A->getType()->getScalarSizeInBits();
1164 unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
1165 auto *OldSh = cast<Instruction>(Src);
1166 bool IsExact = OldSh->isExact();
1167
1168 // If the shift is small enough, all zero bits created by the shift are
1169 // removed by the trunc.
1171 APInt(SrcWidth, MaxShiftAmt)))) {
1172 auto GetNewShAmt = [&](unsigned Width) {
1173 Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
1174 Constant *Cmp =
1176 Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
1177 return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
1178 DL);
1179 };
1180
1181 // trunc (lshr (sext A), C) --> ashr A, C
1182 if (A->getType() == DestTy) {
1183 Constant *ShAmt = GetNewShAmt(DestWidth);
1184 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
1185 return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
1186 : BinaryOperator::CreateAShr(A, ShAmt);
1187 }
1188 // The types are mismatched, so create a cast after shifting:
1189 // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
1190 if (Src->hasOneUse()) {
1191 Constant *ShAmt = GetNewShAmt(AWidth);
1192 Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
1193 return CastInst::CreateIntegerCast(Shift, DestTy, true);
1194 }
1195 }
1196 // TODO: Mask high bits with 'and'.
1197 }
1198
1199 if (Instruction *I = narrowBinOp(Trunc))
1200 return I;
1201
1202 if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
1203 return I;
1204
1205 if (Instruction *I = shrinkInsertElt(Trunc, Builder))
1206 return I;
1207
1208 if (Src->hasOneUse() &&
1209 (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
1210 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
1211 // dest type is native and cst < dest size.
1212 if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
1213 !match(A, m_Shr(m_Value(), m_Constant()))) {
1214 // Skip shifts of shift by constants. It undoes a combine in
1215 // FoldShiftByConstant and is the extend in reg pattern.
1216 APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
1217 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
1218 Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
1219 return BinaryOperator::Create(Instruction::Shl, NewTrunc,
1220 ConstantExpr::getTrunc(C, DestTy));
1221 }
1222 }
1223 }
1224
1225 // trunc (select(icmp_ult(A, DestTy_umax+1), A, sext(icmp_sgt(A, 0)))) -->
1226 // trunc (smin(smax(0, A), DestTy_umax))
1227 if (SrcTy->isIntegerTy() && isPowerOf2_64(SrcTy->getPrimitiveSizeInBits()) &&
1229 match(Src, m_OneUse(m_Select(
1231 m_Constant(C))),
1232 m_Deferred(A),
1234 ICmpInst::ICMP_SGT, m_Deferred(A), m_Zero())))))))) {
1235 APInt UpperBound = C->getUniqueInteger();
1236 APInt TruncatedMax = APInt::getAllOnes(DestTy->getIntegerBitWidth());
1237 TruncatedMax = TruncatedMax.zext(UpperBound.getBitWidth());
1238 if (!UpperBound.isZero() && UpperBound - 1 == TruncatedMax) {
1239 Value *SMax = Builder.CreateIntrinsic(Intrinsic::smax, {SrcTy},
1240 {ConstantInt::get(SrcTy, 0), A});
1241 Value *SMin = Builder.CreateIntrinsic(
1242 Intrinsic::smin, {SrcTy},
1243 {SMax, ConstantInt::get(SrcTy, TruncatedMax)});
1244 return new TruncInst(SMin, DestTy);
1245 }
1246 }
1247
1248 if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
1249 return I;
1250
1251 if (Instruction *I = foldVecExtTruncToExtElt(Trunc, *this))
1252 return I;
1253
1254 // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
1255 if (match(Src, m_OneUse(m_Ctlz(m_ZExt(m_Value(A)), m_Value(B))))) {
1256 unsigned AWidth = A->getType()->getScalarSizeInBits();
1257 if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
1258 Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
1259 Value *NarrowCtlz =
1260 Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
1261 return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
1262 }
1263 }
1264
1265 if (match(Src, m_VScale())) {
1266 if (Trunc.getFunction() &&
1267 Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1268 Attribute Attr =
1269 Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
1270 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1271 if (Log2_32(*MaxVScale) < DestWidth)
1272 return replaceInstUsesWith(Trunc, Builder.CreateVScale(DestTy));
1273 }
1274 }
1275
1276 if (DestWidth == 1 &&
1277 (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) &&
1278 isKnownNonZero(Src, SQ.getWithInstruction(&Trunc)))
1279 return replaceInstUsesWith(Trunc, ConstantInt::getTrue(DestTy));
1280
1281 bool Changed = false;
1282 if (!Trunc.hasNoSignedWrap() &&
1283 ComputeMaxSignificantBits(Src, &Trunc) <= DestWidth) {
1284 Trunc.setHasNoSignedWrap(true);
1285 Changed = true;
1286 }
1287 if (!Trunc.hasNoUnsignedWrap() &&
1288 MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
1289 &Trunc)) {
1290 Trunc.setHasNoUnsignedWrap(true);
1291 Changed = true;
1292 }
1293
1294 const APInt *C1;
1295 Value *V1;
1296 // OP = { lshr, ashr }
1297 // trunc ( OP i8 C1, V1) to i1 -> icmp eq V1, log_2(C1) iff C1 is power of 2
1298 if (DestWidth == 1 && match(Src, m_Shr(m_Power2(C1), m_Value(V1)))) {
1299 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1300 return new ICmpInst(ICmpInst::ICMP_EQ, V1, Right);
1301 }
1302
1303 // OP = { lshr, ashr }
1304 // trunc ( OP i8 C1, V1) to i1 -> icmp ult V1, log_2(C1 + 1) iff (C1 + 1) is
1305 // power of 2
1306 if (DestWidth == 1 && match(Src, m_Shr(m_LowBitMask(C1), m_Value(V1)))) {
1307 Value *Right = ConstantInt::get(V1->getType(), C1->countr_one());
1308 return new ICmpInst(ICmpInst::ICMP_ULT, V1, Right);
1309 }
1310
1311 // OP = { lshr, ashr }
1312 // trunc ( OP i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is
1313 // negative power of 2
1314 if (DestWidth == 1 && match(Src, m_Shr(m_NegatedPower2(C1), m_Value(V1)))) {
1315 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1316 return new ICmpInst(ICmpInst::ICMP_UGE, V1, Right);
1317 }
1318
1319 return Changed ? &Trunc : nullptr;
1320}
1321
1322Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
1323 ZExtInst &Zext) {
1324 // If we are just checking for a icmp eq of a single bit and zext'ing it
1325 // to an integer, then shift the bit to the appropriate place and then
1326 // cast to integer to avoid the comparison.
1327
1328 // FIXME: This set of transforms does not check for extra uses and/or creates
1329 // an extra instruction (an optional final cast is not included
1330 // in the transform comments). We may also want to favor icmp over
1331 // shifts in cases of equal instructions because icmp has better
1332 // analysis in general (invert the transform).
1333
1334 const APInt *Op1CV;
1335 if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
1336
1337 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
1338 if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
1339 Value *In = Cmp->getOperand(0);
1340 Value *Sh = ConstantInt::get(In->getType(),
1341 In->getType()->getScalarSizeInBits() - 1);
1342 In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
1343 if (In->getType() != Zext.getType())
1344 In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
1345
1346 return replaceInstUsesWith(Zext, In);
1347 }
1348
1349 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
1350 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1351 // zext (X != 0) to i32 --> X iff X has only the low bit set.
1352 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1353
1354 if (Op1CV->isZero() && Cmp->isEquality()) {
1355 // Exactly 1 possible 1? But not the high-bit because that is
1356 // canonicalized to this form.
1357 KnownBits Known = computeKnownBits(Cmp->getOperand(0), &Zext);
1358 APInt KnownZeroMask(~Known.Zero);
1359 uint32_t ShAmt = KnownZeroMask.logBase2();
1360 bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
1361 (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
1362 if (IsExpectShAmt &&
1363 (Cmp->getOperand(0)->getType() == Zext.getType() ||
1364 Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
1365 Value *In = Cmp->getOperand(0);
1366 if (ShAmt) {
1367 // Perform a logical shr by shiftamt.
1368 // Insert the shift to put the result in the low bit.
1369 In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
1370 In->getName() + ".lobit");
1371 }
1372
1373 // Toggle the low bit for "X == 0".
1374 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1375 In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));
1376
1377 if (Zext.getType() == In->getType())
1378 return replaceInstUsesWith(Zext, In);
1379
1380 Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
1381 return replaceInstUsesWith(Zext, IntCast);
1382 }
1383 }
1384 }
1385
1386 if (Cmp->isEquality()) {
1387 // Test if a bit is clear/set using a shifted-one mask:
1388 // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1389 // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1390 Value *X, *ShAmt;
1391 if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
1392 match(Cmp->getOperand(0),
1393 m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
1394 auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
1395 Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
1396 if (Zext.getType() == And->getType() ||
1397 Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
1398 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1399 X = Builder.CreateNot(X);
1400 Value *Lshr = Builder.CreateLShr(X, ShAmt);
1401 Value *And1 =
1402 Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
1403 return replaceInstUsesWith(
1404 Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
1405 }
1406 }
1407 }
1408
1409 return nullptr;
1410}
1411
1412/// Determine if the specified value can be computed in the specified wider type
1413/// and produce the same low bits. If not, return false.
1414///
1415/// If this function returns true, it can also return a non-zero number of bits
1416/// (in BitsToClear) which indicates that the value it computes is correct for
1417/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1418/// out. For example, to promote something like:
1419///
1420/// %B = trunc i64 %A to i32
1421/// %C = lshr i32 %B, 8
1422/// %E = zext i32 %C to i64
1423///
1424/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1425/// set to 8 to indicate that the promoted value needs to have bits 24-31
1426/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1427/// clear the top bits anyway, doing this has no extra cost.
1428///
1429/// This function works on both vectors and scalars.
1430bool TypeEvaluationHelper::canEvaluateZExtd(Value *V, Type *Ty,
1431 unsigned &BitsToClear,
1432 InstCombinerImpl &IC,
1433 Instruction *CxtI) {
1434 TypeEvaluationHelper TYH;
1435 return TYH.canEvaluateZExtdImpl(V, Ty, BitsToClear, IC, CxtI);
1436}
1437bool TypeEvaluationHelper::canEvaluateZExtdImpl(Value *V, Type *Ty,
1438 unsigned &BitsToClear,
1439 InstCombinerImpl &IC,
1440 Instruction *CxtI) {
1441 BitsToClear = 0;
1442 if (canAlwaysEvaluateInType(V, Ty))
1443 return true;
1444 // We stick to the one-user limit for the ZExt transform due to the fact
1445 // that this predicate returns two values: predicate result and BitsToClear.
1446 if (canNotEvaluateInType(V, Ty))
1447 return false;
1448
1449 auto *I = cast<Instruction>(V);
1450 unsigned Tmp;
1451 switch (I->getOpcode()) {
1452 case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1453 case Instruction::SExt: // zext(sext(x)) -> sext(x).
1454 case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1455 return true;
1456 case Instruction::And:
1457 case Instruction::Or:
1458 case Instruction::Xor:
1459 case Instruction::Add:
1460 case Instruction::Sub:
1461 case Instruction::Mul:
1462 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
1463 !canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI))
1464 return false;
1465 // These can all be promoted if neither operand has 'bits to clear'.
1466 if (BitsToClear == 0 && Tmp == 0)
1467 return true;
1468
1469 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1470 // other side, BitsToClear is ok.
1471 if (Tmp == 0 && I->isBitwiseLogicOp()) {
1472 // We use MaskedValueIsZero here for generality, but the case we care
1473 // about the most is constant RHS.
1474 unsigned VSize = V->getType()->getScalarSizeInBits();
1475 if (IC.MaskedValueIsZero(I->getOperand(1),
1476 APInt::getHighBitsSet(VSize, BitsToClear),
1477 CxtI)) {
1478 // If this is an And instruction and all of the BitsToClear are
1479 // known to be zero we can reset BitsToClear.
1480 if (I->getOpcode() == Instruction::And)
1481 BitsToClear = 0;
1482 return true;
1483 }
1484 }
1485
1486 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1487 return false;
1488
1489 case Instruction::Shl: {
1490 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1491 // upper bits we can reduce BitsToClear by the shift amount.
1492 uint64_t ShiftAmt;
1493 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1494 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1495 return false;
1496 BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1497 return true;
1498 }
1499 return false;
1500 }
1501 case Instruction::LShr: {
1502 // We can promote lshr(x, cst) if we can promote x. This requires the
1503 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1504 uint64_t ShiftAmt;
1505 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1506 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1507 return false;
1508 BitsToClear += ShiftAmt;
1509 if (BitsToClear > V->getType()->getScalarSizeInBits())
1510 BitsToClear = V->getType()->getScalarSizeInBits();
1511 return true;
1512 }
1513 // Cannot promote variable LSHR.
1514 return false;
1515 }
1516 case Instruction::Select:
1517 if (!canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
1518 !canEvaluateZExtdImpl(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
1519 // TODO: If important, we could handle the case when the BitsToClear are
1520 // known zero in the disagreeing side.
1521 Tmp != BitsToClear)
1522 return false;
1523 return true;
1524
1525 case Instruction::PHI: {
1526 // We can change a phi if we can change all operands. Note that we never
1527 // get into trouble with cyclic PHIs here because we only consider
1528 // instructions with a single use.
1529 PHINode *PN = cast<PHINode>(I);
1530 if (!canEvaluateZExtdImpl(PN->getIncomingValue(0), Ty, BitsToClear, IC,
1531 CxtI))
1532 return false;
1533 for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1534 if (!canEvaluateZExtdImpl(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
1535 // TODO: If important, we could handle the case when the BitsToClear
1536 // are known zero in the disagreeing input.
1537 Tmp != BitsToClear)
1538 return false;
1539 return true;
1540 }
1541 case Instruction::Call:
1542 // llvm.vscale() can always be executed in larger type, because the
1543 // value is automatically zero-extended.
1545 if (II->getIntrinsicID() == Intrinsic::vscale)
1546 return true;
1547 return false;
1548 default:
1549 // TODO: Can handle more cases here.
1550 return false;
1551 }
1552}
1553
1555 // If this zero extend is only used by a truncate, let the truncate be
1556 // eliminated before we try to optimize this zext.
1557 if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
1558 !isa<Constant>(Zext.getOperand(0)))
1559 return nullptr;
1560
1561 // If one of the common conversion will work, do it.
1562 if (Instruction *Result = commonCastTransforms(Zext))
1563 return Result;
1564
1565 if (auto *NewI = foldExtractionOfVectorDeinterleave(Zext))
1566 return NewI;
1567
1568 Value *Src = Zext.getOperand(0);
1569 Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
1570
1571 // zext nneg bool x -> 0
1572 if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
1574
1575 // Try to extend the entire expression tree to the wide destination type.
1576 unsigned BitsToClear;
1577 if (shouldChangeType(SrcTy, DestTy) &&
1578 TypeEvaluationHelper::canEvaluateZExtd(Src, DestTy, BitsToClear, *this,
1579 &Zext)) {
1580 assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1581 "Can't clear more bits than in SrcTy");
1582
1583 // Okay, we can transform this! Insert the new expression now.
1584 LLVM_DEBUG(
1585 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1586 " to avoid zero extend: "
1587 << Zext << '\n');
1588 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1589 assert(Res->getType() == DestTy);
1590
1591 // Preserve debug values referring to Src if the zext is its last use.
1592 if (auto *SrcOp = dyn_cast<Instruction>(Src))
1593 if (SrcOp->hasOneUse())
1594 replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
1595
1596 uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1597 uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1598
1599 // If the high bits are already filled with zeros, just replace this
1600 // cast with the result.
1602 Res, APInt::getHighBitsSet(DestBitSize, DestBitSize - SrcBitsKept),
1603 &Zext))
1604 return replaceInstUsesWith(Zext, Res);
1605
1606 // We need to emit an AND to clear the high bits.
1607 Constant *C = ConstantInt::get(Res->getType(),
1608 APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
1609 return BinaryOperator::CreateAnd(Res, C);
1610 }
1611
1612 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1613 // types and if the sizes are just right we can convert this into a logical
1614 // 'and' which will be much cheaper than the pair of casts.
1615 if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
1616 // TODO: Subsume this into EvaluateInDifferentType.
1617
1618 // Get the sizes of the types involved. We know that the intermediate type
1619 // will be smaller than A or C, but don't know the relation between A and C.
1620 Value *A = CSrc->getOperand(0);
1621 unsigned SrcSize = A->getType()->getScalarSizeInBits();
1622 unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1623 unsigned DstSize = DestTy->getScalarSizeInBits();
1624 // If we're actually extending zero bits, then if
1625 // SrcSize < DstSize: zext(a & mask)
1626 // SrcSize == DstSize: a & mask
1627 // SrcSize > DstSize: trunc(a) & mask
1628 if (SrcSize < DstSize) {
1629 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1630 Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
1631 Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
1632 return new ZExtInst(And, DestTy);
1633 }
1634
1635 if (SrcSize == DstSize) {
1636 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1637 return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
1638 AndValue));
1639 }
1640 if (SrcSize > DstSize) {
1641 Value *Trunc = Builder.CreateTrunc(A, DestTy);
1642 APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
1643 return BinaryOperator::CreateAnd(Trunc,
1644 ConstantInt::get(Trunc->getType(),
1645 AndValue));
1646 }
1647 }
1648
1649 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1650 return transformZExtICmp(Cmp, Zext);
1651
1652 // zext(trunc(X) & C) -> (X & zext(C)).
1653 Constant *C;
1654 Value *X;
1655 if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
1656 X->getType() == DestTy)
1657 return BinaryOperator::CreateAnd(X, Builder.CreateZExt(C, DestTy));
1658
1659 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1660 Value *And;
1661 if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
1663 X->getType() == DestTy) {
1664 Value *ZC = Builder.CreateZExt(C, DestTy);
1665 return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
1666 }
1667
1668 // If we are truncating, masking, and then zexting back to the original type,
1669 // that's just a mask. This is not handled by canEvaluateZextd if the
1670 // intermediate values have extra uses. This could be generalized further for
1671 // a non-constant mask operand.
1672 // zext (and (trunc X), C) --> and X, (zext C)
1673 if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
1674 X->getType() == DestTy) {
1675 Value *ZextC = Builder.CreateZExt(C, DestTy);
1676 return BinaryOperator::CreateAnd(X, ZextC);
1677 }
1678
1679 if (match(Src, m_VScale())) {
1680 if (Zext.getFunction() &&
1681 Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1682 Attribute Attr =
1683 Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1684 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1685 unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1686 if (Log2_32(*MaxVScale) < TypeWidth)
1687 return replaceInstUsesWith(Zext, Builder.CreateVScale(DestTy));
1688 }
1689 }
1690 }
1691
1692 if (!Zext.hasNonNeg()) {
1693 // If this zero extend is only used by a shift, add nneg flag.
1694 if (Zext.hasOneUse() &&
1695 SrcTy->getScalarSizeInBits() >
1696 Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
1697 match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
1698 Zext.setNonNeg();
1699 return &Zext;
1700 }
1701
1702 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
1703 Zext.setNonNeg();
1704 return &Zext;
1705 }
1706 }
1707
1708 return nullptr;
1709}
1710
1711/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1712Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
1713 SExtInst &Sext) {
1714 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1715 ICmpInst::Predicate Pred = Cmp->getPredicate();
1716
1717 // Don't bother if Op1 isn't of vector or integer type.
1718 if (!Op1->getType()->isIntOrIntVectorTy())
1719 return nullptr;
1720
1721 if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
1722 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1723 Value *Sh = ConstantInt::get(Op0->getType(),
1724 Op0->getType()->getScalarSizeInBits() - 1);
1725 Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1726 if (In->getType() != Sext.getType())
1727 In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
1728
1729 return replaceInstUsesWith(Sext, In);
1730 }
1731
1732 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1733 // If we know that only one bit of the LHS of the icmp can be set and we
1734 // have an equality comparison with zero or a power of 2, we can transform
1735 // the icmp and sext into bitwise/integer operations.
1736 if (Cmp->hasOneUse() &&
1737 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1738 KnownBits Known = computeKnownBits(Op0, &Sext);
1739
1740 APInt KnownZeroMask(~Known.Zero);
1741 if (KnownZeroMask.isPowerOf2()) {
1742 Value *In = Cmp->getOperand(0);
1743
1744 // If the icmp tests for a known zero bit we can constant fold it.
1745 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1746 Value *V = Pred == ICmpInst::ICMP_NE ?
1748 ConstantInt::getNullValue(Sext.getType());
1749 return replaceInstUsesWith(Sext, V);
1750 }
1751
1752 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1753 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1754 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1755 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1756 // Perform a right shift to place the desired bit in the LSB.
1757 if (ShiftAmt)
1758 In = Builder.CreateLShr(In,
1759 ConstantInt::get(In->getType(), ShiftAmt));
1760
1761 // At this point "In" is either 1 or 0. Subtract 1 to turn
1762 // {1, 0} -> {0, -1}.
1763 In = Builder.CreateAdd(In,
1764 ConstantInt::getAllOnesValue(In->getType()),
1765 "sext");
1766 } else {
1767 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1768 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1769 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1770 // Perform a left shift to place the desired bit in the MSB.
1771 if (ShiftAmt)
1772 In = Builder.CreateShl(In,
1773 ConstantInt::get(In->getType(), ShiftAmt));
1774
1775 // Distribute the bit over the whole bit width.
1776 In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1777 KnownZeroMask.getBitWidth() - 1), "sext");
1778 }
1779
1780 if (Sext.getType() == In->getType())
1781 return replaceInstUsesWith(Sext, In);
1782 return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
1783 }
1784 }
1785 }
1786
1787 return nullptr;
1788}
1789
1790/// Return true if we can take the specified value and return it as type Ty
1791/// without inserting any new casts and without changing the value of the common
1792/// low bits. This is used by code that tries to promote integer operations to
1793/// a wider types will allow us to eliminate the extension.
1794///
1795/// This function works on both vectors and scalars.
1796///
1797bool TypeEvaluationHelper::canEvaluateSExtd(Value *V, Type *Ty) {
1798 TypeEvaluationHelper TYH;
1799 return TYH.canEvaluateSExtdImpl(V, Ty) && TYH.allPendingVisited();
1800}
1801
1802bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
1803 return canEvaluate(V, Ty, [this](Value *V, Type *Ty) {
1804 return canEvaluateSExtdPred(V, Ty);
1805 });
1806}
1807
1808bool TypeEvaluationHelper::canEvaluateSExtdPred(Value *V, Type *Ty) {
1809 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1810 "Can't sign extend type to a smaller type");
1811
1812 auto *I = cast<Instruction>(V);
1813 switch (I->getOpcode()) {
1814 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1815 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1816 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1817 return true;
1818 case Instruction::And:
1819 case Instruction::Or:
1820 case Instruction::Xor:
1821 case Instruction::Add:
1822 case Instruction::Sub:
1823 case Instruction::Mul:
1824 // These operators can all arbitrarily be extended if their inputs can.
1825 return canEvaluateSExtdImpl(I->getOperand(0), Ty) &&
1826 canEvaluateSExtdImpl(I->getOperand(1), Ty);
1827
1828 // case Instruction::Shl: TODO
1829 // case Instruction::LShr: TODO
1830
1831 case Instruction::Select:
1832 return canEvaluateSExtdImpl(I->getOperand(1), Ty) &&
1833 canEvaluateSExtdImpl(I->getOperand(2), Ty);
1834
1835 case Instruction::PHI: {
1836 // We can change a phi if we can change all operands. Note that we never
1837 // get into trouble with cyclic PHIs here because canEvaluate handles use
1838 // chain loops.
1839 PHINode *PN = cast<PHINode>(I);
1840 for (Value *IncValue : PN->incoming_values())
1841 if (!canEvaluateSExtdImpl(IncValue, Ty))
1842 return false;
1843 return true;
1844 }
1845 default:
1846 // TODO: Can handle more cases here.
1847 break;
1848 }
1849
1850 return false;
1851}
1852
1854 // If this sign extend is only used by a truncate, let the truncate be
1855 // eliminated before we try to optimize this sext.
1856 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1857 return nullptr;
1858
1859 if (Instruction *I = commonCastTransforms(Sext))
1860 return I;
1861
1862 Value *Src = Sext.getOperand(0);
1863 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1864 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1865 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1866
1867 // If the value being extended is zero or positive, use a zext instead.
1868 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1869 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1870 CI->setNonNeg(true);
1871 return CI;
1872 }
1873
1874 // Try to extend the entire expression tree to the wide destination type.
1875 bool ShouldExtendExpression = true;
1876 Value *TruncSrc = nullptr;
1877 // It is not desirable to extend expression in the trunc + sext pattern when
1878 // destination type is narrower than original (pre-trunc) type.
1879 if (match(Src, m_Trunc(m_Value(TruncSrc))))
1880 if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
1881 ShouldExtendExpression = false;
1882 if (ShouldExtendExpression && shouldChangeType(SrcTy, DestTy) &&
1883 TypeEvaluationHelper::canEvaluateSExtd(Src, DestTy)) {
1884 // Okay, we can transform this! Insert the new expression now.
1885 LLVM_DEBUG(
1886 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1887 " to avoid sign extend: "
1888 << Sext << '\n');
1889 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1890 assert(Res->getType() == DestTy);
1891
1892 // If the high bits are already filled with sign bit, just replace this
1893 // cast with the result.
1894 if (ComputeNumSignBits(Res, &Sext) > DestBitSize - SrcBitSize)
1895 return replaceInstUsesWith(Sext, Res);
1896
1897 // We need to emit a shl + ashr to do the sign extend.
1898 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1899 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1900 ShAmt);
1901 }
1902
1903 Value *X = TruncSrc;
1904 if (X) {
1905 // If the input has more sign bits than bits truncated, then convert
1906 // directly to final type.
1907 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1908 bool HasNSW = cast<TruncInst>(Src)->hasNoSignedWrap();
1909 if (HasNSW || (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)) {
1910 auto *Res = CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1911 if (auto *ResTrunc = dyn_cast<TruncInst>(Res); ResTrunc && HasNSW)
1912 ResTrunc->setHasNoSignedWrap(true);
1913 return Res;
1914 }
1915
1916 // If input is a trunc from the destination type, then convert into shifts.
1917 if (Src->hasOneUse() && X->getType() == DestTy) {
1918 // sext (trunc X) --> ashr (shl X, C), C
1919 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1920 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1921 }
1922
1923 // If we are replacing shifted-in high zero bits with sign bits, convert
1924 // the logic shift to arithmetic shift and eliminate the cast to
1925 // intermediate type:
1926 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1927 Value *Y;
1928 if (Src->hasOneUse() &&
1930 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1931 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1932 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1933 }
1934 }
1935
1936 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1937 return transformSExtICmp(Cmp, Sext);
1938
1939 // If the input is a shl/ashr pair of a same constant, then this is a sign
1940 // extension from a smaller value. If we could trust arbitrary bitwidth
1941 // integers, we could turn this into a truncate to the smaller bit and then
1942 // use a sext for the whole extension. Since we don't, look deeper and check
1943 // for a truncate. If the source and dest are the same type, eliminate the
1944 // trunc and extend and just do shifts. For example, turn:
1945 // %a = trunc i32 %i to i8
1946 // %b = shl i8 %a, C
1947 // %c = ashr i8 %b, C
1948 // %d = sext i8 %c to i32
1949 // into:
1950 // %a = shl i32 %i, 32-(8-C)
1951 // %d = ashr i32 %a, 32-(8-C)
1952 Value *A = nullptr;
1953 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1954 Constant *BA = nullptr, *CA = nullptr;
1955 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1956 m_ImmConstant(CA))) &&
1957 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1958 Constant *WideCurrShAmt =
1959 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1960 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1961 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1962 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1963 Constant *NewShAmt = ConstantExpr::getSub(
1964 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1965 NumLowbitsLeft);
1966 NewShAmt =
1968 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1969 return BinaryOperator::CreateAShr(A, NewShAmt);
1970 }
1971
1972 // Splatting a bit of constant-index across a value:
1973 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1974 // If the dest type is different, use a cast (adjust use check).
1975 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1976 m_SpecificInt(SrcBitSize - 1))))) {
1977 Type *XTy = X->getType();
1978 unsigned XBitSize = XTy->getScalarSizeInBits();
1979 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1980 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1981 if (XTy == DestTy)
1982 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1983 AshrAmtC);
1984 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1985 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1986 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1987 }
1988 }
1989
1990 if (match(Src, m_VScale())) {
1991 if (Sext.getFunction() &&
1992 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1993 Attribute Attr =
1994 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1995 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1996 if (Log2_32(*MaxVScale) < (SrcBitSize - 1))
1997 return replaceInstUsesWith(Sext, Builder.CreateVScale(DestTy));
1998 }
1999 }
2000
2001 // sext(scmp(x, y)) -> scmp(x, y) with a wider result type.
2002 // sext(ucmp(x, y)) -> ucmp(x, y) with a wider result type.
2003 // scmp/ucmp return only -1, 0, or 1, which sign-extend correctly to any
2004 // wider integer type, so we can sink the extension into the intrinsic.
2005 if (auto *II = dyn_cast<IntrinsicInst>(Src)) {
2006 Intrinsic::ID IID = II->getIntrinsicID();
2007 if ((IID == Intrinsic::scmp || IID == Intrinsic::ucmp) && II->hasOneUse())
2008 return replaceInstUsesWith(
2009 Sext, Builder.CreateIntrinsic(
2010 DestTy, IID, {II->getArgOperand(0), II->getArgOperand(1)}));
2011 }
2012
2013 return nullptr;
2014}
2015
2016/// Return a Constant* for the specified floating-point constant if it fits
2017/// in the specified FP type without changing its value.
2018static bool fitsInFPType(APFloat F, const fltSemantics &Sem) {
2019 bool losesInfo;
2020 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
2021 return !losesInfo;
2022}
2023
2025 bool PreferBFloat) {
2026 // See if the value can be truncated to bfloat and then reextended.
2027 if (PreferBFloat && fitsInFPType(F, APFloat::BFloat()))
2028 return Type::getBFloatTy(Ctx);
2029 // See if the value can be truncated to half and then reextended.
2030 if (!PreferBFloat && fitsInFPType(F, APFloat::IEEEhalf()))
2031 return Type::getHalfTy(Ctx);
2032 // See if the value can be truncated to float and then reextended.
2034 return Type::getFloatTy(Ctx);
2035 if (&F.getSemantics() == &APFloat::IEEEdouble())
2036 return nullptr; // Won't shrink.
2037 // See if the value can be truncated to double and then reextended.
2039 return Type::getDoubleTy(Ctx);
2040 // Don't try to shrink to various long double types.
2041 return nullptr;
2042}
2043
2044static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
2045 Type *Ty = CFP->getType();
2046 if (Ty->getScalarType()->isPPC_FP128Ty())
2047 return nullptr; // No constant folding of this.
2048
2049 Type *ShrinkTy =
2050 shrinkFPConstant(CFP->getContext(), CFP->getValueAPF(), PreferBFloat);
2051 if (ShrinkTy)
2052 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2053 ShrinkTy = VectorType::get(ShrinkTy, VecTy);
2054
2055 return ShrinkTy;
2056}
2057
2058// Determine if this is a vector of ConstantFPs and if so, return the minimal
2059// type we can safely truncate all elements to.
2060static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
2061 auto *CV = dyn_cast<Constant>(V);
2062 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
2063 if (!CV || !CVVTy)
2064 return nullptr;
2065
2066 Type *MinType = nullptr;
2067
2068 unsigned NumElts = CVVTy->getNumElements();
2069
2070 // For fixed-width vectors we find the minimal type by looking
2071 // through the constant values of the vector.
2072 for (unsigned i = 0; i != NumElts; ++i) {
2073 if (isa<UndefValue>(CV->getAggregateElement(i)))
2074 continue;
2075
2076 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
2077 if (!CFP)
2078 return nullptr;
2079
2080 Type *T = shrinkFPConstant(CFP, PreferBFloat);
2081 if (!T)
2082 return nullptr;
2083
2084 // If we haven't found a type yet or this type has a larger mantissa than
2085 // our previous type, this is our new minimal type.
2086 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
2087 MinType = T;
2088 }
2089
2090 // Make a vector type from the minimal type.
2091 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
2092}
2093
2094/// Find the minimum FP type we can safely truncate to.
2095static Type *getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC) {
2096 if (auto *FPExt = dyn_cast<FPExtInst>(V))
2097 return FPExt->getOperand(0)->getType();
2098
2099 Value *Src;
2100 if (match(V, m_IToFP(m_Value(Src))) &&
2101 IC.canBeCastedExactlyIntToFP(Src, PreferredTy, isa<SIToFPInst>(V),
2103 return PreferredTy;
2104
2105 bool PreferBFloat = PreferredTy->getScalarType()->isBFloatTy();
2106 // If this value is a constant, return the constant in the smallest FP type
2107 // that can accurately represent it. This allows us to turn
2108 // (float)((double)X+2.0) into x+2.0f.
2109 if (auto *CFP = dyn_cast<ConstantFP>(V))
2110 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
2111 return T;
2112
2113 // Try to shrink scalable and fixed splat vectors.
2114 if (auto *FPC = dyn_cast<Constant>(V))
2115 if (auto *VTy = dyn_cast<VectorType>(V->getType()))
2116 if (auto *Splat = dyn_cast_or_null<ConstantFP>(FPC->getSplatValue()))
2117 if (Type *T = shrinkFPConstant(Splat, PreferBFloat))
2118 return VectorType::get(T, VTy);
2119
2120 // Try to shrink a vector of FP constants. This returns nullptr on scalable
2121 // vectors
2122 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
2123 return T;
2124
2125 return V->getType();
2126}
2127
2129 bool IsSigned,
2130 const Instruction *CxtI) const {
2131 Type *SrcTy = V->getType();
2132 assert(SrcTy->isIntOrIntVectorTy() && "Expected an integer type");
2133 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
2134 int DestNumSigBits = FPTy->getFPMantissaWidth();
2135
2136 // Easy case - if the source integer type has less bits than the FP mantissa,
2137 // then the cast must be exact.
2138 if (SrcSize <= DestNumSigBits)
2139 return true;
2140
2141 // Cast from FP to integer and back to FP is independent of the intermediate
2142 // integer width because of poison on overflow.
2143 Value *F;
2144 if (match(V, m_FPToI(m_Value(F)))) {
2145 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
2146 // potential rounding of negative FP input values.
2147 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
2148 if (!IsSigned && match(V, m_FPToSI(m_Value())))
2149 SrcNumSigBits++;
2150
2151 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
2152 // significant bits than the destination (and make sure neither type is
2153 // weird -- ppc_fp128).
2154 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
2155 SrcNumSigBits <= DestNumSigBits)
2156 return true;
2157 }
2158
2159 // Try harder to find if the source integer type has less significant bits.
2160 // Compute number of sign bits or determine trailing zeros.
2161 KnownBits SrcKnown = computeKnownBits(V, CxtI);
2162 int SigBits = (int)SrcTy->getScalarSizeInBits() -
2163 SrcKnown.countMinLeadingZeros() -
2164 SrcKnown.countMinTrailingZeros();
2165 if (SigBits <= DestNumSigBits)
2166 return true;
2167
2168 // For sitofp, the sign maps to the FP sign bit, so only magnitude bits
2169 // (BitWidth - NumSignBits) consume mantissa.
2170 if (IsSigned) {
2171 SigBits = (int)SrcTy->getScalarSizeInBits() - ComputeNumSignBits(V, CxtI);
2172 if (SigBits <= DestNumSigBits)
2173 return true;
2174 }
2175
2176 return false;
2177}
2178
2180 CastInst::CastOps Opcode = I.getOpcode();
2181 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
2182 "Unexpected cast");
2183 Value *Src = I.getOperand(0);
2184 Type *FPTy = I.getType();
2185 return canBeCastedExactlyIntToFP(Src, FPTy, Opcode == CastInst::SIToFP, &I);
2186}
2187
2190 return I;
2191
2192 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
2193 // simplify this expression to avoid one or more of the trunc/extend
2194 // operations if we can do so without changing the numerical results.
2195 //
2196 // The exact manner in which the widths of the operands interact to limit
2197 // what we can and cannot do safely varies from operation to operation, and
2198 // is explained below in the various case statements.
2199 Type *Ty = FPT.getType();
2200 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
2201 if (BO && BO->hasOneUse()) {
2202 Type *LHSMinType = getMinimumFPType(BO->getOperand(0), Ty, *this);
2203 Type *RHSMinType = getMinimumFPType(BO->getOperand(1), Ty, *this);
2204 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
2205 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
2206 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
2207 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
2208 unsigned DstWidth = Ty->getFPMantissaWidth();
2209 switch (BO->getOpcode()) {
2210 default: break;
2211 case Instruction::FAdd:
2212 case Instruction::FSub:
2213 // For addition and subtraction, the infinitely precise result can
2214 // essentially be arbitrarily wide; proving that double rounding
2215 // will not occur because the result of OpI is exact (as we will for
2216 // FMul, for example) is hopeless. However, we *can* nonetheless
2217 // frequently know that double rounding cannot occur (or that it is
2218 // innocuous) by taking advantage of the specific structure of
2219 // infinitely-precise results that admit double rounding.
2220 //
2221 // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
2222 // to represent both sources, we can guarantee that the double
2223 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
2224 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
2225 // for proof of this fact).
2226 //
2227 // Note: Figueroa does not consider the case where DstFormat !=
2228 // SrcFormat. It's possible (likely even!) that this analysis
2229 // could be tightened for those cases, but they are rare (the main
2230 // case of interest here is (float)((double)float + float)).
2231 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
2232 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2233 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2234 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
2235 RI->copyFastMathFlags(BO);
2236 return RI;
2237 }
2238 break;
2239 case Instruction::FMul:
2240 // For multiplication, the infinitely precise result has at most
2241 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
2242 // that such a value can be exactly represented, then no double
2243 // rounding can possibly occur; we can safely perform the operation
2244 // in the destination format if it can represent both sources.
2245 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
2246 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2247 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2248 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
2249 }
2250 break;
2251 case Instruction::FDiv:
2252 // For division, we use again use the bound from Figueroa's
2253 // dissertation. I am entirely certain that this bound can be
2254 // tightened in the unbalanced operand case by an analysis based on
2255 // the diophantine rational approximation bound, but the well-known
2256 // condition used here is a good conservative first pass.
2257 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
2258 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
2259 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2260 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2261 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
2262 }
2263 break;
2264 case Instruction::FRem: {
2265 // Remainder is straightforward. Remainder is always exact, so the
2266 // type of OpI doesn't enter into things at all. We simply evaluate
2267 // in whichever source type is larger, then convert to the
2268 // destination type.
2269 if (SrcWidth == OpWidth)
2270 break;
2271 Value *LHS, *RHS;
2272 if (LHSWidth == SrcWidth) {
2273 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
2274 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
2275 } else {
2276 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
2277 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
2278 }
2279
2280 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
2281 return CastInst::CreateFPCast(ExactResult, Ty);
2282 }
2283 }
2284 }
2285
2286 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
2287 Value *X;
2289 if (Op && Op->hasOneUse()) {
2290 FastMathFlags FMF = FPT.getFastMathFlags();
2291 if (auto *FPMO = dyn_cast<FPMathOperator>(Op))
2292 FMF &= FPMO->getFastMathFlags();
2293
2294 if (match(Op, m_FNeg(m_Value(X)))) {
2295 Value *InnerTrunc = Builder.CreateFPTruncFMF(X, Ty, FMF);
2296 Value *Neg = Builder.CreateFNegFMF(InnerTrunc, FMF);
2297 return replaceInstUsesWith(FPT, Neg);
2298 }
2299
2300 // If we are truncating a select that has an extended operand, we can
2301 // narrow the other operand and do the select as a narrow op.
2302 Value *Cond, *X, *Y;
2304 X->getType() == Ty) {
2305 // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
2306 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2307 Value *Sel =
2308 Builder.CreateSelectFMF(Cond, X, NarrowY, FMF, "narrow.sel", Op);
2309 return replaceInstUsesWith(FPT, Sel);
2310 }
2312 X->getType() == Ty) {
2313 // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
2314 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2315 Value *Sel =
2316 Builder.CreateSelectFMF(Cond, NarrowY, X, FMF, "narrow.sel", Op);
2317 return replaceInstUsesWith(FPT, Sel);
2318 }
2319 }
2320
2321 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
2322 switch (II->getIntrinsicID()) {
2323 default: break;
2324 case Intrinsic::ceil:
2325 case Intrinsic::fabs:
2326 case Intrinsic::floor:
2327 case Intrinsic::nearbyint:
2328 case Intrinsic::rint:
2329 case Intrinsic::round:
2330 case Intrinsic::roundeven:
2331 case Intrinsic::trunc: {
2332 Value *Src = II->getArgOperand(0);
2333 if (!Src->hasOneUse())
2334 break;
2335
2336 // Except for fabs, this transformation requires the input of the unary FP
2337 // operation to be itself an fpext from the type to which we're
2338 // truncating.
2339 if (II->getIntrinsicID() != Intrinsic::fabs) {
2340 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
2341 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
2342 break;
2343 }
2344
2345 // Do unary FP operation on smaller type.
2346 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
2347 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
2349 FPT.getModule(), II->getIntrinsicID(), Ty);
2351 II->getOperandBundlesAsDefs(OpBundles);
2352 CallInst *NewCI =
2353 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
2354 // A normal value may be converted to an infinity. It means that we cannot
2355 // propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
2356 NewCI->copyFastMathFlags(&FPT);
2357 return NewCI;
2358 }
2359 }
2360 }
2361
2362 if (Instruction *I = shrinkInsertElt(FPT, Builder))
2363 return I;
2364
2365 Value *Src = FPT.getOperand(0);
2366 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2367 auto *FPCast = cast<CastInst>(Src);
2368 if (isKnownExactCastIntToFP(*FPCast))
2369 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2370 }
2371
2372 return nullptr;
2373}
2374
2376 // If the source operand is a cast from integer to FP and known exact, then
2377 // cast the integer operand directly to the destination type.
2378 Type *Ty = FPExt.getType();
2379 Value *Src = FPExt.getOperand(0);
2380 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2381 auto *FPCast = cast<CastInst>(Src);
2382 if (isKnownExactCastIntToFP(*FPCast))
2383 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2384 }
2385
2386 return commonCastTransforms(FPExt);
2387}
2388
2389/// fpto{s/u}i[.sat]({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
2390/// This is safe if the intermediate type has enough bits in its mantissa to
2391/// accurately represent all values of X. For example, this won't work with
2392/// i64 -> float -> i64.
2393template <typename FPToIntTy>
2395 constexpr bool IsSaturating = std::is_same_v<FPToIntTy, IntrinsicInst>;
2396
2397 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
2398 return nullptr;
2399
2400 auto *OpI = cast<CastInst>(FI.getOperand(0));
2401 Value *X = OpI->getOperand(0);
2402 Type *XType = X->getType();
2403 Type *DestType = FI.getType();
2404 bool IsInputSigned = isa<SIToFPInst>(OpI);
2405
2406 bool IsOutputSigned;
2407 if constexpr (IsSaturating)
2408 IsOutputSigned = FI.getIntrinsicID() == Intrinsic::fptosi_sat;
2409 else
2410 IsOutputSigned = isa<FPToSIInst>(FI);
2411
2412 // Since we can assume the conversion won't overflow, our decision as to
2413 // whether the input will fit in the float should depend on the minimum
2414 // of the input range and output range.
2415
2416 // This means this is also safe for a signed input and unsigned output, since
2417 // a negative input would lead to undefined behavior.
2418 if (!isKnownExactCastIntToFP(*OpI)) {
2419 if constexpr (!IsSaturating) {
2420 // The first cast may not round exactly based on the source integer width
2421 // and FP width, but the overflow UB rules can still allow this to fold.
2422 // If the destination type is narrow, that means the intermediate FP value
2423 // must be large enough to hold the source value exactly.
2424 //
2425 // For example, (uint8_t)((float)(uint32_t 16777217) is UB.
2426 int OutputSize = (int)DestType->getScalarSizeInBits();
2427 if (OutputSize > OpI->getType()->getFPMantissaWidth())
2428 return nullptr;
2429 } else {
2430 // Sat intrinsics produce a defined saturated value on overflow, so
2431 // the UB-based shortcut is invalid. Require exactness.
2432 return nullptr;
2433 }
2434 }
2435
2436 unsigned SrcWidth = XType->getScalarSizeInBits();
2437 unsigned DestWidth = DestType->getScalarSizeInBits();
2438
2439 if constexpr (IsSaturating) {
2440 // TODO: cross-sign and narrowing cases could be handled with range
2441 // analysis to prove the source fits in the destination.
2442 if (IsInputSigned != IsOutputSigned || DestWidth < SrcWidth)
2443 return nullptr;
2444 }
2445
2446 if (DestWidth > SrcWidth) {
2447 if (IsInputSigned && IsOutputSigned)
2448 return new SExtInst(X, DestType);
2449 return new ZExtInst(X, DestType);
2450 }
2451 if (DestWidth < SrcWidth)
2452 return new TruncInst(X, DestType);
2453
2454 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2455 return replaceInstUsesWith(FI, X);
2456}
2457
2459template Instruction *
2461
2463 // fpto{u/s}i non-norm --> 0
2464 FPClassTest Mask =
2465 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2467 FI.getOperand(0), Mask, IC.getSimplifyQuery().getWithInstruction(&FI));
2468 if (FPClass.isKnownNever(Mask))
2470
2471 return nullptr;
2472}
2473
2475 if (Instruction *I = foldItoFPtoI(FI))
2476 return I;
2477
2478 if (Instruction *I = foldFPtoI(FI, *this))
2479 return I;
2480
2481 return commonCastTransforms(FI);
2482}
2483
2485 if (Instruction *I = foldItoFPtoI(FI))
2486 return I;
2487
2488 if (Instruction *I = foldFPtoI(FI, *this))
2489 return I;
2490
2491 return commonCastTransforms(FI);
2492}
2493
2495 if (Instruction *R = commonCastTransforms(CI))
2496 return R;
2497 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
2498 CI.setNonNeg();
2499 return &CI;
2500 }
2501 return nullptr;
2502}
2503
2505 if (Instruction *R = commonCastTransforms(CI))
2506 return R;
2507 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2508 auto *UI =
2509 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2510 UI->setNonNeg(true);
2511 return UI;
2512 }
2513 return nullptr;
2514}
2515
2517 // If the source integer type is not the intptr_t type for this target, do a
2518 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2519 // cast to be exposed to other transforms.
2520 unsigned AS = CI.getAddressSpace();
2521 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2522 DL.getPointerSizeInBits(AS)) {
2523 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2524 DL.getIntPtrType(CI.getContext(), AS));
2525 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2526 return new IntToPtrInst(P, CI.getType());
2527 }
2528
2529 // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
2530 // (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
2531 // value.
2532 Value *Base;
2533 Value *Offset;
2534 auto UsesPointerAsInt = [](User *U) {
2536 return true;
2537 if (auto *P = dyn_cast<PHINode>(U))
2538 return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(*P->user_begin());
2539 return false;
2540 };
2541 if (match(CI.getOperand(0),
2543 m_Value(Offset)))) &&
2545 Base->getType()->getPointerAddressSpace() &&
2546 all_of(CI.users(), UsesPointerAsInt)) {
2547 return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
2548 }
2549
2551 return I;
2552
2553 return nullptr;
2554}
2555
2557 // Look through chain of one-use GEPs.
2558 Type *PtrTy = Ptr->getType();
2560 while (true) {
2561 auto *GEP = dyn_cast<GEPOperator>(Ptr);
2562 if (!GEP || !GEP->hasOneUse())
2563 break;
2564 GEPs.push_back(GEP);
2565 Ptr = GEP->getPointerOperand();
2566 }
2567
2568 // Don't handle case where GEP converts from pointer to vector.
2569 if (GEPs.empty() || PtrTy != Ptr->getType())
2570 return nullptr;
2571
2572 // Check whether we know the integer value of the base pointer.
2573 Value *Res;
2574 Type *IdxTy = DL.getIndexType(PtrTy);
2575 if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
2576 Res->getType() == IntTy && IntTy == IdxTy) {
2577 // pass
2578 } else if (isa<ConstantPointerNull>(Ptr)) {
2579 Res = Constant::getNullValue(IdxTy);
2580 } else {
2581 return nullptr;
2582 }
2583
2584 // Perform the entire operation on integers instead.
2585 for (GEPOperator *GEP : reverse(GEPs)) {
2586 Value *Offset = EmitGEPOffset(GEP);
2587 Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
2588 }
2589 return Builder.CreateZExtOrTrunc(Res, IntTy);
2590}
2591
2593 // If the destination integer type is not the intptr_t type for this target,
2594 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2595 // to be exposed to other transforms.
2597 Type *SrcTy = SrcOp->getType();
2598 Type *Ty = CI.getType();
2599 unsigned AS = CI.getPointerAddressSpace();
2600 unsigned TySize = Ty->getScalarSizeInBits();
2601 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2602 if (TySize != PtrSize) {
2603 Type *IntPtrTy =
2604 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2605 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2606 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2607 }
2608
2609 // (ptrtoint (ptrmask P, M))
2610 // -> (and (ptrtoint P), M)
2611 // This is generally beneficial as `and` is better supported than `ptrmask`.
2612 Value *Ptr, *Mask;
2614 m_Value(Mask)))) &&
2615 Mask->getType() == Ty)
2616 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2617
2618 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2619 return replaceInstUsesWith(CI, V);
2620
2621 Value *Vec, *Scalar, *Index;
2623 m_Value(Scalar), m_Value(Index)))) &&
2624 Vec->getType() == Ty) {
2625 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2626 // Convert the scalar to int followed by insert to eliminate one cast:
2627 // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2628 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2629 return InsertElementInst::Create(Vec, NewCast, Index);
2630 }
2631
2632 return commonCastTransforms(CI);
2633}
2634
2637 Type *Ty = CI.getType();
2638
2639 // (ptrtoaddr (ptrmask P, M))
2640 // -> (and (ptrtoaddr P), M)
2641 // This is generally beneficial as `and` is better supported than `ptrmask`.
2642 Value *Ptr, *Mask;
2644 m_Value(Mask)))) &&
2645 Mask->getType() == Ty)
2646 return BinaryOperator::CreateAnd(Builder.CreatePtrToAddr(Ptr), Mask);
2647
2648 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2649 return replaceInstUsesWith(CI, V);
2650
2651 // FIXME: Implement variants of ptrtoint folds.
2652 return commonCastTransforms(CI);
2653}
2654
2655/// This input value (which is known to have vector type) is being zero extended
2656/// or truncated to the specified vector type. Since the zext/trunc is done
2657/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2658/// endianness will impact which end of the vector that is extended or
2659/// truncated.
2660///
2661/// A vector is always stored with index 0 at the lowest address, which
2662/// corresponds to the most significant bits for a big endian stored integer and
2663/// the least significant bits for little endian. A trunc/zext of an integer
2664/// impacts the big end of the integer. Thus, we need to add/remove elements at
2665/// the front of the vector for big endian targets, and the back of the vector
2666/// for little endian targets.
2667///
2668/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2669///
2670/// The source and destination vector types may have different element types.
2671static Instruction *
2673 InstCombinerImpl &IC) {
2674 // We can only do this optimization if the output is a multiple of the input
2675 // element size, or the input is a multiple of the output element size.
2676 // Convert the input type to have the same element type as the output.
2677 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2678
2679 if (SrcTy->getElementType() != DestTy->getElementType()) {
2680 // The input types don't need to be identical, but for now they must be the
2681 // same size. There is no specific reason we couldn't handle things like
2682 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2683 // there yet.
2684 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2685 DestTy->getElementType()->getPrimitiveSizeInBits())
2686 return nullptr;
2687
2688 SrcTy =
2689 FixedVectorType::get(DestTy->getElementType(),
2690 cast<FixedVectorType>(SrcTy)->getNumElements());
2691 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2692 }
2693
2694 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2695 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2696 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2697
2698 assert(SrcElts != DestElts && "Element counts should be different.");
2699
2700 // Now that the element types match, get the shuffle mask and RHS of the
2701 // shuffle to use, which depends on whether we're increasing or decreasing the
2702 // size of the input.
2703 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2704 ArrayRef<int> ShuffleMask;
2705 Value *V2;
2706
2707 if (SrcElts > DestElts) {
2708 // If we're shrinking the number of elements (rewriting an integer
2709 // truncate), just shuffle in the elements corresponding to the least
2710 // significant bits from the input and use poison as the second shuffle
2711 // input.
2712 V2 = PoisonValue::get(SrcTy);
2713 // Make sure the shuffle mask selects the "least significant bits" by
2714 // keeping elements from back of the src vector for big endian, and from the
2715 // front for little endian.
2716 ShuffleMask = ShuffleMaskStorage;
2717 if (IsBigEndian)
2718 ShuffleMask = ShuffleMask.take_back(DestElts);
2719 else
2720 ShuffleMask = ShuffleMask.take_front(DestElts);
2721 } else {
2722 // If we're increasing the number of elements (rewriting an integer zext),
2723 // shuffle in all of the elements from InVal. Fill the rest of the result
2724 // elements with zeros from a constant zero.
2725 V2 = Constant::getNullValue(SrcTy);
2726 // Use first elt from V2 when indicating zero in the shuffle mask.
2727 uint32_t NullElt = SrcElts;
2728 // Extend with null values in the "most significant bits" by adding elements
2729 // in front of the src vector for big endian, and at the back for little
2730 // endian.
2731 unsigned DeltaElts = DestElts - SrcElts;
2732 if (IsBigEndian)
2733 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2734 else
2735 ShuffleMaskStorage.append(DeltaElts, NullElt);
2736 ShuffleMask = ShuffleMaskStorage;
2737 }
2738
2739 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2740}
2741
2742static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2743 return Value % Ty->getPrimitiveSizeInBits() == 0;
2744}
2745
2746static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2747 return Value / Ty->getPrimitiveSizeInBits();
2748}
2749
2750/// V is a value which is inserted into a vector of VecEltTy.
2751/// Look through the value to see if we can decompose it into
2752/// insertions into the vector. See the example in the comment for
2753/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2754/// The type of V is always a non-zero multiple of VecEltTy's size.
2755/// Shift is the number of bits between the lsb of V and the lsb of
2756/// the vector.
2757///
2758/// This returns false if the pattern can't be matched or true if it can,
2759/// filling in Elements with the elements found here.
2760static bool collectInsertionElements(Value *V, unsigned Shift,
2761 SmallVectorImpl<Value *> &Elements,
2762 Type *VecEltTy, bool isBigEndian) {
2763 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2764 "Shift should be a multiple of the element type size");
2765
2766 // Undef values never contribute useful bits to the result.
2767 if (isa<UndefValue>(V)) return true;
2768
2769 // If we got down to a value of the right type, we win, try inserting into the
2770 // right element.
2771 if (V->getType() == VecEltTy) {
2772 // Inserting null doesn't actually insert any elements.
2773 if (Constant *C = dyn_cast<Constant>(V))
2774 if (C->isNullValue())
2775 return true;
2776
2777 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2778 if (isBigEndian)
2779 ElementIndex = Elements.size() - ElementIndex - 1;
2780
2781 // Fail if multiple elements are inserted into this slot.
2782 if (Elements[ElementIndex])
2783 return false;
2784
2785 Elements[ElementIndex] = V;
2786 return true;
2787 }
2788
2789 if (Constant *C = dyn_cast<Constant>(V)) {
2790 // Figure out the # elements this provides, and bitcast it or slice it up
2791 // as required.
2792 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2793 VecEltTy);
2794 // If the constant is the size of a vector element, we just need to bitcast
2795 // it to the right type so it gets properly inserted.
2796 if (NumElts == 1)
2798 Shift, Elements, VecEltTy, isBigEndian);
2799
2800 // Okay, this is a constant that covers multiple elements. Slice it up into
2801 // pieces and insert each element-sized piece into the vector.
2802 if (!isa<IntegerType>(C->getType()))
2803 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2804 C->getType()->getPrimitiveSizeInBits()));
2805 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2806 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2807
2808 for (unsigned i = 0; i != NumElts; ++i) {
2809 unsigned ShiftI = i * ElementSize;
2811 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2812 if (!Piece)
2813 return false;
2814
2815 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2816 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2817 isBigEndian))
2818 return false;
2819 }
2820 return true;
2821 }
2822
2823 if (!V->hasOneUse()) return false;
2824
2826 if (!I) return false;
2827 switch (I->getOpcode()) {
2828 default: return false; // Unhandled case.
2829 case Instruction::BitCast:
2830 if (I->getOperand(0)->getType()->isVectorTy())
2831 return false;
2832 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2833 isBigEndian);
2834 case Instruction::ZExt:
2836 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2837 VecEltTy))
2838 return false;
2839 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2840 isBigEndian);
2841 case Instruction::Or:
2842 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2843 isBigEndian) &&
2844 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2845 isBigEndian);
2846 case Instruction::Shl: {
2847 // Must be shifting by a constant that is a multiple of the element size.
2848 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2849 if (!CI) return false;
2850 Shift += CI->getZExtValue();
2851 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2852 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2853 isBigEndian);
2854 }
2855
2856 }
2857}
2858
2859
2860/// If the input is an 'or' instruction, we may be doing shifts and ors to
2861/// assemble the elements of the vector manually.
2862/// Try to rip the code out and replace it with insertelements. This is to
2863/// optimize code like this:
2864///
2865/// %tmp37 = bitcast float %inc to i32
2866/// %tmp38 = zext i32 %tmp37 to i64
2867/// %tmp31 = bitcast float %inc5 to i32
2868/// %tmp32 = zext i32 %tmp31 to i64
2869/// %tmp33 = shl i64 %tmp32, 32
2870/// %ins35 = or i64 %tmp33, %tmp38
2871/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2872///
2873/// Into two insertelements that do "buildvector{%inc, %inc5}".
2875 InstCombinerImpl &IC) {
2876 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2877 Value *IntInput = CI.getOperand(0);
2878
2879 // if the int input is just an undef value do not try to optimize to vector
2880 // insertions as it will prevent undef propagation
2881 if (isa<UndefValue>(IntInput))
2882 return nullptr;
2883
2884 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2885 if (!collectInsertionElements(IntInput, 0, Elements,
2886 DestVecTy->getElementType(),
2887 IC.getDataLayout().isBigEndian()))
2888 return nullptr;
2889
2890 // If we succeeded, we know that all of the element are specified by Elements
2891 // or are zero if Elements has a null entry. Recast this as a set of
2892 // insertions.
2893 Value *Result = Constant::getNullValue(CI.getType());
2894 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2895 if (!Elements[i]) continue; // Unset element.
2896
2897 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2898 IC.Builder.getInt32(i));
2899 }
2900
2901 return Result;
2902}
2903
2904/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2905/// vector followed by extract element. The backend tends to handle bitcasts of
2906/// vectors better than bitcasts of scalars because vector registers are
2907/// usually not type-specific like scalar integer or scalar floating-point.
2909 InstCombinerImpl &IC) {
2910 Value *VecOp, *Index;
2911 if (!match(BitCast.getOperand(0),
2912 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2913 return nullptr;
2914
2915 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2916 // type to extract from.
2917 Type *DestType = BitCast.getType();
2918 VectorType *VecType = cast<VectorType>(VecOp->getType());
2919 if (VectorType::isValidElementType(DestType)) {
2920 auto *NewVecType = VectorType::get(DestType, VecType);
2921 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2922 return ExtractElementInst::Create(NewBC, Index);
2923 }
2924
2925 // Only solve DestType is vector to avoid inverse transform in visitBitCast.
2926 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2927 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2928 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2929 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2930
2931 return nullptr;
2932}
2933
2934/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2936 InstCombiner::BuilderTy &Builder) {
2937 Type *DestTy = BitCast.getType();
2938 BinaryOperator *BO;
2939
2940 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2941 !BO->isBitwiseLogicOp())
2942 return nullptr;
2943
2944 // FIXME: This transform is restricted to vector types to avoid backend
2945 // problems caused by creating potentially illegal operations. If a fix-up is
2946 // added to handle that situation, we can remove this check.
2947 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2948 return nullptr;
2949
2950 if (DestTy->isFPOrFPVectorTy()) {
2951 Value *X, *Y;
2952 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2953 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2955 if (X->getType()->isFPOrFPVectorTy() &&
2956 Y->getType()->isIntOrIntVectorTy()) {
2957 Value *CastedOp =
2958 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2959 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2960 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2961 }
2962 if (X->getType()->isIntOrIntVectorTy() &&
2963 Y->getType()->isFPOrFPVectorTy()) {
2964 Value *CastedOp =
2965 Builder.CreateBitCast(BO->getOperand(1), X->getType());
2966 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2967 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2968 }
2969 }
2970 return nullptr;
2971 }
2972
2973 if (!DestTy->isIntOrIntVectorTy())
2974 return nullptr;
2975
2976 Value *X;
2977 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2978 X->getType() == DestTy && !isa<Constant>(X)) {
2979 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2980 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2981 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2982 }
2983
2984 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2985 X->getType() == DestTy && !isa<Constant>(X)) {
2986 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2987 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2988 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2989 }
2990
2991 // Canonicalize vector bitcasts to come before vector bitwise logic with a
2992 // constant. This eases recognition of special constants for later ops.
2993 // Example:
2994 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2995 Constant *C;
2996 if (match(BO->getOperand(1), m_Constant(C))) {
2997 // bitcast (logic X, C) --> logic (bitcast X, C')
2998 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2999 Value *CastedC = Builder.CreateBitCast(C, DestTy);
3000 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
3001 }
3002
3003 return nullptr;
3004}
3005
3006/// Change the type of a select if we can eliminate a bitcast.
3008 InstCombiner::BuilderTy &Builder) {
3009 Value *Cond, *TVal, *FVal;
3010 if (!match(BitCast.getOperand(0),
3011 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
3012 return nullptr;
3013
3014 // A vector select must maintain the same number of elements in its operands.
3015 Type *CondTy = Cond->getType();
3016 Type *DestTy = BitCast.getType();
3017
3018 auto *DestVecTy = dyn_cast<VectorType>(DestTy);
3019
3020 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
3021 if (!DestVecTy ||
3022 CondVTy->getElementCount() != DestVecTy->getElementCount())
3023 return nullptr;
3024
3025 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
3026 auto *SrcVecTy = dyn_cast<VectorType>(TVal->getType());
3027
3028 if ((isa<Constant>(TVal) || isa<Constant>(FVal)) &&
3029 (!DestVecTy ||
3030 (SrcVecTy && ElementCount::isKnownLE(DestVecTy->getElementCount(),
3031 SrcVecTy->getElementCount())))) {
3032 // Avoid introducing select of vector (or select of vector with more
3033 // elements) until the backend can undo this transformation.
3034 Value *CastedTVal = Builder.CreateBitCast(TVal, DestTy);
3035 Value *CastedFVal = Builder.CreateBitCast(FVal, DestTy);
3036 return SelectInst::Create(Cond, CastedTVal, CastedFVal, "", nullptr, Sel);
3037 }
3038
3039 // FIXME: This transform is restricted from changing the select between
3040 // scalars and vectors to avoid backend problems caused by creating
3041 // potentially illegal operations. If a fix-up is added to handle that
3042 // situation, we can remove this check.
3043 if ((DestVecTy != nullptr) != (SrcVecTy != nullptr))
3044 return nullptr;
3045
3046 Value *X;
3047 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
3048 !isa<Constant>(X)) {
3049 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
3050 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
3051 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
3052 }
3053
3054 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
3055 !isa<Constant>(X)) {
3056 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
3057 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
3058 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
3059 }
3060
3061 return nullptr;
3062}
3063
3064/// Check if all users of CI are StoreInsts.
3065static bool hasStoreUsersOnly(CastInst &CI) {
3066 for (User *U : CI.users()) {
3067 if (!isa<StoreInst>(U))
3068 return false;
3069 }
3070 return true;
3071}
3072
3073/// This function handles following case
3074///
3075/// A -> B cast
3076/// PHI
3077/// B -> A cast
3078///
3079/// All the related PHI nodes can be replaced by new PHI nodes with type A.
3080/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
3081Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
3082 PHINode *PN) {
3083 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
3084 if (hasStoreUsersOnly(CI))
3085 return nullptr;
3086
3087 Value *Src = CI.getOperand(0);
3088 Type *SrcTy = Src->getType(); // Type B
3089 Type *DestTy = CI.getType(); // Type A
3090
3091 SmallVector<PHINode *, 4> PhiWorklist;
3092 SmallSetVector<PHINode *, 4> OldPhiNodes;
3093
3094 // Find all of the A->B casts and PHI nodes.
3095 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
3096 // OldPhiNodes is used to track all known PHI nodes, before adding a new
3097 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
3098 PhiWorklist.push_back(PN);
3099 OldPhiNodes.insert(PN);
3100 while (!PhiWorklist.empty()) {
3101 auto *OldPN = PhiWorklist.pop_back_val();
3102 for (Value *IncValue : OldPN->incoming_values()) {
3103 if (isa<Constant>(IncValue))
3104 continue;
3105
3106 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
3107 // If there is a sequence of one or more load instructions, each loaded
3108 // value is used as address of later load instruction, bitcast is
3109 // necessary to change the value type, don't optimize it. For
3110 // simplicity we give up if the load address comes from another load.
3111 Value *Addr = LI->getOperand(0);
3112 if (Addr == &CI || isa<LoadInst>(Addr))
3113 return nullptr;
3114 // Don't tranform "load <256 x i32>, <256 x i32>*" to
3115 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
3116 // TODO: Remove this check when bitcast between vector and x86_amx
3117 // is replaced with a specific intrinsic.
3118 if (DestTy->isX86_AMXTy())
3119 return nullptr;
3120 if (LI->hasOneUse() && LI->isSimple())
3121 continue;
3122 // If a LoadInst has more than one use, changing the type of loaded
3123 // value may create another bitcast.
3124 return nullptr;
3125 }
3126
3127 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
3128 if (OldPhiNodes.insert(PNode))
3129 PhiWorklist.push_back(PNode);
3130 continue;
3131 }
3132
3133 auto *BCI = dyn_cast<BitCastInst>(IncValue);
3134 // We can't handle other instructions.
3135 if (!BCI)
3136 return nullptr;
3137
3138 // Verify it's a A->B cast.
3139 Type *TyA = BCI->getOperand(0)->getType();
3140 Type *TyB = BCI->getType();
3141 if (TyA != DestTy || TyB != SrcTy)
3142 return nullptr;
3143 }
3144 }
3145
3146 // Check that each user of each old PHI node is something that we can
3147 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
3148 for (auto *OldPN : OldPhiNodes) {
3149 for (User *V : OldPN->users()) {
3150 if (auto *SI = dyn_cast<StoreInst>(V)) {
3151 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
3152 return nullptr;
3153 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3154 // Verify it's a B->A cast.
3155 Type *TyB = BCI->getOperand(0)->getType();
3156 Type *TyA = BCI->getType();
3157 if (TyA != DestTy || TyB != SrcTy)
3158 return nullptr;
3159 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3160 // As long as the user is another old PHI node, then even if we don't
3161 // rewrite it, the PHI web we're considering won't have any users
3162 // outside itself, so it'll be dead.
3163 if (!OldPhiNodes.contains(PHI))
3164 return nullptr;
3165 } else {
3166 return nullptr;
3167 }
3168 }
3169 }
3170
3171 // For each old PHI node, create a corresponding new PHI node with a type A.
3172 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
3173 for (auto *OldPN : OldPhiNodes) {
3174 Builder.SetInsertPoint(OldPN);
3175 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
3176 NewPNodes[OldPN] = NewPN;
3177 }
3178
3179 // Fill in the operands of new PHI nodes.
3180 for (auto *OldPN : OldPhiNodes) {
3181 PHINode *NewPN = NewPNodes[OldPN];
3182 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
3183 Value *V = OldPN->getOperand(j);
3184 Value *NewV = nullptr;
3185 if (auto *C = dyn_cast<Constant>(V)) {
3186 NewV = ConstantExpr::getBitCast(C, DestTy);
3187 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
3188 // Explicitly perform load combine to make sure no opposing transform
3189 // can remove the bitcast in the meantime and trigger an infinite loop.
3190 Builder.SetInsertPoint(LI);
3191 NewV = combineLoadToNewType(*LI, DestTy);
3192 // Remove the old load and its use in the old phi, which itself becomes
3193 // dead once the whole transform finishes.
3194 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
3196 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3197 NewV = BCI->getOperand(0);
3198 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
3199 NewV = NewPNodes[PrevPN];
3200 }
3201 assert(NewV);
3202 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
3203 }
3204 }
3205
3206 // Traverse all accumulated PHI nodes and process its users,
3207 // which are Stores and BitcCasts. Without this processing
3208 // NewPHI nodes could be replicated and could lead to extra
3209 // moves generated after DeSSA.
3210 // If there is a store with type B, change it to type A.
3211
3212
3213 // Replace users of BitCast B->A with NewPHI. These will help
3214 // later to get rid off a closure formed by OldPHI nodes.
3215 Instruction *RetVal = nullptr;
3216 for (auto *OldPN : OldPhiNodes) {
3217 PHINode *NewPN = NewPNodes[OldPN];
3218 for (User *V : make_early_inc_range(OldPN->users())) {
3219 if (auto *SI = dyn_cast<StoreInst>(V)) {
3220 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
3221 Builder.SetInsertPoint(SI);
3222 auto *NewBC =
3223 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
3224 SI->setOperand(0, NewBC);
3225 Worklist.push(SI);
3226 assert(hasStoreUsersOnly(*NewBC));
3227 }
3228 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3229 Type *TyB = BCI->getOperand(0)->getType();
3230 Type *TyA = BCI->getType();
3231 assert(TyA == DestTy && TyB == SrcTy);
3232 (void) TyA;
3233 (void) TyB;
3234 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
3235 if (BCI == &CI)
3236 RetVal = I;
3237 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3238 assert(OldPhiNodes.contains(PHI));
3239 (void) PHI;
3240 } else {
3241 llvm_unreachable("all uses should be handled");
3242 }
3243 }
3244 }
3245
3246 return RetVal;
3247}
3248
3249/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
3250/// copysign((bitcast Y to fp), X)
3252 InstCombiner::BuilderTy &Builder,
3253 const SimplifyQuery &SQ) {
3254 Value *X, *Y;
3255 Type *FTy = CI.getType();
3256 if (!FTy->isFPOrFPVectorTy())
3257 return nullptr;
3260 m_Value(Y)))))
3261 return nullptr;
3262 if (X->getType() != FTy)
3263 return nullptr;
3264 if (!isKnownNonNegative(Y, SQ))
3265 return nullptr;
3266
3267 return Builder.CreateCopySign(Builder.CreateBitCast(Y, FTy), X);
3268}
3269
3271 // If the operands are integer typed then apply the integer transforms,
3272 // otherwise just apply the common ones.
3273 Value *Src = CI.getOperand(0);
3274 Type *SrcTy = Src->getType();
3275 Type *DestTy = CI.getType();
3276
3277 // Get rid of casts from one type to the same type. These are useless and can
3278 // be replaced by the operand.
3279 if (DestTy == Src->getType())
3280 return replaceInstUsesWith(CI, Src);
3281
3282 if (isa<FixedVectorType>(DestTy)) {
3283 if (isa<IntegerType>(SrcTy)) {
3284 // If this is a cast from an integer to vector, check to see if the input
3285 // is a trunc or zext of a bitcast from vector. If so, we can replace all
3286 // the casts with a shuffle and (potentially) a bitcast.
3287 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
3288 CastInst *SrcCast = cast<CastInst>(Src);
3289 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
3290 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
3292 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
3293 return I;
3294 }
3295
3296 // If the input is an 'or' instruction, we may be doing shifts and ors to
3297 // assemble the elements of the vector manually. Try to rip the code out
3298 // and replace it with insertelements.
3299 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
3300 return replaceInstUsesWith(CI, V);
3301 }
3302 }
3303
3304 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
3305 if (SrcVTy->getNumElements() == 1) {
3306 // If our destination is not a vector, then make this a straight
3307 // scalar-scalar cast.
3308 if (!DestTy->isVectorTy()) {
3309 Value *Elem =
3310 Builder.CreateExtractElement(Src,
3312 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
3313 }
3314
3315 // Otherwise, see if our source is an insert. If so, then use the scalar
3316 // component directly:
3317 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
3318 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
3319 return new BitCastInst(InsElt->getOperand(1), DestTy);
3320 }
3321
3322 // Convert an artificial vector insert into more analyzable bitwise logic.
3323 unsigned BitWidth = DestTy->getScalarSizeInBits();
3324 Value *X, *Y;
3325 uint64_t IndexC;
3327 m_Value(Y), m_ConstantInt(IndexC)))) &&
3328 DestTy->isIntegerTy() && X->getType() == DestTy &&
3329 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
3330 // Adjust for big endian - the LSBs are at the high index.
3331 if (DL.isBigEndian())
3332 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
3333
3334 // We only handle (endian-normalized) insert to index 0. Any other insert
3335 // would require a left-shift, so that is an extra instruction.
3336 if (IndexC == 0) {
3337 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
3338 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
3339 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
3340 Value *AndX = Builder.CreateAnd(X, MaskC);
3341 Value *ZextY = Builder.CreateZExt(Y, DestTy);
3342 return BinaryOperator::CreateOr(AndX, ZextY);
3343 }
3344 }
3345 }
3346
3347 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
3348 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
3349 // a bitcast to a vector with the same # elts.
3350 Value *ShufOp0 = Shuf->getOperand(0);
3351 Value *ShufOp1 = Shuf->getOperand(1);
3352 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
3353 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
3354 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
3355 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
3356 ShufElts == SrcVecElts) {
3357 BitCastInst *Tmp;
3358 // If either of the operands is a cast from CI.getType(), then
3359 // evaluating the shuffle in the casted destination's type will allow
3360 // us to eliminate at least one cast.
3361 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
3362 Tmp->getOperand(0)->getType() == DestTy) ||
3363 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
3364 Tmp->getOperand(0)->getType() == DestTy)) {
3365 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
3366 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
3367 // Return a new shuffle vector. Use the same element ID's, as we
3368 // know the vector types match #elts.
3369 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
3370 }
3371 }
3372
3373 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
3374 // as a byte/bit swap:
3375 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
3376 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
3377 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
3378 Shuf->hasOneUse() && Shuf->isReverse()) {
3379 unsigned IntrinsicNum = 0;
3380 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
3381 SrcTy->getScalarSizeInBits() == 8) {
3382 IntrinsicNum = Intrinsic::bswap;
3383 } else if (SrcTy->getScalarSizeInBits() == 1) {
3384 IntrinsicNum = Intrinsic::bitreverse;
3385 }
3386 if (IntrinsicNum != 0) {
3387 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
3388 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
3389 Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
3390 CI.getModule(), IntrinsicNum, DestTy);
3391 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
3392 return CallInst::Create(BswapOrBitreverse, {ScalarX});
3393 }
3394 }
3395 }
3396
3397 // Handle the A->B->A cast, and there is an intervening PHI node.
3398 if (PHINode *PN = dyn_cast<PHINode>(Src))
3399 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
3400 return I;
3401
3402 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
3403 return I;
3404
3406 return I;
3407
3409 return I;
3410
3411 if (Value *V = foldCopySignIdioms(CI, Builder, SQ.getWithInstruction(&CI)))
3412 return replaceInstUsesWith(CI, V);
3413
3414 return commonCastTransforms(CI);
3415}
3416
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This file defines the DenseMap class.
static bool isSigned(unsigned Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static Value * foldCopySignIdioms(BitCastInst &CI, InstCombiner::BuilderTy &Builder, const SimplifyQuery &SQ)
Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to copysign((bitcast Y to fp),...
static Type * shrinkFPConstantVector(Value *V, bool PreferBFloat)
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Instruction * foldFPtoI(Instruction &FI, InstCombiner &IC)
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(APFloat F, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
SmallDenseMap< Value *, Value *, 8 > EvaluatedMap
static Type * getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC)
Find the minimum FP type we can safely truncate to.
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static Type * shrinkFPConstant(LLVMContext &Ctx, const APFloat &F, bool PreferBFloat)
static Instruction * foldVecExtTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Whenever an element is extracted from a vector, optionally shifted down, and then truncated,...
static Value * EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned, InstCombinerImpl &IC, EvaluatedMap &Processed)
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
Value * RHS
Value * LHS
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static const fltSemantics & BFloat()
Definition APFloat.h:295
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:241
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1616
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
int32_t exactLogBase2() const
Definition APInt.h:1806
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
This class represents a conversion between pointers from one address space to another.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
BinaryOps getOpcode() const
Definition InstrTypes.h:409
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Type * getSrcTy() const
Return the source type, as a convenience.
Definition InstrTypes.h:679
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:674
static LLVM_ABI unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, const DataLayout *DL)
Determine how a pair of casts can be eliminated, if they can be at all.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static LLVM_ABI CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Type * getDestTy() const
Return the destination type, as a convenience.
Definition InstrTypes.h:681
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValueAPF() const
Definition Constants.h:463
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
bool uge(uint64_t Num) const
This function will return true iff this constant represents a value with active bits bigger than 64 b...
Definition Constants.h:262
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:205
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2627
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition IRBuilder.h:534
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:529
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2242
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * foldExtractionOfVectorDeinterleave(ZExtInst &RootZExt)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate type has enou...
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitFPTrunc(FPTruncInst &CI)
Value * foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Instruction * visitPtrToAddr(PtrToAddrInst &CI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
LLVM_ABI bool canBeCastedExactlyIntToFP(Value *V, Type *FPTy, bool IsSigned, const Instruction *CxtI=nullptr) const
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
LLVM_ABI bool isKnownExactCastIntToFP(CastInst &I) const
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
BuilderTy & Builder
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool hasNonNeg() const LLVM_READONLY
Determine whether the the nneg flag is set.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents a cast from a pointer to an address (non-capturing ptrtoint).
Value * getPointerOperand()
Gets the pointer operand.
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition Type.h:202
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:287
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:237
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:284
'undef' values are things that do not have specified contents.
Definition Constants.h:1623
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
This class represents zero extension of integer types.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:237
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_lowbit_mask > m_LowBitMask()
Match an integer or vector with only the low bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrToIntSameSize_match< OpTy > m_PtrToIntSameSize(const DataLayout &DL, const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, UIToFPInst >, CastInst_match< OpTy, SIToFPInst > > m_IToFP(const OpTy &Op)
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_or< CastInst_match< OpTy, FPToUIInst >, CastInst_match< OpTy, FPToSIInst > > m_FPToI(const OpTy &Op)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match a integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_Ctlz(const Opnd0 &Op0, const Opnd1 &Op1)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition MathExtras.h:350
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI Value * simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q)
Given operands for a CastInst, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2455
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition STLExtras.h:2165
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
Matching combinators.
SimplifyQuery getWithInstruction(const Instruction *I) const