LLVM 23.0.0git
InstCombineCasts.cpp
Go to the documentation of this file.
1//===- InstCombineCasts.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visit functions for cast operations.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APInt.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/DataLayout.h"
22#include "llvm/IR/DebugInfo.h"
23#include "llvm/IR/Instruction.h"
25#include "llvm/IR/Type.h"
26#include "llvm/IR/Value.h"
29#include <iterator>
30#include <optional>
31
32using namespace llvm;
33using namespace PatternMatch;
34
35#define DEBUG_TYPE "instcombine"
36
38
41 EvaluatedMap &Processed) {
42 // Since we cover transformation of instructions with multiple users, we might
43 // come to the same node via multiple paths. We should not create a
44 // replacement for every single one of them though.
45 if (Value *Result = Processed.lookup(V))
46 return Result;
47
50
51 // Otherwise, it must be an instruction.
53 Instruction *Res = nullptr;
54 unsigned Opc = I->getOpcode();
55 switch (Opc) {
56 case Instruction::Add:
57 case Instruction::Sub:
58 case Instruction::Mul:
59 case Instruction::And:
60 case Instruction::Or:
61 case Instruction::Xor:
62 case Instruction::AShr:
63 case Instruction::LShr:
64 case Instruction::Shl:
65 case Instruction::UDiv:
66 case Instruction::URem: {
67 Value *LHS = EvaluateInDifferentTypeImpl(I->getOperand(0), Ty, isSigned, IC,
68 Processed);
69 Value *RHS = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned, IC,
70 Processed);
72 if (Opc == Instruction::LShr || Opc == Instruction::AShr)
73 Res->setIsExact(I->isExact());
74 break;
75 }
76 case Instruction::Trunc:
77 case Instruction::ZExt:
78 case Instruction::SExt:
79 // If the source type of the cast is the type we're trying for then we can
80 // just return the source. There's no need to insert it because it is not
81 // new.
82 if (I->getOperand(0)->getType() == Ty)
83 return I->getOperand(0);
84
85 // Otherwise, must be the same type of cast, so just reinsert a new one.
86 // This also handles the case of zext(trunc(x)) -> zext(x).
87 Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
88 Opc == Instruction::SExt);
89 if (auto *Trunc = dyn_cast<TruncInst>(I)) {
90 if (auto *NewTrunc = dyn_cast<TruncInst>(Res)) {
91 if (Trunc->getType()->getScalarSizeInBits() <=
92 Ty->getScalarSizeInBits()) {
93 NewTrunc->setHasNoSignedWrap(Trunc->hasNoSignedWrap());
94 NewTrunc->setHasNoUnsignedWrap(Trunc->hasNoUnsignedWrap());
95 }
96 } else if (auto *NewZExt = dyn_cast<ZExtInst>(Res)) {
97 if (Trunc->hasNoUnsignedWrap())
98 NewZExt->setNonNeg();
99 }
100 }
101 break;
102 case Instruction::Select: {
103 Value *True = EvaluateInDifferentTypeImpl(I->getOperand(1), Ty, isSigned,
104 IC, Processed);
105 Value *False = EvaluateInDifferentTypeImpl(I->getOperand(2), Ty, isSigned,
106 IC, Processed);
107 Res = SelectInst::Create(I->getOperand(0), True, False);
108 break;
109 }
110 case Instruction::PHI: {
111 PHINode *OPN = cast<PHINode>(I);
113 for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
115 isSigned, IC, Processed);
116 NPN->addIncoming(V, OPN->getIncomingBlock(i));
117 }
118 Res = NPN;
119 break;
120 }
121 case Instruction::FPToUI:
122 case Instruction::FPToSI:
123 Res = CastInst::Create(static_cast<Instruction::CastOps>(Opc),
124 I->getOperand(0), Ty);
125 break;
126 case Instruction::Call:
128 switch (II->getIntrinsicID()) {
129 default:
130 llvm_unreachable("Unsupported call!");
131 case Intrinsic::vscale: {
133 I->getModule(), Intrinsic::vscale, {Ty});
134 Res = CallInst::Create(Fn->getFunctionType(), Fn);
135 break;
136 }
137 case Intrinsic::umin:
138 case Intrinsic::umax:
139 case Intrinsic::smin:
140 case Intrinsic::smax: {
141 Value *Op0 = EvaluateInDifferentTypeImpl(II->getArgOperand(0), Ty,
142 isSigned, IC, Processed);
143 Value *Op1 = EvaluateInDifferentTypeImpl(II->getArgOperand(1), Ty,
144 isSigned, IC, Processed);
146 I->getModule(), II->getIntrinsicID(), {Ty});
147 Res = CallInst::Create(Fn->getFunctionType(), Fn, {Op0, Op1});
148 break;
149 }
150 case Intrinsic::abs: {
151 Value *Arg = EvaluateInDifferentTypeImpl(II->getArgOperand(0), Ty,
152 isSigned, IC, Processed);
154 I->getModule(), II->getIntrinsicID(), {Ty});
155 Res = CallInst::Create(Fn->getFunctionType(), Fn,
156 {Arg, ConstantInt::getFalse(I->getContext())});
157 break;
158 }
159 }
160 }
161 break;
162 case Instruction::ShuffleVector: {
163 auto *ScalarTy = cast<VectorType>(Ty)->getElementType();
164 auto *VTy = cast<VectorType>(I->getOperand(0)->getType());
165 auto *FixedTy = VectorType::get(ScalarTy, VTy->getElementCount());
166 Value *Op0 = EvaluateInDifferentTypeImpl(I->getOperand(0), FixedTy,
167 isSigned, IC, Processed);
168 Value *Op1 = EvaluateInDifferentTypeImpl(I->getOperand(1), FixedTy,
169 isSigned, IC, Processed);
170 Res = new ShuffleVectorInst(Op0, Op1,
171 cast<ShuffleVectorInst>(I)->getShuffleMask());
172 break;
173 }
174 default:
175 // TODO: Can handle more cases here.
176 llvm_unreachable("Unreachable!");
177 }
178
179 Res->takeName(I);
180 Value *Result = IC.InsertNewInstWith(Res, I->getIterator());
181 // There is no need in keeping track of the old value/new value relationship
182 // when we have only one user, we came have here from that user and no-one
183 // else cares.
184 if (!V->hasOneUse())
185 Processed[V] = Result;
186
187 return Result;
188}
189
190/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
191/// true for, actually insert the code to evaluate the expression.
193 bool isSigned) {
194 EvaluatedMap Processed;
195 return EvaluateInDifferentTypeImpl(V, Ty, isSigned, *this, Processed);
196}
197
199InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
200 const CastInst *CI2) {
201 Type *SrcTy = CI1->getSrcTy();
202 Type *MidTy = CI1->getDestTy();
203 Type *DstTy = CI2->getDestTy();
204
205 Instruction::CastOps firstOp = CI1->getOpcode();
206 Instruction::CastOps secondOp = CI2->getOpcode();
207 Type *SrcIntPtrTy =
208 SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
209 Type *DstIntPtrTy =
210 DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
211 unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
212 DstTy, &DL);
213
214 // We don't want to form an inttoptr or ptrtoint that converts to an integer
215 // type that differs from the pointer size.
216 if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
217 (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
218 Res = 0;
219
220 return Instruction::CastOps(Res);
221}
222
223/// Implement the transforms common to all CastInst visitors.
225 Value *Src = CI.getOperand(0);
226 Type *Ty = CI.getType();
227
228 if (Value *Res =
229 simplifyCastInst(CI.getOpcode(), Src, Ty, SQ.getWithInstruction(&CI)))
230 return replaceInstUsesWith(CI, Res);
231
232 // Try to eliminate a cast of a cast.
233 if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
234 if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
235 // The first cast (CSrc) is eliminable so we need to fix up or replace
236 // the second cast (CI). CSrc will then have a good chance of being dead.
237 auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
238 // Point debug users of the dying cast to the new one.
239 if (CSrc->hasOneUse())
240 replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
241 return Res;
242 }
243 }
244
245 if (auto *Sel = dyn_cast<SelectInst>(Src)) {
246 // We are casting a select. Try to fold the cast into the select if the
247 // select does not have a compare instruction with matching operand types
248 // or the select is likely better done in a narrow type.
249 // Creating a select with operands that are different sizes than its
250 // condition may inhibit other folds and lead to worse codegen.
251 auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
252 if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
253 (CI.getOpcode() == Instruction::Trunc &&
254 shouldChangeType(CI.getSrcTy(), CI.getType()))) {
255
256 // If it's a bitcast involving vectors, make sure it has the same number
257 // of elements on both sides.
258 if (CI.getOpcode() != Instruction::BitCast ||
260 if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
261 replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
262 return NV;
263 }
264 }
265 }
266 }
267
268 // If we are casting a PHI, then fold the cast into the PHI.
269 if (auto *PN = dyn_cast<PHINode>(Src)) {
270 // Don't do this if it would create a PHI node with an illegal type from a
271 // legal type.
272 if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
273 shouldChangeType(CI.getSrcTy(), CI.getType()))
274 if (Instruction *NV = foldOpIntoPhi(CI, PN))
275 return NV;
276 }
277
278 // Canonicalize a unary shuffle after the cast if neither operation changes
279 // the size or element size of the input vector.
280 // TODO: We could allow size-changing ops if that doesn't harm codegen.
281 // cast (shuffle X, Mask) --> shuffle (cast X), Mask
282 Value *X;
283 ArrayRef<int> Mask;
284 if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))) {
285 // TODO: Allow scalable vectors?
286 auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
287 auto *DestTy = dyn_cast<FixedVectorType>(Ty);
288 if (SrcTy && DestTy &&
289 SrcTy->getNumElements() == DestTy->getNumElements() &&
290 SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
291 Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
292 return new ShuffleVectorInst(CastX, Mask);
293 }
294 }
295
296 return nullptr;
297}
298
299namespace {
300
301/// Helper class for evaluating whether a value can be computed in a different
302/// type without changing its value. Used by cast simplification transforms.
303class TypeEvaluationHelper {
304public:
305 /// Return true if we can evaluate the specified expression tree as type Ty
306 /// instead of its larger type, and arrive with the same value.
307 /// This is used by code that tries to eliminate truncates.
308 [[nodiscard]] static bool canEvaluateTruncated(Value *V, Type *Ty,
310 Instruction *CxtI);
311
312 /// Determine if the specified value can be computed in the specified wider
313 /// type and produce the same low bits. If not, return false.
314 [[nodiscard]] static bool canEvaluateZExtd(Value *V, Type *Ty,
315 unsigned &BitsToClear,
317 Instruction *CxtI);
318
319 /// Return true if we can take the specified value and return it as type Ty
320 /// without inserting any new casts and without changing the value of the
321 /// common low bits.
322 [[nodiscard]] static bool canEvaluateSExtd(Value *V, Type *Ty);
323
324private:
325 /// Constants and extensions/truncates from the destination type are always
326 /// free to be evaluated in that type.
327 [[nodiscard]] static bool canAlwaysEvaluateInType(Value *V, Type *Ty);
328
329 /// Check if we traversed all the users of the multi-use values we've seen.
330 [[nodiscard]] bool allPendingVisited() const {
331 return llvm::all_of(Pending,
332 [this](Value *V) { return Visited.contains(V); });
333 }
334
335 /// A generic wrapper for canEvaluate* recursions to inject visitation
336 /// tracking and enforce correct multi-use value evaluations.
337 [[nodiscard]] bool
338 canEvaluate(Value *V, Type *Ty,
339 llvm::function_ref<bool(Value *, Type *Type)> Pred) {
340 if (canAlwaysEvaluateInType(V, Ty))
341 return true;
342
343 auto *I = dyn_cast<Instruction>(V);
344
345 if (I == nullptr)
346 return false;
347
348 // We insert false by default to return false when we encounter user loops.
349 const auto [It, Inserted] = Visited.insert({V, false});
350
351 // There are three possible cases for us having information on this value
352 // in the Visited map:
353 // 1. We properly checked it and concluded that we can evaluate it (true)
354 // 2. We properly checked it and concluded that we can't (false)
355 // 3. We started to check it, but during the recursive traversal we came
356 // back to it.
357 //
358 // For cases 1 and 2, we can safely return the stored result. For case 3, we
359 // can potentially have a situation where we can evaluate recursive user
360 // chains, but that can be quite tricky to do properly and isntead, we
361 // return false.
362 //
363 // In any case, we should return whatever was there in the map to begin
364 // with.
365 if (!Inserted)
366 return It->getSecond();
367
368 // We can easily make a decision about single-user values whether they can
369 // be evaluated in a different type or not, we came from that user. This is
370 // not as simple for multi-user values.
371 //
372 // In general, we have the following case (inverted control-flow, users are
373 // at the top):
374 //
375 // Cast %A
376 // ____|
377 // /
378 // %A = Use %B, %C
379 // ________| |
380 // / |
381 // %B = Use %D |
382 // ________| |
383 // / |
384 // %D = Use %C |
385 // ________|___|
386 // /
387 // %C = ...
388 //
389 // In this case, when we check %A, %B and %D, we are confident that we can
390 // make the decision here and now, since we came from their only users.
391 //
392 // For %C, it is harder. We come there twice, and when we come the first
393 // time, it's hard to tell if we will visit the second user (technically
394 // it's not hard, but we might need a lot of repetitive checks with non-zero
395 // cost).
396 //
397 // In the case above, we are allowed to evaluate %C in different type
398 // because all of it users were part of the traversal.
399 //
400 // In the following case, however, we can't make this conclusion:
401 //
402 // Cast %A
403 // ____|
404 // /
405 // %A = Use %B, %C
406 // ________| |
407 // / |
408 // %B = Use %D |
409 // ________| |
410 // / |
411 // %D = Use %C |
412 // | |
413 // foo(%C) | | <- never traversing foo(%C)
414 // ________|___|
415 // /
416 // %C = ...
417 //
418 // In this case, we still can evaluate %C in a different type, but we'd need
419 // to create a copy of the original %C to be used in foo(%C). Such
420 // duplication might be not profitable.
421 //
422 // For this reason, we collect all users of the mult-user values and mark
423 // them as "pending" and defer this decision to the very end. When we are
424 // done and and ready to have a positive verdict, we should double-check all
425 // of the pending users and ensure that we visited them. allPendingVisited
426 // predicate checks exactly that.
427 if (!I->hasOneUse()) {
428 for (Use &U : I->uses()) {
429 // For most instructions, evaluating them in a different type will
430 // change the type of all operands. This is not the case for select
431 // conditions. Make sure we don't retain an extra use via the select
432 // condition.
433 if (isa<SelectInst>(U.getUser()) && U.getOperandNo() == 0)
434 return false;
435
436 Pending.push_back(U.getUser());
437 }
438 }
439
440 const bool Result = Pred(V, Ty);
441 // We have to set result this way and not via It because Pred is recursive
442 // and it is very likely that we grew Visited and invalidated It.
443 Visited[V] = Result;
444 return Result;
445 }
446
447 /// Filter out values that we can not evaluate in the destination type for
448 /// free.
449 [[nodiscard]] bool canNotEvaluateInType(Value *V, Type *Ty);
450
451 [[nodiscard]] bool canEvaluateTruncatedImpl(Value *V, Type *Ty,
452 InstCombinerImpl &IC,
453 Instruction *CxtI);
454 [[nodiscard]] bool canEvaluateTruncatedPred(Value *V, Type *Ty,
455 InstCombinerImpl &IC,
456 Instruction *CxtI);
457 [[nodiscard]] bool canEvaluateZExtdImpl(Value *V, Type *Ty,
458 unsigned &BitsToClear,
459 InstCombinerImpl &IC,
460 Instruction *CxtI);
461 [[nodiscard]] bool canEvaluateSExtdImpl(Value *V, Type *Ty);
462 [[nodiscard]] bool canEvaluateSExtdPred(Value *V, Type *Ty);
463
464 /// A bookkeeping map to memorize an already made decision for a traversed
465 /// value.
466 SmallDenseMap<Value *, bool, 8> Visited;
467
468 /// A list of pending values to check in the end.
469 SmallVector<Value *, 8> Pending;
470};
471
472} // anonymous namespace
473
474/// Constants and extensions/truncates from the destination type are always
475/// free to be evaluated in that type. This is a helper for canEvaluate*.
476bool TypeEvaluationHelper::canAlwaysEvaluateInType(Value *V, Type *Ty) {
477 if (isa<Constant>(V))
478 return match(V, m_ImmConstant());
479
480 Value *X;
481 if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
482 X->getType() == Ty)
483 return true;
484
485 return false;
486}
487
488/// Filter out values that we can not evaluate in the destination type for free.
489/// This is a helper for canEvaluate*.
490bool TypeEvaluationHelper::canNotEvaluateInType(Value *V, Type *Ty) {
491 if (!isa<Instruction>(V))
492 return true;
493 // We don't extend or shrink something that has multiple uses -- doing so
494 // would require duplicating the instruction which isn't profitable.
495 if (!V->hasOneUse())
496 return true;
497
498 return false;
499}
500
501/// Return true if we can evaluate the specified expression tree as type Ty
502/// instead of its larger type, and arrive with the same value.
503/// This is used by code that tries to eliminate truncates.
504///
505/// Ty will always be a type smaller than V. We should return true if trunc(V)
506/// can be computed by computing V in the smaller type. If V is an instruction,
507/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
508/// makes sense if x and y can be efficiently truncated.
509///
510/// This function works on both vectors and scalars.
511///
512bool TypeEvaluationHelper::canEvaluateTruncated(Value *V, Type *Ty,
514 Instruction *CxtI) {
515 TypeEvaluationHelper TYH;
516 return TYH.canEvaluateTruncatedImpl(V, Ty, IC, CxtI) &&
517 // We need to check whether we visited all users of multi-user values,
518 // and we have to do it at the very end, outside of the recursion.
519 TYH.allPendingVisited();
520}
521
522bool TypeEvaluationHelper::canEvaluateTruncatedImpl(Value *V, Type *Ty,
524 Instruction *CxtI) {
525 return canEvaluate(V, Ty, [this, &IC, CxtI](Value *V, Type *Ty) {
526 return canEvaluateTruncatedPred(V, Ty, IC, CxtI);
527 });
528}
529
530bool TypeEvaluationHelper::canEvaluateTruncatedPred(Value *V, Type *Ty,
532 Instruction *CxtI) {
533 auto *I = cast<Instruction>(V);
534 Type *OrigTy = V->getType();
535 switch (I->getOpcode()) {
536 case Instruction::Add:
537 case Instruction::Sub:
538 case Instruction::Mul:
539 case Instruction::And:
540 case Instruction::Or:
541 case Instruction::Xor:
542 // These operators can all arbitrarily be extended or truncated.
543 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
544 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
545
546 case Instruction::UDiv:
547 case Instruction::URem: {
548 // UDiv and URem can be truncated if all the truncated bits are zero.
549 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
550 uint32_t BitWidth = Ty->getScalarSizeInBits();
551 assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
552 APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
553 // Do not preserve the original context instruction. Simplifying div/rem
554 // based on later context may introduce a trap.
555 if (IC.MaskedValueIsZero(I->getOperand(0), Mask, I) &&
556 IC.MaskedValueIsZero(I->getOperand(1), Mask, I)) {
557 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
558 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
559 }
560 break;
561 }
562 case Instruction::Shl: {
563 // If we are truncating the result of this SHL, and if it's a shift of an
564 // inrange amount, we can always perform a SHL in a smaller type.
565 uint32_t BitWidth = Ty->getScalarSizeInBits();
566 KnownBits AmtKnownBits =
567 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
568 if (AmtKnownBits.getMaxValue().ult(BitWidth))
569 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
570 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
571 break;
572 }
573 case Instruction::LShr: {
574 // If this is a truncate of a logical shr, we can truncate it to a smaller
575 // lshr iff we know that the bits we would otherwise be shifting in are
576 // already zeros.
577 // TODO: It is enough to check that the bits we would be shifting in are
578 // zero - use AmtKnownBits.getMaxValue().
579 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
580 uint32_t BitWidth = Ty->getScalarSizeInBits();
581 KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), CxtI);
582 APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
583 APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
584 if (MaxShiftAmt.ult(BitWidth)) {
585 // If the only user is a trunc then we can narrow the shift if any new
586 // MSBs are not going to be used.
587 if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
588 auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
589 if ((MaxShiftAmt + DemandedBits).ule(BitWidth))
590 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
591 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
592 }
593 if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, CxtI))
594 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
595 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
596 }
597 break;
598 }
599 case Instruction::AShr: {
600 // If this is a truncate of an arithmetic shr, we can truncate it to a
601 // smaller ashr iff we know that all the bits from the sign bit of the
602 // original type and the sign bit of the truncate type are similar.
603 // TODO: It is enough to check that the bits we would be shifting in are
604 // similar to sign bit of the truncate type.
605 uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
606 uint32_t BitWidth = Ty->getScalarSizeInBits();
607 KnownBits AmtKnownBits =
608 llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
609 unsigned ShiftedBits = OrigBitWidth - BitWidth;
610 if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
611 ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), CxtI))
612 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
613 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
614 break;
615 }
616 case Instruction::Trunc:
617 // trunc(trunc(x)) -> trunc(x)
618 return true;
619 case Instruction::ZExt:
620 case Instruction::SExt:
621 // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
622 // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
623 return true;
624 case Instruction::Select: {
626 return canEvaluateTruncatedImpl(SI->getTrueValue(), Ty, IC, CxtI) &&
627 canEvaluateTruncatedImpl(SI->getFalseValue(), Ty, IC, CxtI);
628 }
629 case Instruction::PHI: {
630 // We can change a phi if we can change all operands. Note that we never
631 // get into trouble with cyclic PHIs here because canEvaluate handles use
632 // chain loops.
633 PHINode *PN = cast<PHINode>(I);
634 return llvm::all_of(
635 PN->incoming_values(), [this, Ty, &IC, CxtI](Value *IncValue) {
636 return canEvaluateTruncatedImpl(IncValue, Ty, IC, CxtI);
637 });
638 }
639 case Instruction::FPToUI:
640 case Instruction::FPToSI: {
641 // If the integer type can hold the max FP value, it is safe to cast
642 // directly to that type. Otherwise, we may create poison via overflow
643 // that did not exist in the original code.
644 Type *InputTy = I->getOperand(0)->getType()->getScalarType();
645 const fltSemantics &Semantics = InputTy->getFltSemantics();
646 uint32_t MinBitWidth = APFloatBase::semanticsIntSizeInBits(
647 Semantics, I->getOpcode() == Instruction::FPToSI);
648 return Ty->getScalarSizeInBits() >= MinBitWidth;
649 }
650 case Instruction::ShuffleVector:
651 return canEvaluateTruncatedImpl(I->getOperand(0), Ty, IC, CxtI) &&
652 canEvaluateTruncatedImpl(I->getOperand(1), Ty, IC, CxtI);
653
654 case Instruction::Call: {
655 Value *AbsOp;
657 if (IC.ComputeMaxSignificantBits(AbsOp, CxtI) > Ty->getScalarSizeInBits())
658 return false;
659 return canEvaluateTruncatedImpl(AbsOp, Ty, IC, CxtI);
660 }
661 auto *MM = dyn_cast<MinMaxIntrinsic>(I);
662 if (!MM)
663 return false;
664 // The min/max can be performed in the narrow type when each operand has
665 // zero high bits (for umin/umax) or enough sign bits (for smin/smax).
666 Value *Op0 = MM->getLHS();
667 Value *Op1 = MM->getRHS();
668 uint32_t BitWidth = Ty->getScalarSizeInBits();
669 if (MM->isSigned()) {
670 if (IC.ComputeMaxSignificantBits(Op0, CxtI) > BitWidth ||
671 IC.ComputeMaxSignificantBits(Op1, CxtI) > BitWidth)
672 break;
673 } else {
674 APInt Mask =
676 if (!IC.MaskedValueIsZero(Op0, Mask, CxtI) ||
677 !IC.MaskedValueIsZero(Op1, Mask, CxtI))
678 break;
679 }
680 return canEvaluateTruncatedImpl(Op0, Ty, IC, CxtI) &&
681 canEvaluateTruncatedImpl(Op1, Ty, IC, CxtI);
682 }
683 default:
684 // TODO: Can handle more cases here.
685 break;
686 }
687
688 return false;
689}
690
691/// Given a vector that is bitcast to an integer, optionally logically
692/// right-shifted, and truncated, convert it to an extractelement.
693/// Example (big endian):
694/// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
695/// --->
696/// extractelement <4 x i32> %X, 1
698 InstCombinerImpl &IC) {
699 Value *TruncOp = Trunc.getOperand(0);
700 Type *DestType = Trunc.getType();
701 if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
702 return nullptr;
703
704 Value *VecInput = nullptr;
705 ConstantInt *ShiftVal = nullptr;
706 if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
707 m_LShr(m_BitCast(m_Value(VecInput)),
708 m_ConstantInt(ShiftVal)))) ||
709 !isa<VectorType>(VecInput->getType()))
710 return nullptr;
711
712 VectorType *VecType = cast<VectorType>(VecInput->getType());
713 unsigned VecWidth = VecType->getPrimitiveSizeInBits();
714 unsigned DestWidth = DestType->getPrimitiveSizeInBits();
715 unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
716
717 if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
718 return nullptr;
719
720 // If the element type of the vector doesn't match the result type,
721 // bitcast it to a vector type that we can extract from.
722 unsigned NumVecElts = VecWidth / DestWidth;
723 if (VecType->getElementType() != DestType) {
724 VecType = FixedVectorType::get(DestType, NumVecElts);
725 VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
726 }
727
728 unsigned Elt = ShiftAmount / DestWidth;
729 if (IC.getDataLayout().isBigEndian())
730 Elt = NumVecElts - 1 - Elt;
731
732 return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
733}
734
735/// Whenever an element is extracted from a vector, optionally shifted down, and
736/// then truncated, canonicalize by converting it to a bitcast followed by an
737/// extractelement.
738///
739/// Examples (little endian):
740/// trunc (extractelement <4 x i64> %X, 0) to i32
741/// --->
742/// extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
743///
744/// trunc (lshr (extractelement <4 x i32> %X, 0), 8) to i8
745/// --->
746/// extractelement <16 x i8> (bitcast <4 x i32> %X to <16 x i8>), i32 1
748 InstCombinerImpl &IC) {
749 Value *Src = Trunc.getOperand(0);
750 Type *SrcType = Src->getType();
751 Type *DstType = Trunc.getType();
752
753 // Only attempt this if we have simple aliasing of the vector elements.
754 // A badly fit destination size would result in an invalid cast.
755 unsigned SrcBits = SrcType->getScalarSizeInBits();
756 unsigned DstBits = DstType->getScalarSizeInBits();
757 unsigned TruncRatio = SrcBits / DstBits;
758 if ((SrcBits % DstBits) != 0)
759 return nullptr;
760
761 Value *VecOp;
762 ConstantInt *Cst;
763 const APInt *ShiftAmount = nullptr;
764 if (!match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst)))) &&
765 !match(Src,
767 m_APInt(ShiftAmount)))))
768 return nullptr;
769
770 auto *VecOpTy = cast<VectorType>(VecOp->getType());
771 auto VecElts = VecOpTy->getElementCount();
772
773 uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
774 // Make sure we don't overflow in the calculation of the new index.
775 // (VecOpIdx + 1) * TruncRatio should not overflow.
776 if (Cst->uge(std::numeric_limits<uint64_t>::max() / TruncRatio))
777 return nullptr;
778 uint64_t VecOpIdx = Cst->getZExtValue();
779 uint64_t NewIdx = IC.getDataLayout().isBigEndian()
780 ? (VecOpIdx + 1) * TruncRatio - 1
781 : VecOpIdx * TruncRatio;
782
783 // Adjust index by the whole number of truncated elements.
784 if (ShiftAmount) {
785 // Check shift amount is in range and shifts a whole number of truncated
786 // elements.
787 if (ShiftAmount->uge(SrcBits) || ShiftAmount->urem(DstBits) != 0)
788 return nullptr;
789
790 uint64_t IdxOfs = ShiftAmount->udiv(DstBits).getZExtValue();
791 // IdxOfs is guaranteed to be less than TruncRatio, so we won't overflow in
792 // the adjustment.
793 assert(IdxOfs < TruncRatio &&
794 "IdxOfs is expected to be less than TruncRatio.");
795 NewIdx = IC.getDataLayout().isBigEndian() ? (NewIdx - IdxOfs)
796 : (NewIdx + IdxOfs);
797 }
798
799 assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
800 "overflow 32-bits");
801
802 auto *BitCastTo =
803 VectorType::get(DstType, BitCastNumElts, VecElts.isScalable());
804 Value *BitCast = IC.Builder.CreateBitCast(VecOp, BitCastTo);
805 return ExtractElementInst::Create(BitCast, IC.Builder.getInt64(NewIdx));
806}
807
808/// Funnel/Rotate left/right may occur in a wider type than necessary because of
809/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
810Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
811 assert((isa<VectorType>(Trunc.getSrcTy()) ||
812 shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
813 "Don't narrow to an illegal scalar type");
814
815 // Bail out on strange types. It is possible to handle some of these patterns
816 // even with non-power-of-2 sizes, but it is not a likely scenario.
817 Type *DestTy = Trunc.getType();
818 unsigned NarrowWidth = DestTy->getScalarSizeInBits();
819 unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
820 if (!isPowerOf2_32(NarrowWidth))
821 return nullptr;
822
823 // First, find an or'd pair of opposite shifts:
824 // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
825 BinaryOperator *Or0, *Or1;
826 if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
827 return nullptr;
828
829 Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
830 if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
831 !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
832 Or0->getOpcode() == Or1->getOpcode())
833 return nullptr;
834
835 // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
836 if (Or0->getOpcode() == BinaryOperator::LShr) {
837 std::swap(Or0, Or1);
838 std::swap(ShVal0, ShVal1);
839 std::swap(ShAmt0, ShAmt1);
840 }
841 assert(Or0->getOpcode() == BinaryOperator::Shl &&
842 Or1->getOpcode() == BinaryOperator::LShr &&
843 "Illegal or(shift,shift) pair");
844
845 // Match the shift amount operands for a funnel/rotate pattern. This always
846 // matches a subtraction on the R operand.
847 auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
848 // The shift amounts may add up to the narrow bit width:
849 // (shl ShVal0, L) | (lshr ShVal1, Width - L)
850 // If this is a funnel shift (different operands are shifted), then the
851 // shift amount can not over-shift (create poison) in the narrow type.
852 unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
853 APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
854 if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
855 if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
856 return L;
857
858 // The following patterns currently only work for rotation patterns.
859 // TODO: Add more general funnel-shift compatible patterns.
860 if (ShVal0 != ShVal1)
861 return nullptr;
862
863 // The shift amount may be masked with negation:
864 // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
865 Value *X;
866 unsigned Mask = Width - 1;
867 if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
869 return X;
870
871 // Same as above, but the shift amount may be extended after masking:
872 if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
874 return X;
875
876 return nullptr;
877 };
878
879 Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
880 bool IsFshl = true; // Sub on LSHR.
881 if (!ShAmt) {
882 ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
883 IsFshl = false; // Sub on SHL.
884 }
885 if (!ShAmt)
886 return nullptr;
887
888 // The right-shifted value must have high zeros in the wide type (for example
889 // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
890 // truncated, so those do not matter.
891 APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
892 if (!MaskedValueIsZero(ShVal1, HiBitMask, &Trunc))
893 return nullptr;
894
895 // Adjust the width of ShAmt for narrowed funnel shift operation:
896 // - Zero-extend if ShAmt is narrower than the destination type.
897 // - Truncate if ShAmt is wider, discarding non-significant high-order bits.
898 // This prepares ShAmt for llvm.fshl.i8(trunc(ShVal), trunc(ShVal),
899 // zext/trunc(ShAmt)).
900 Value *NarrowShAmt = Builder.CreateZExtOrTrunc(ShAmt, DestTy);
901
902 Value *X, *Y;
903 X = Y = Builder.CreateTrunc(ShVal0, DestTy);
904 if (ShVal0 != ShVal1)
905 Y = Builder.CreateTrunc(ShVal1, DestTy);
906 Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
907 Function *F =
908 Intrinsic::getOrInsertDeclaration(Trunc.getModule(), IID, DestTy);
909 return CallInst::Create(F, {X, Y, NarrowShAmt});
910}
911
912/// Try to narrow the width of math or bitwise logic instructions by pulling a
913/// truncate ahead of binary operators.
914Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
915 Type *SrcTy = Trunc.getSrcTy();
916 Type *DestTy = Trunc.getType();
917 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
918 unsigned DestWidth = DestTy->getScalarSizeInBits();
919
920 if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
921 return nullptr;
922
923 BinaryOperator *BinOp;
924 if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
925 return nullptr;
926
927 Value *BinOp0 = BinOp->getOperand(0);
928 Value *BinOp1 = BinOp->getOperand(1);
929 switch (BinOp->getOpcode()) {
930 case Instruction::And:
931 case Instruction::Or:
932 case Instruction::Xor:
933 case Instruction::Add:
934 case Instruction::Sub:
935 case Instruction::Mul: {
936 Constant *C;
937 if (match(BinOp0, m_Constant(C))) {
938 // trunc (binop C, X) --> binop (trunc C', X)
939 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
940 Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
941 return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
942 }
943 if (match(BinOp1, m_Constant(C))) {
944 // trunc (binop X, C) --> binop (trunc X, C')
945 Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
946 Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
947 return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
948 }
949 Value *X;
950 if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
951 // trunc (binop (ext X), Y) --> binop X, (trunc Y)
952 Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
953 return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
954 }
955 if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
956 // trunc (binop Y, (ext X)) --> binop (trunc Y), X
957 Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
958 return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
959 }
960 break;
961 }
962 case Instruction::LShr:
963 case Instruction::AShr: {
964 // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
965 Value *A;
966 Constant *C;
967 if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
968 unsigned MaxShiftAmt = SrcWidth - DestWidth;
969 // If the shift is small enough, all zero/sign bits created by the shift
970 // are removed by the trunc.
972 APInt(SrcWidth, MaxShiftAmt)))) {
973 auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
974 bool IsExact = OldShift->isExact();
975 if (Constant *ShAmt = ConstantFoldIntegerCast(C, A->getType(),
976 /*IsSigned*/ true, DL)) {
977 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
978 Value *Shift =
979 OldShift->getOpcode() == Instruction::AShr
980 ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
981 : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
982 return CastInst::CreateTruncOrBitCast(Shift, DestTy);
983 }
984 }
985 }
986 break;
987 }
988 default: break;
989 }
990
991 if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
992 return NarrowOr;
993
994 return nullptr;
995}
996
997/// Try to narrow the width of a splat shuffle. This could be generalized to any
998/// shuffle with a constant operand, but we limit the transform to avoid
999/// creating a shuffle type that targets may not be able to lower effectively.
1001 InstCombiner::BuilderTy &Builder) {
1002 Value *Shuf = Trunc.getOperand(0), *ShufVec;
1003 ArrayRef<int> SplatMask;
1004 if (match(Shuf, m_OneUse(m_Shuffle(m_Value(ShufVec), m_Poison(),
1005 m_Mask(SplatMask)))) &&
1006 match(SplatMask, m_SplatMask()) &&
1008 cast<VectorType>(Shuf->getType())->getElementCount(),
1009 cast<VectorType>(ShufVec->getType())->getElementCount())) {
1010 // trunc (shuf X, poison, SplatMask) --> shuf (trunc X), poison, SplatMask
1011 Type *NewTruncTy =
1012 ShufVec->getType()->getWithNewType(Trunc.getType()->getScalarType());
1013 Value *NarrowOp = Builder.CreateTrunc(ShufVec, NewTruncTy);
1014 return new ShuffleVectorInst(NarrowOp, SplatMask);
1015 }
1016
1017 return nullptr;
1018}
1019
1020/// Try to narrow the width of an insert element. This could be generalized for
1021/// any vector constant, but we limit the transform to insertion into poison to
1022/// avoid potential backend problems from unsupported insertion widths. This
1023/// could also be extended to handle the case of inserting a scalar constant
1024/// into a vector variable.
1026 InstCombiner::BuilderTy &Builder) {
1027 Instruction::CastOps Opcode = Trunc.getOpcode();
1028 assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
1029 "Unexpected instruction for shrinking");
1030
1031 Value *Elt, *Index;
1032 if (match(Trunc.getOperand(0),
1033 m_OneUse(m_InsertElt(m_Poison(), m_Value(Elt), m_Value(Index))))) {
1034 // trunc (inselt poison, X, Index) --> inselt poison, (trunc X), Index
1035 // fptrunc (inselt poison, X, Index) --> inselt poison, (fptrunc X), Index
1036 auto *NarrowPoison = PoisonValue::get(Trunc.getType());
1037 Value *NarrowOp =
1038 Builder.CreateCast(Opcode, Elt, Trunc.getType()->getScalarType());
1039 return InsertElementInst::Create(NarrowPoison, NarrowOp, Index);
1040 }
1041
1042 return nullptr;
1043}
1044
1046 if (Instruction *Result = commonCastTransforms(Trunc))
1047 return Result;
1048
1049 Value *Src = Trunc.getOperand(0);
1050 Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
1051 unsigned DestWidth = DestTy->getScalarSizeInBits();
1052 unsigned SrcWidth = SrcTy->getScalarSizeInBits();
1053
1054 // Attempt to truncate the entire input expression tree to the destination
1055 // type. Only do this if the dest type is a simple type, don't convert the
1056 // expression tree to something weird like i93 unless the source is also
1057 // strange.
1058 if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
1059 TypeEvaluationHelper::canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
1060
1061 // If this cast is a truncate, evaluting in a different type always
1062 // eliminates the cast, so it is always a win.
1063 LLVM_DEBUG(
1064 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1065 " to avoid cast: "
1066 << Trunc << '\n');
1067 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1068 assert(Res->getType() == DestTy);
1069 return replaceInstUsesWith(Trunc, Res);
1070 }
1071
1072 // For integer types, check if we can shorten the entire input expression to
1073 // DestWidth * 2, which won't allow removing the truncate, but reducing the
1074 // width may enable further optimizations, e.g. allowing for larger
1075 // vectorization factors.
1076 if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
1077 if (DestWidth * 2 < SrcWidth) {
1078 auto *NewDestTy = DestITy->getExtendedType();
1079 if (shouldChangeType(SrcTy, NewDestTy) &&
1080 TypeEvaluationHelper::canEvaluateTruncated(Src, NewDestTy, *this,
1081 &Trunc)) {
1082 LLVM_DEBUG(
1083 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1084 " to reduce the width of operand of"
1085 << Trunc << '\n');
1086 Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
1087 return new TruncInst(Res, DestTy);
1088 }
1089 }
1090 }
1091
1092 // See if we can simplify any instructions used by the input whose sole
1093 // purpose is to compute bits we don't care about.
1095 return &Trunc;
1096
1097 if (DestWidth == 1) {
1098 Value *Zero = Constant::getNullValue(SrcTy);
1099
1100 Value *X;
1101 const APInt *C1;
1102 Constant *C2;
1103 if (match(Src, m_OneUse(m_Shr(m_Shl(m_Power2(C1), m_Value(X)),
1104 m_ImmConstant(C2))))) {
1105 // trunc ((C1 << X) >> C2) to i1 --> X == (C2-cttz(C1)), where C1 is pow2
1106 Constant *Log2C1 = ConstantInt::get(SrcTy, C1->exactLogBase2());
1107 Constant *CmpC = ConstantExpr::getSub(C2, Log2C1);
1108 return new ICmpInst(ICmpInst::ICMP_EQ, X, CmpC);
1109 }
1110
1111 if (match(Src, m_Shr(m_Value(X), m_SpecificInt(SrcWidth - 1)))) {
1112 // trunc (ashr X, BW-1) to i1 --> icmp slt X, 0
1113 // trunc (lshr X, BW-1) to i1 --> icmp slt X, 0
1114 return new ICmpInst(ICmpInst::ICMP_SLT, X, Zero);
1115 }
1116
1117 Constant *C;
1118 if (match(Src, m_OneUse(m_LShr(m_Value(X), m_ImmConstant(C))))) {
1119 // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
1120 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1121 Value *MaskC = Builder.CreateShl(One, C);
1122 Value *And = Builder.CreateAnd(X, MaskC);
1123 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1124 }
1126 m_Deferred(X))))) {
1127 // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
1128 Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
1129 Value *MaskC = Builder.CreateShl(One, C);
1130 Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
1131 return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
1132 }
1133
1134 {
1135 const APInt *C;
1136 if (match(Src, m_Shl(m_APInt(C), m_Value(X))) && (*C)[0] == 1) {
1137 // trunc (C << X) to i1 --> X == 0, where C is odd
1138 return new ICmpInst(ICmpInst::Predicate::ICMP_EQ, X, Zero);
1139 }
1140 }
1141
1142 if (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) {
1143 Value *X, *Y;
1144 if (match(Src, m_Xor(m_Value(X), m_Value(Y))))
1145 return new ICmpInst(ICmpInst::ICMP_NE, X, Y);
1146 }
1147
1148 if (match(Src,
1150 return new ICmpInst(ICmpInst::ICMP_EQ, X,
1152 }
1153
1154 Value *A, *B;
1155 Constant *C;
1156
1157 // trunc(u/smin(zext(a) + zext(b), MAX)) --> uadd.sat(a, b)
1158 if (match(Src,
1161 m_SpecificInt(APInt::getMaxValue(DestWidth))),
1163 m_SpecificInt(APInt::getMaxValue(DestWidth)))))) &&
1164 A->getType() == DestTy && B->getType() == DestTy) {
1165 return replaceInstUsesWith(
1166 Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, A, B));
1167 }
1168
1169 // trunc(smax(zext(a) - zext(b), 0)) --> usub.sat(a, b)
1170 if (match(Src, m_OneUse(m_SMax(
1172 m_Zero()))) &&
1173 A->getType() == DestTy && B->getType() == DestTy) {
1174 return replaceInstUsesWith(
1175 Trunc, Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B));
1176 }
1177
1178 if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
1179 unsigned AWidth = A->getType()->getScalarSizeInBits();
1180 unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
1181 auto *OldSh = cast<Instruction>(Src);
1182 bool IsExact = OldSh->isExact();
1183
1184 // If the shift is small enough, all zero bits created by the shift are
1185 // removed by the trunc.
1187 APInt(SrcWidth, MaxShiftAmt)))) {
1188 auto GetNewShAmt = [&](unsigned Width) {
1189 Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
1190 Constant *Cmp =
1192 Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
1193 return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
1194 DL);
1195 };
1196
1197 // trunc (lshr (sext A), C) --> ashr A, C
1198 if (A->getType() == DestTy) {
1199 Constant *ShAmt = GetNewShAmt(DestWidth);
1200 ShAmt = Constant::mergeUndefsWith(ShAmt, C);
1201 return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
1202 : BinaryOperator::CreateAShr(A, ShAmt);
1203 }
1204 // The types are mismatched, so create a cast after shifting:
1205 // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
1206 if (Src->hasOneUse()) {
1207 Constant *ShAmt = GetNewShAmt(AWidth);
1208 Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
1209 return CastInst::CreateIntegerCast(Shift, DestTy, true);
1210 }
1211 }
1212 // TODO: Mask high bits with 'and'.
1213 }
1214
1215 if (Instruction *I = narrowBinOp(Trunc))
1216 return I;
1217
1218 if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
1219 return I;
1220
1221 if (Instruction *I = shrinkInsertElt(Trunc, Builder))
1222 return I;
1223
1224 if (Src->hasOneUse() &&
1225 (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
1226 // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
1227 // dest type is native and cst < dest size.
1228 if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
1229 !match(A, m_Shr(m_Value(), m_Constant()))) {
1230 // Skip shifts of shift by constants. It undoes a combine in
1231 // FoldShiftByConstant and is the extend in reg pattern.
1232 APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
1233 if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
1234 Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
1235 return BinaryOperator::Create(Instruction::Shl, NewTrunc,
1236 ConstantExpr::getTrunc(C, DestTy));
1237 }
1238 }
1239 }
1240
1241 // trunc (select(icmp_ult(A, DestTy_umax+1), A, sext(icmp_sgt(A, 0)))) -->
1242 // trunc (smin(smax(0, A), DestTy_umax))
1243 if (SrcTy->isIntegerTy() && isPowerOf2_64(SrcTy->getPrimitiveSizeInBits()) &&
1245 match(Src, m_OneUse(m_Select(
1247 m_Constant(C))),
1248 m_Deferred(A),
1250 ICmpInst::ICMP_SGT, m_Deferred(A), m_Zero())))))))) {
1251 APInt UpperBound = C->getUniqueInteger();
1252 APInt TruncatedMax = APInt::getAllOnes(DestTy->getIntegerBitWidth());
1253 TruncatedMax = TruncatedMax.zext(UpperBound.getBitWidth());
1254 if (!UpperBound.isZero() && UpperBound - 1 == TruncatedMax) {
1255 Value *SMax = Builder.CreateIntrinsic(Intrinsic::smax, {SrcTy},
1256 {ConstantInt::get(SrcTy, 0), A});
1257 Value *SMin = Builder.CreateIntrinsic(
1258 Intrinsic::smin, {SrcTy},
1259 {SMax, ConstantInt::get(SrcTy, TruncatedMax)});
1260 return new TruncInst(SMin, DestTy);
1261 }
1262 }
1263
1264 if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
1265 return I;
1266
1267 if (Instruction *I = foldVecExtTruncToExtElt(Trunc, *this))
1268 return I;
1269
1270 // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
1271 if (match(Src, m_OneUse(m_Ctlz(m_ZExt(m_Value(A)), m_Value(B))))) {
1272 unsigned AWidth = A->getType()->getScalarSizeInBits();
1273 if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
1274 Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
1275 Value *NarrowCtlz =
1276 Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
1277 return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
1278 }
1279 }
1280
1281 if (match(Src, m_VScale())) {
1282 if (Trunc.getFunction() &&
1283 Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1284 Attribute Attr =
1285 Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
1286 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
1287 if (Log2_32(*MaxVScale) < DestWidth)
1288 return replaceInstUsesWith(Trunc, Builder.CreateVScale(DestTy));
1289 }
1290 }
1291
1292 // trunc(scmp(x, y)) -> scmp(x, y) with a narrower result type.
1293 // trunc(ucmp(x, y)) -> ucmp(x, y) with a narrower result type.
1294 // scmp/ucmp produce only -1, 0, or 1, so any result type with at least 2
1295 // bits can represent every possible value and the truncation is lossless.
1296 if (DestWidth >= 2)
1297 if (auto *CI = dyn_cast<CmpIntrinsic>(Src); CI && CI->hasOneUse())
1298 return replaceInstUsesWith(
1299 Trunc, Builder.CreateIntrinsic(DestTy, CI->getIntrinsicID(),
1300 {CI->getLHS(), CI->getRHS()}));
1301
1302 if (DestWidth == 1 &&
1303 (Trunc.hasNoUnsignedWrap() || Trunc.hasNoSignedWrap()) &&
1304 isKnownNonZero(Src, SQ.getWithInstruction(&Trunc)))
1305 return replaceInstUsesWith(Trunc, ConstantInt::getTrue(DestTy));
1306
1307 bool Changed = false;
1308 if (!Trunc.hasNoSignedWrap() &&
1309 ComputeMaxSignificantBits(Src, &Trunc) <= DestWidth) {
1310 Trunc.setHasNoSignedWrap(true);
1311 Changed = true;
1312 }
1313 if (!Trunc.hasNoUnsignedWrap() &&
1314 MaskedValueIsZero(Src, APInt::getBitsSetFrom(SrcWidth, DestWidth),
1315 &Trunc)) {
1316 Trunc.setHasNoUnsignedWrap(true);
1317 Changed = true;
1318 }
1319
1320 const APInt *C1;
1321 Value *V1;
1322 // OP = { lshr, ashr }
1323 // trunc ( OP i8 C1, V1) to i1 -> icmp eq V1, log_2(C1) iff C1 is power of 2
1324 if (DestWidth == 1 && match(Src, m_Shr(m_Power2(C1), m_Value(V1)))) {
1325 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1326 return new ICmpInst(ICmpInst::ICMP_EQ, V1, Right);
1327 }
1328
1329 // OP = { lshr, ashr }
1330 // trunc ( OP i8 C1, V1) to i1 -> icmp ult V1, log_2(C1 + 1) iff (C1 + 1) is
1331 // power of 2
1332 if (DestWidth == 1 && match(Src, m_Shr(m_LowBitMask(C1), m_Value(V1)))) {
1333 Value *Right = ConstantInt::get(V1->getType(), C1->countr_one());
1334 return new ICmpInst(ICmpInst::ICMP_ULT, V1, Right);
1335 }
1336
1337 // OP = { lshr, ashr }
1338 // trunc ( OP i8 C1, V1) to i1 -> icmp ugt V1, cttz(C1) - 1 iff (C1) is
1339 // negative power of 2
1340 if (DestWidth == 1 && match(Src, m_Shr(m_NegatedPower2(C1), m_Value(V1)))) {
1341 Value *Right = ConstantInt::get(V1->getType(), C1->countr_zero());
1342 return new ICmpInst(ICmpInst::ICMP_UGE, V1, Right);
1343 }
1344
1345 return Changed ? &Trunc : nullptr;
1346}
1347
1348Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
1349 ZExtInst &Zext) {
1350 // If we are just checking for a icmp eq of a single bit and zext'ing it
1351 // to an integer, then shift the bit to the appropriate place and then
1352 // cast to integer to avoid the comparison.
1353
1354 // FIXME: This set of transforms does not check for extra uses and/or creates
1355 // an extra instruction (an optional final cast is not included
1356 // in the transform comments). We may also want to favor icmp over
1357 // shifts in cases of equal instructions because icmp has better
1358 // analysis in general (invert the transform).
1359
1360 const APInt *Op1CV;
1361 if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
1362
1363 // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
1364 if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
1365 Value *In = Cmp->getOperand(0);
1366 Value *Sh = ConstantInt::get(In->getType(),
1367 In->getType()->getScalarSizeInBits() - 1);
1368 In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
1369 if (In->getType() != Zext.getType())
1370 In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
1371
1372 return replaceInstUsesWith(Zext, In);
1373 }
1374
1375 // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
1376 // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1377 // zext (X != 0) to i32 --> X iff X has only the low bit set.
1378 // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1379
1380 if (Op1CV->isZero() && Cmp->isEquality()) {
1381 // Exactly 1 possible 1? But not the high-bit because that is
1382 // canonicalized to this form.
1383 KnownBits Known = computeKnownBits(Cmp->getOperand(0), &Zext);
1384 APInt KnownZeroMask(~Known.Zero);
1385 uint32_t ShAmt = KnownZeroMask.logBase2();
1386 bool IsExpectShAmt = KnownZeroMask.isPowerOf2() &&
1387 (Zext.getType()->getScalarSizeInBits() != ShAmt + 1);
1388 if (IsExpectShAmt &&
1389 (Cmp->getOperand(0)->getType() == Zext.getType() ||
1390 Cmp->getPredicate() == ICmpInst::ICMP_NE || ShAmt == 0)) {
1391 Value *In = Cmp->getOperand(0);
1392 if (ShAmt) {
1393 // Perform a logical shr by shiftamt.
1394 // Insert the shift to put the result in the low bit.
1395 In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
1396 In->getName() + ".lobit");
1397 }
1398
1399 // Toggle the low bit for "X == 0".
1400 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1401 In = Builder.CreateXor(In, ConstantInt::get(In->getType(), 1));
1402
1403 if (Zext.getType() == In->getType())
1404 return replaceInstUsesWith(Zext, In);
1405
1406 Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
1407 return replaceInstUsesWith(Zext, IntCast);
1408 }
1409 }
1410 }
1411
1412 if (Cmp->isEquality()) {
1413 // Test if a bit is clear/set using a shifted-one mask:
1414 // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1415 // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1416 Value *X, *ShAmt;
1417 if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
1418 match(Cmp->getOperand(0),
1419 m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
1420 auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
1421 Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
1422 if (Zext.getType() == And->getType() ||
1423 Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
1424 if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1425 X = Builder.CreateNot(X);
1426 Value *Lshr = Builder.CreateLShr(X, ShAmt);
1427 Value *And1 =
1428 Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
1429 return replaceInstUsesWith(
1430 Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
1431 }
1432 }
1433 }
1434
1435 return nullptr;
1436}
1437
1438/// Determine if the specified value can be computed in the specified wider type
1439/// and produce the same low bits. If not, return false.
1440///
1441/// If this function returns true, it can also return a non-zero number of bits
1442/// (in BitsToClear) which indicates that the value it computes is correct for
1443/// the zero extend, but that the additional BitsToClear bits need to be zero'd
1444/// out. For example, to promote something like:
1445///
1446/// %B = trunc i64 %A to i32
1447/// %C = lshr i32 %B, 8
1448/// %E = zext i32 %C to i64
1449///
1450/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1451/// set to 8 to indicate that the promoted value needs to have bits 24-31
1452/// cleared in addition to bits 32-63. Since an 'and' will be generated to
1453/// clear the top bits anyway, doing this has no extra cost.
1454///
1455/// This function works on both vectors and scalars.
1456bool TypeEvaluationHelper::canEvaluateZExtd(Value *V, Type *Ty,
1457 unsigned &BitsToClear,
1458 InstCombinerImpl &IC,
1459 Instruction *CxtI) {
1460 TypeEvaluationHelper TYH;
1461 return TYH.canEvaluateZExtdImpl(V, Ty, BitsToClear, IC, CxtI);
1462}
1463bool TypeEvaluationHelper::canEvaluateZExtdImpl(Value *V, Type *Ty,
1464 unsigned &BitsToClear,
1465 InstCombinerImpl &IC,
1466 Instruction *CxtI) {
1467 BitsToClear = 0;
1468 if (canAlwaysEvaluateInType(V, Ty))
1469 return true;
1470 // We stick to the one-user limit for the ZExt transform due to the fact
1471 // that this predicate returns two values: predicate result and BitsToClear.
1472 if (canNotEvaluateInType(V, Ty))
1473 return false;
1474
1475 auto *I = cast<Instruction>(V);
1476 unsigned Tmp;
1477 switch (I->getOpcode()) {
1478 case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1479 case Instruction::SExt: // zext(sext(x)) -> sext(x).
1480 case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1481 return true;
1482 case Instruction::And:
1483 case Instruction::Or:
1484 case Instruction::Xor:
1485 case Instruction::Add:
1486 case Instruction::Sub:
1487 case Instruction::Mul:
1488 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
1489 !canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI))
1490 return false;
1491 // These can all be promoted if neither operand has 'bits to clear'.
1492 if (BitsToClear == 0 && Tmp == 0)
1493 return true;
1494
1495 // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1496 // other side, BitsToClear is ok.
1497 if (Tmp == 0 && I->isBitwiseLogicOp()) {
1498 // We use MaskedValueIsZero here for generality, but the case we care
1499 // about the most is constant RHS.
1500 unsigned VSize = V->getType()->getScalarSizeInBits();
1501 if (IC.MaskedValueIsZero(I->getOperand(1),
1502 APInt::getHighBitsSet(VSize, BitsToClear),
1503 CxtI)) {
1504 // If this is an And instruction and all of the BitsToClear are
1505 // known to be zero we can reset BitsToClear.
1506 if (I->getOpcode() == Instruction::And)
1507 BitsToClear = 0;
1508 return true;
1509 }
1510 }
1511
1512 // Otherwise, we don't know how to analyze this BitsToClear case yet.
1513 return false;
1514
1515 case Instruction::Shl: {
1516 // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1517 // upper bits we can reduce BitsToClear by the shift amount.
1518 uint64_t ShiftAmt;
1519 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1520 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1521 return false;
1522 BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1523 return true;
1524 }
1525 return false;
1526 }
1527 case Instruction::LShr: {
1528 // We can promote lshr(x, cst) if we can promote x. This requires the
1529 // ultimate 'and' to clear out the high zero bits we're clearing out though.
1530 uint64_t ShiftAmt;
1531 if (match(I->getOperand(1), m_ConstantInt(ShiftAmt))) {
1532 if (!canEvaluateZExtdImpl(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1533 return false;
1534 BitsToClear += ShiftAmt;
1535 if (BitsToClear > V->getType()->getScalarSizeInBits())
1536 BitsToClear = V->getType()->getScalarSizeInBits();
1537 return true;
1538 }
1539 // Cannot promote variable LSHR.
1540 return false;
1541 }
1542 case Instruction::Select:
1543 if (!canEvaluateZExtdImpl(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
1544 !canEvaluateZExtdImpl(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
1545 // TODO: If important, we could handle the case when the BitsToClear are
1546 // known zero in the disagreeing side.
1547 Tmp != BitsToClear)
1548 return false;
1549 return true;
1550
1551 case Instruction::PHI: {
1552 // We can change a phi if we can change all operands. Note that we never
1553 // get into trouble with cyclic PHIs here because we only consider
1554 // instructions with a single use.
1555 PHINode *PN = cast<PHINode>(I);
1556 if (!canEvaluateZExtdImpl(PN->getIncomingValue(0), Ty, BitsToClear, IC,
1557 CxtI))
1558 return false;
1559 for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1560 if (!canEvaluateZExtdImpl(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
1561 // TODO: If important, we could handle the case when the BitsToClear
1562 // are known zero in the disagreeing input.
1563 Tmp != BitsToClear)
1564 return false;
1565 return true;
1566 }
1567 case Instruction::Call:
1568 // llvm.vscale() can always be executed in larger type, because the
1569 // value is automatically zero-extended.
1571 if (II->getIntrinsicID() == Intrinsic::vscale)
1572 return true;
1573 return false;
1574 default:
1575 // TODO: Can handle more cases here.
1576 return false;
1577 }
1578}
1579
1581 // If this zero extend is only used by a truncate, let the truncate be
1582 // eliminated before we try to optimize this zext.
1583 if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
1584 !isa<Constant>(Zext.getOperand(0)))
1585 return nullptr;
1586
1587 // If one of the common conversion will work, do it.
1588 if (Instruction *Result = commonCastTransforms(Zext))
1589 return Result;
1590
1591 if (auto *NewI = foldExtractionOfVectorDeinterleave(Zext))
1592 return NewI;
1593
1594 Value *Src = Zext.getOperand(0);
1595 Type *SrcTy = Src->getType(), *DestTy = Zext.getType();
1596
1597 // zext nneg bool x -> 0
1598 if (SrcTy->isIntOrIntVectorTy(1) && Zext.hasNonNeg())
1600
1601 // Try to extend the entire expression tree to the wide destination type.
1602 unsigned BitsToClear;
1603 if (shouldChangeType(SrcTy, DestTy) &&
1604 TypeEvaluationHelper::canEvaluateZExtd(Src, DestTy, BitsToClear, *this,
1605 &Zext)) {
1606 assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1607 "Can't clear more bits than in SrcTy");
1608
1609 // Okay, we can transform this! Insert the new expression now.
1610 LLVM_DEBUG(
1611 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1612 " to avoid zero extend: "
1613 << Zext << '\n');
1614 Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1615 assert(Res->getType() == DestTy);
1616
1617 // Preserve debug values referring to Src if the zext is its last use.
1618 if (auto *SrcOp = dyn_cast<Instruction>(Src))
1619 if (SrcOp->hasOneUse())
1620 replaceAllDbgUsesWith(*SrcOp, *Res, Zext, DT);
1621
1622 uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
1623 uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1624
1625 // If the high bits are already filled with zeros, just replace this
1626 // cast with the result.
1628 Res, APInt::getHighBitsSet(DestBitSize, DestBitSize - SrcBitsKept),
1629 &Zext))
1630 return replaceInstUsesWith(Zext, Res);
1631
1632 // We need to emit an AND to clear the high bits.
1633 Constant *C = ConstantInt::get(Res->getType(),
1634 APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
1635 return BinaryOperator::CreateAnd(Res, C);
1636 }
1637
1638 // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1639 // types and if the sizes are just right we can convert this into a logical
1640 // 'and' which will be much cheaper than the pair of casts.
1641 if (auto *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
1642 // TODO: Subsume this into EvaluateInDifferentType.
1643
1644 // Get the sizes of the types involved. We know that the intermediate type
1645 // will be smaller than A or C, but don't know the relation between A and C.
1646 Value *A = CSrc->getOperand(0);
1647 unsigned SrcSize = A->getType()->getScalarSizeInBits();
1648 unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1649 unsigned DstSize = DestTy->getScalarSizeInBits();
1650 // If we're actually extending zero bits, then if
1651 // SrcSize < DstSize: zext(a & mask)
1652 // SrcSize == DstSize: a & mask
1653 // SrcSize > DstSize: trunc(a) & mask
1654 if (SrcSize < DstSize) {
1655 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1656 Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
1657 Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
1658 return new ZExtInst(And, DestTy);
1659 }
1660
1661 if (SrcSize == DstSize) {
1662 APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1663 return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
1664 AndValue));
1665 }
1666 if (SrcSize > DstSize) {
1667 Value *Trunc = Builder.CreateTrunc(A, DestTy);
1668 APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
1669 return BinaryOperator::CreateAnd(Trunc,
1670 ConstantInt::get(Trunc->getType(),
1671 AndValue));
1672 }
1673 }
1674
1675 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1676 return transformZExtICmp(Cmp, Zext);
1677
1678 Constant *C;
1679 Value *X;
1680 // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1681 Value *And;
1682 if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
1684 X->getType() == DestTy) {
1685 Value *ZC = Builder.CreateZExt(C, DestTy);
1686 return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
1687 }
1688
1689 // If we are truncating, masking, and then zexting back to the original type,
1690 // that's just a mask. This is not handled by canEvaluateZextd if the
1691 // intermediate values have extra uses. This could be generalized further for
1692 // a non-constant mask operand.
1693 // zext (and (trunc X), C) --> and X, (zext C)
1694 if (match(Src, m_And(m_Trunc(m_Value(X)), m_Constant(C))) &&
1695 X->getType() == DestTy) {
1696 Value *ZextC = Builder.CreateZExt(C, DestTy);
1697 return BinaryOperator::CreateAnd(X, ZextC);
1698 }
1699
1700 Value *Y;
1701 if (match(Src,
1703 X->getType() == DestTy) {
1704 Value *ZextY = Builder.CreateZExt(Y, DestTy);
1706 ZextY);
1707 }
1708
1709 if (match(Src, m_VScale())) {
1710 if (Zext.getFunction() &&
1711 Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1712 Attribute Attr =
1713 Zext.getFunction()->getFnAttribute(Attribute::VScaleRange);
1714 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1715 unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1716 if (Log2_32(*MaxVScale) < TypeWidth)
1717 return replaceInstUsesWith(Zext, Builder.CreateVScale(DestTy));
1718 }
1719 }
1720 }
1721
1722 if (!Zext.hasNonNeg()) {
1723 // If this zero extend is only used by a shift, add nneg flag.
1724 if (Zext.hasOneUse() &&
1725 SrcTy->getScalarSizeInBits() >
1726 Log2_64_Ceil(DestTy->getScalarSizeInBits()) &&
1727 match(Zext.user_back(), m_Shift(m_Value(), m_Specific(&Zext)))) {
1728 Zext.setNonNeg();
1729 return &Zext;
1730 }
1731
1732 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Zext))) {
1733 Zext.setNonNeg();
1734 return &Zext;
1735 }
1736 }
1737
1738 return nullptr;
1739}
1740
1741/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1742Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
1743 SExtInst &Sext) {
1744 Value *Op0 = Cmp->getOperand(0), *Op1 = Cmp->getOperand(1);
1745 ICmpInst::Predicate Pred = Cmp->getPredicate();
1746
1747 // Don't bother if Op1 isn't of vector or integer type.
1748 if (!Op1->getType()->isIntOrIntVectorTy())
1749 return nullptr;
1750
1751 if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) {
1752 // sext (x <s 0) --> ashr x, 31 (all ones if negative)
1753 Value *Sh = ConstantInt::get(Op0->getType(),
1754 Op0->getType()->getScalarSizeInBits() - 1);
1755 Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1756 if (In->getType() != Sext.getType())
1757 In = Builder.CreateIntCast(In, Sext.getType(), true /*SExt*/);
1758
1759 return replaceInstUsesWith(Sext, In);
1760 }
1761
1762 if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1763 // If we know that only one bit of the LHS of the icmp can be set and we
1764 // have an equality comparison with zero or a power of 2, we can transform
1765 // the icmp and sext into bitwise/integer operations.
1766 if (Cmp->hasOneUse() &&
1767 Cmp->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1768 KnownBits Known = computeKnownBits(Op0, &Sext);
1769
1770 APInt KnownZeroMask(~Known.Zero);
1771 if (KnownZeroMask.isPowerOf2()) {
1772 Value *In = Cmp->getOperand(0);
1773
1774 // If the icmp tests for a known zero bit we can constant fold it.
1775 if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1776 Value *V = Pred == ICmpInst::ICMP_NE ?
1778 ConstantInt::getNullValue(Sext.getType());
1779 return replaceInstUsesWith(Sext, V);
1780 }
1781
1782 if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1783 // sext ((x & 2^n) == 0) -> (x >> n) - 1
1784 // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1785 unsigned ShiftAmt = KnownZeroMask.countr_zero();
1786 // Perform a right shift to place the desired bit in the LSB.
1787 if (ShiftAmt)
1788 In = Builder.CreateLShr(In,
1789 ConstantInt::get(In->getType(), ShiftAmt));
1790
1791 // At this point "In" is either 1 or 0. Subtract 1 to turn
1792 // {1, 0} -> {0, -1}.
1793 In = Builder.CreateAdd(In,
1794 ConstantInt::getAllOnesValue(In->getType()),
1795 "sext");
1796 } else {
1797 // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1798 // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1799 unsigned ShiftAmt = KnownZeroMask.countl_zero();
1800 // Perform a left shift to place the desired bit in the MSB.
1801 if (ShiftAmt)
1802 In = Builder.CreateShl(In,
1803 ConstantInt::get(In->getType(), ShiftAmt));
1804
1805 // Distribute the bit over the whole bit width.
1806 In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1807 KnownZeroMask.getBitWidth() - 1), "sext");
1808 }
1809
1810 if (Sext.getType() == In->getType())
1811 return replaceInstUsesWith(Sext, In);
1812 return CastInst::CreateIntegerCast(In, Sext.getType(), true/*SExt*/);
1813 }
1814 }
1815 }
1816
1817 return nullptr;
1818}
1819
1820/// Return true if we can take the specified value and return it as type Ty
1821/// without inserting any new casts and without changing the value of the common
1822/// low bits. This is used by code that tries to promote integer operations to
1823/// a wider types will allow us to eliminate the extension.
1824///
1825/// This function works on both vectors and scalars.
1826///
1827bool TypeEvaluationHelper::canEvaluateSExtd(Value *V, Type *Ty) {
1828 TypeEvaluationHelper TYH;
1829 return TYH.canEvaluateSExtdImpl(V, Ty) && TYH.allPendingVisited();
1830}
1831
1832bool TypeEvaluationHelper::canEvaluateSExtdImpl(Value *V, Type *Ty) {
1833 return canEvaluate(V, Ty, [this](Value *V, Type *Ty) {
1834 return canEvaluateSExtdPred(V, Ty);
1835 });
1836}
1837
1838bool TypeEvaluationHelper::canEvaluateSExtdPred(Value *V, Type *Ty) {
1839 assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1840 "Can't sign extend type to a smaller type");
1841
1842 auto *I = cast<Instruction>(V);
1843 switch (I->getOpcode()) {
1844 case Instruction::SExt: // sext(sext(x)) -> sext(x)
1845 case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1846 case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1847 return true;
1848 case Instruction::And:
1849 case Instruction::Or:
1850 case Instruction::Xor:
1851 case Instruction::Add:
1852 case Instruction::Sub:
1853 case Instruction::Mul:
1854 // These operators can all arbitrarily be extended if their inputs can.
1855 return canEvaluateSExtdImpl(I->getOperand(0), Ty) &&
1856 canEvaluateSExtdImpl(I->getOperand(1), Ty);
1857
1858 // case Instruction::Shl: TODO
1859 // case Instruction::LShr: TODO
1860
1861 case Instruction::Select:
1862 return canEvaluateSExtdImpl(I->getOperand(1), Ty) &&
1863 canEvaluateSExtdImpl(I->getOperand(2), Ty);
1864
1865 case Instruction::PHI: {
1866 // We can change a phi if we can change all operands. Note that we never
1867 // get into trouble with cyclic PHIs here because canEvaluate handles use
1868 // chain loops.
1869 PHINode *PN = cast<PHINode>(I);
1870 for (Value *IncValue : PN->incoming_values())
1871 if (!canEvaluateSExtdImpl(IncValue, Ty))
1872 return false;
1873 return true;
1874 }
1875 default:
1876 // TODO: Can handle more cases here.
1877 break;
1878 }
1879
1880 return false;
1881}
1882
1884 // If this sign extend is only used by a truncate, let the truncate be
1885 // eliminated before we try to optimize this sext.
1886 if (Sext.hasOneUse() && isa<TruncInst>(Sext.user_back()))
1887 return nullptr;
1888
1889 if (Instruction *I = commonCastTransforms(Sext))
1890 return I;
1891
1892 Value *Src = Sext.getOperand(0);
1893 Type *SrcTy = Src->getType(), *DestTy = Sext.getType();
1894 unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1895 unsigned DestBitSize = DestTy->getScalarSizeInBits();
1896
1897 // If the value being extended is zero or positive, use a zext instead.
1898 if (isKnownNonNegative(Src, SQ.getWithInstruction(&Sext))) {
1899 auto CI = CastInst::Create(Instruction::ZExt, Src, DestTy);
1900 CI->setNonNeg(true);
1901 return CI;
1902 }
1903
1904 // Try to extend the entire expression tree to the wide destination type.
1905 bool ShouldExtendExpression = true;
1906 Value *TruncSrc = nullptr;
1907 // It is not desirable to extend expression in the trunc + sext pattern when
1908 // destination type is narrower than original (pre-trunc) type.
1909 if (match(Src, m_Trunc(m_Value(TruncSrc))))
1910 if (TruncSrc->getType()->getScalarSizeInBits() > DestBitSize)
1911 ShouldExtendExpression = false;
1912 if (ShouldExtendExpression && shouldChangeType(SrcTy, DestTy) &&
1913 TypeEvaluationHelper::canEvaluateSExtd(Src, DestTy)) {
1914 // Okay, we can transform this! Insert the new expression now.
1915 LLVM_DEBUG(
1916 dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1917 " to avoid sign extend: "
1918 << Sext << '\n');
1919 Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1920 assert(Res->getType() == DestTy);
1921
1922 // If the high bits are already filled with sign bit, just replace this
1923 // cast with the result.
1924 if (ComputeNumSignBits(Res, &Sext) > DestBitSize - SrcBitSize)
1925 return replaceInstUsesWith(Sext, Res);
1926
1927 // We need to emit a shl + ashr to do the sign extend.
1928 Value *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1929 return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1930 ShAmt);
1931 }
1932
1933 Value *X = TruncSrc;
1934 if (X) {
1935 // If the input has more sign bits than bits truncated, then convert
1936 // directly to final type.
1937 unsigned XBitSize = X->getType()->getScalarSizeInBits();
1938 bool HasNSW = cast<TruncInst>(Src)->hasNoSignedWrap();
1939 if (HasNSW || (ComputeNumSignBits(X, &Sext) > XBitSize - SrcBitSize)) {
1940 auto *Res = CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1941 if (auto *ResTrunc = dyn_cast<TruncInst>(Res); ResTrunc && HasNSW)
1942 ResTrunc->setHasNoSignedWrap(true);
1943 return Res;
1944 }
1945
1946 // If input is a trunc from the destination type, then convert into shifts.
1947 if (Src->hasOneUse() && X->getType() == DestTy) {
1948 // sext (trunc X) --> ashr (shl X, C), C
1949 Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1950 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1951 }
1952
1953 // If we are replacing shifted-in high zero bits with sign bits, convert
1954 // the logic shift to arithmetic shift and eliminate the cast to
1955 // intermediate type:
1956 // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1957 Value *Y;
1958 if (Src->hasOneUse() &&
1960 m_SpecificIntAllowPoison(XBitSize - SrcBitSize)))) {
1961 Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1962 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1963 }
1964 }
1965
1966 if (auto *Cmp = dyn_cast<ICmpInst>(Src))
1967 return transformSExtICmp(Cmp, Sext);
1968
1969 // If the input is a shl/ashr pair of a same constant, then this is a sign
1970 // extension from a smaller value. If we could trust arbitrary bitwidth
1971 // integers, we could turn this into a truncate to the smaller bit and then
1972 // use a sext for the whole extension. Since we don't, look deeper and check
1973 // for a truncate. If the source and dest are the same type, eliminate the
1974 // trunc and extend and just do shifts. For example, turn:
1975 // %a = trunc i32 %i to i8
1976 // %b = shl i8 %a, C
1977 // %c = ashr i8 %b, C
1978 // %d = sext i8 %c to i32
1979 // into:
1980 // %a = shl i32 %i, 32-(8-C)
1981 // %d = ashr i32 %a, 32-(8-C)
1982 Value *A = nullptr;
1983 // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1984 Constant *BA = nullptr, *CA = nullptr;
1985 if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1986 m_ImmConstant(CA))) &&
1987 BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1988 Constant *WideCurrShAmt =
1989 ConstantFoldCastOperand(Instruction::SExt, CA, DestTy, DL);
1990 assert(WideCurrShAmt && "Constant folding of ImmConstant cannot fail");
1991 Constant *NumLowbitsLeft = ConstantExpr::getSub(
1992 ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1993 Constant *NewShAmt = ConstantExpr::getSub(
1994 ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1995 NumLowbitsLeft);
1996 NewShAmt =
1998 A = Builder.CreateShl(A, NewShAmt, Sext.getName());
1999 return BinaryOperator::CreateAShr(A, NewShAmt);
2000 }
2001
2002 // Splatting a bit of constant-index across a value:
2003 // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
2004 // If the dest type is different, use a cast (adjust use check).
2005 if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
2006 m_SpecificInt(SrcBitSize - 1))))) {
2007 Type *XTy = X->getType();
2008 unsigned XBitSize = XTy->getScalarSizeInBits();
2009 Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
2010 Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
2011 if (XTy == DestTy)
2012 return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
2013 AshrAmtC);
2014 if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
2015 Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
2016 return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
2017 }
2018 }
2019
2020 if (match(Src, m_VScale())) {
2021 if (Sext.getFunction() &&
2022 Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
2023 Attribute Attr =
2024 Sext.getFunction()->getFnAttribute(Attribute::VScaleRange);
2025 if (std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax())
2026 if (Log2_32(*MaxVScale) < (SrcBitSize - 1))
2027 return replaceInstUsesWith(Sext, Builder.CreateVScale(DestTy));
2028 }
2029 }
2030
2031 // sext(scmp(x, y)) -> scmp(x, y) with a wider result type.
2032 // sext(ucmp(x, y)) -> ucmp(x, y) with a wider result type.
2033 // scmp/ucmp return only -1, 0, or 1, which sign-extend correctly to any
2034 // wider integer type, so we can sink the extension into the intrinsic.
2035 if (auto *CI = dyn_cast<CmpIntrinsic>(Src); CI && CI->hasOneUse())
2036 return replaceInstUsesWith(
2037 Sext, Builder.CreateIntrinsic(DestTy, CI->getIntrinsicID(),
2038 {CI->getLHS(), CI->getRHS()}));
2039
2040 Value *Y;
2041 if (match(Src,
2043 X->getType() == DestTy) {
2044 Value *SextY = Builder.CreateSExt(Y, DestTy);
2046 SextY);
2047 }
2048
2049 return nullptr;
2050}
2051
2052/// Return a Constant* for the specified floating-point constant if it fits
2053/// in the specified FP type without changing its value.
2054static bool fitsInFPType(APFloat F, const fltSemantics &Sem) {
2055 bool losesInfo;
2056 (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
2057 return !losesInfo;
2058}
2059
2061 bool PreferBFloat) {
2062 // See if the value can be truncated to bfloat and then reextended.
2063 if (PreferBFloat && fitsInFPType(F, APFloat::BFloat()))
2064 return Type::getBFloatTy(Ctx);
2065 // See if the value can be truncated to half and then reextended.
2066 if (!PreferBFloat && fitsInFPType(F, APFloat::IEEEhalf()))
2067 return Type::getHalfTy(Ctx);
2068 // See if the value can be truncated to float and then reextended.
2070 return Type::getFloatTy(Ctx);
2071 if (&F.getSemantics() == &APFloat::IEEEdouble())
2072 return nullptr; // Won't shrink.
2073 // See if the value can be truncated to double and then reextended.
2075 return Type::getDoubleTy(Ctx);
2076 // Don't try to shrink to various long double types.
2077 return nullptr;
2078}
2079
2080static Type *shrinkFPConstant(ConstantFP *CFP, bool PreferBFloat) {
2081 Type *Ty = CFP->getType();
2082 if (Ty->getScalarType()->isPPC_FP128Ty())
2083 return nullptr; // No constant folding of this.
2084
2085 Type *ShrinkTy =
2086 shrinkFPConstant(CFP->getContext(), CFP->getValueAPF(), PreferBFloat);
2087 if (ShrinkTy)
2088 if (auto *VecTy = dyn_cast<VectorType>(Ty))
2089 ShrinkTy = VectorType::get(ShrinkTy, VecTy);
2090
2091 return ShrinkTy;
2092}
2093
2094// Determine if this is a vector of ConstantFPs and if so, return the minimal
2095// type we can safely truncate all elements to.
2096static Type *shrinkFPConstantVector(Value *V, bool PreferBFloat) {
2097 auto *CV = dyn_cast<Constant>(V);
2098 auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
2099 if (!CV || !CVVTy)
2100 return nullptr;
2101
2102 Type *MinType = nullptr;
2103
2104 unsigned NumElts = CVVTy->getNumElements();
2105
2106 // For fixed-width vectors we find the minimal type by looking
2107 // through the constant values of the vector.
2108 for (unsigned I = 0; I != NumElts; ++I) {
2109 if (match(CV->getAggregateElement(I), m_Poison()))
2110 continue;
2111
2112 auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(I));
2113 if (!CFP)
2114 return nullptr;
2115
2116 Type *T = shrinkFPConstant(CFP, PreferBFloat);
2117 if (!T)
2118 return nullptr;
2119
2120 // If we haven't found a type yet or this type has a larger mantissa than
2121 // our previous type, this is our new minimal type.
2122 if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
2123 MinType = T;
2124 }
2125
2126 // Make a vector type from the minimal type.
2127 return MinType ? FixedVectorType::get(MinType, NumElts) : nullptr;
2128}
2129
2130/// Find the minimum FP type we can safely truncate to.
2131static Type *getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC) {
2132 if (auto *FPExt = dyn_cast<FPExtInst>(V))
2133 return FPExt->getOperand(0)->getType();
2134
2135 Value *Src;
2136 if (match(V, m_IToFP(m_Value(Src))) &&
2137 IC.canBeCastedExactlyIntToFP(Src, PreferredTy, isa<SIToFPInst>(V),
2139 return PreferredTy;
2140
2141 bool PreferBFloat = PreferredTy->getScalarType()->isBFloatTy();
2142 // If this value is a constant, return the constant in the smallest FP type
2143 // that can accurately represent it. This allows us to turn
2144 // (float)((double)X+2.0) into x+2.0f.
2145 if (auto *CFP = dyn_cast<ConstantFP>(V))
2146 if (Type *T = shrinkFPConstant(CFP, PreferBFloat))
2147 return T;
2148
2149 // Try to shrink scalable and fixed splat vectors.
2150 if (auto *FPC = dyn_cast<Constant>(V))
2151 if (auto *VTy = dyn_cast<VectorType>(V->getType()))
2152 if (auto *Splat = dyn_cast_or_null<ConstantFP>(FPC->getSplatValue()))
2153 if (Type *T = shrinkFPConstant(Splat, PreferBFloat))
2154 return VectorType::get(T, VTy);
2155
2156 // Try to shrink a vector of FP constants. This returns nullptr on scalable
2157 // vectors
2158 if (Type *T = shrinkFPConstantVector(V, PreferBFloat))
2159 return T;
2160
2161 return V->getType();
2162}
2163
2165 bool IsSigned,
2166 const Instruction *CxtI) const {
2167 Type *SrcTy = V->getType();
2168 assert(SrcTy->isIntOrIntVectorTy() && "Expected an integer type");
2169 int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
2170 int DestNumSigBits = FPTy->getFPMantissaWidth();
2171
2172 // Easy case - if the source integer type has less bits than the FP mantissa,
2173 // then the cast must be exact.
2174 if (SrcSize <= DestNumSigBits)
2175 return true;
2176
2177 // Cast from FP to integer and back to FP is independent of the intermediate
2178 // integer width because of poison on overflow.
2179 Value *F;
2180 if (match(V, m_FPToI(m_Value(F)))) {
2181 // If this is uitofp (fptosi F), the source needs an extra bit to avoid
2182 // potential rounding of negative FP input values.
2183 int SrcNumSigBits = F->getType()->getFPMantissaWidth();
2184 if (!IsSigned && match(V, m_FPToSI(m_Value())))
2185 SrcNumSigBits++;
2186
2187 // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
2188 // significant bits than the destination (and make sure neither type is
2189 // weird -- ppc_fp128).
2190 if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
2191 SrcNumSigBits <= DestNumSigBits)
2192 return true;
2193 }
2194
2195 // Try harder to find if the source integer type has less significant bits.
2196 // Compute number of sign bits or determine trailing zeros.
2197 KnownBits SrcKnown = computeKnownBits(V, CxtI);
2198 int SigBits = (int)SrcTy->getScalarSizeInBits() -
2199 SrcKnown.countMinLeadingZeros() -
2200 SrcKnown.countMinTrailingZeros();
2201 if (SigBits <= DestNumSigBits)
2202 return true;
2203
2204 // For sitofp, the sign maps to the FP sign bit, so only magnitude bits
2205 // (BitWidth - NumSignBits) consume mantissa.
2206 if (IsSigned) {
2207 SigBits = (int)SrcTy->getScalarSizeInBits() - ComputeNumSignBits(V, CxtI);
2208 if (SigBits <= DestNumSigBits)
2209 return true;
2210 }
2211
2212 return false;
2213}
2214
2216 CastInst::CastOps Opcode = I.getOpcode();
2217 assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
2218 "Unexpected cast");
2219 Value *Src = I.getOperand(0);
2220 Type *FPTy = I.getType();
2221 return canBeCastedExactlyIntToFP(Src, FPTy, Opcode == CastInst::SIToFP, &I);
2222}
2223
2226 return I;
2227
2228 // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
2229 // simplify this expression to avoid one or more of the trunc/extend
2230 // operations if we can do so without changing the numerical results.
2231 //
2232 // The exact manner in which the widths of the operands interact to limit
2233 // what we can and cannot do safely varies from operation to operation, and
2234 // is explained below in the various case statements.
2235 Type *Ty = FPT.getType();
2236 auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
2237 if (BO && BO->hasOneUse()) {
2238 Type *LHSMinType = getMinimumFPType(BO->getOperand(0), Ty, *this);
2239 Type *RHSMinType = getMinimumFPType(BO->getOperand(1), Ty, *this);
2240 unsigned OpWidth = BO->getType()->getFPMantissaWidth();
2241 unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
2242 unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
2243 unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
2244 unsigned DstWidth = Ty->getFPMantissaWidth();
2245 switch (BO->getOpcode()) {
2246 default: break;
2247 case Instruction::FAdd:
2248 case Instruction::FSub:
2249 // For addition and subtraction, the infinitely precise result can
2250 // essentially be arbitrarily wide; proving that double rounding
2251 // will not occur because the result of OpI is exact (as we will for
2252 // FMul, for example) is hopeless. However, we *can* nonetheless
2253 // frequently know that double rounding cannot occur (or that it is
2254 // innocuous) by taking advantage of the specific structure of
2255 // infinitely-precise results that admit double rounding.
2256 //
2257 // Specifically, if OpWidth >= 2*DstWdith+1 and DstWidth is sufficient
2258 // to represent both sources, we can guarantee that the double
2259 // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
2260 // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
2261 // for proof of this fact).
2262 //
2263 // Note: Figueroa does not consider the case where DstFormat !=
2264 // SrcFormat. It's possible (likely even!) that this analysis
2265 // could be tightened for those cases, but they are rare (the main
2266 // case of interest here is (float)((double)float + float)).
2267 if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
2268 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2269 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2270 Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
2271 RI->copyFastMathFlags(BO);
2272 return RI;
2273 }
2274 break;
2275 case Instruction::FMul:
2276 // For multiplication, the infinitely precise result has at most
2277 // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
2278 // that such a value can be exactly represented, then no double
2279 // rounding can possibly occur; we can safely perform the operation
2280 // in the destination format if it can represent both sources.
2281 if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
2282 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2283 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2284 return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
2285 }
2286 break;
2287 case Instruction::FDiv:
2288 // For division, we use again use the bound from Figueroa's
2289 // dissertation. I am entirely certain that this bound can be
2290 // tightened in the unbalanced operand case by an analysis based on
2291 // the diophantine rational approximation bound, but the well-known
2292 // condition used here is a good conservative first pass.
2293 // TODO: Tighten bound via rigorous analysis of the unbalanced case.
2294 if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
2295 Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
2296 Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
2297 return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
2298 }
2299 break;
2300 case Instruction::FRem: {
2301 // Remainder is straightforward. Remainder is always exact, so the
2302 // type of OpI doesn't enter into things at all. We simply evaluate
2303 // in whichever source type is larger, then convert to the
2304 // destination type.
2305 if (SrcWidth == OpWidth)
2306 break;
2307 Value *LHS, *RHS;
2308 if (LHSWidth == SrcWidth) {
2309 LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
2310 RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
2311 } else {
2312 LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
2313 RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
2314 }
2315
2316 Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
2317 return CastInst::CreateFPCast(ExactResult, Ty);
2318 }
2319 }
2320 }
2321
2322 // (fptrunc (fneg x)) -> (fneg (fptrunc x))
2323 Value *X;
2325 if (Op && Op->hasOneUse()) {
2326 FastMathFlags FMF = FPT.getFastMathFlags();
2327 if (auto *FPMO = dyn_cast<FPMathOperator>(Op))
2328 FMF &= FPMO->getFastMathFlags();
2329
2330 if (match(Op, m_FNeg(m_Value(X)))) {
2331 Value *InnerTrunc = Builder.CreateFPTruncFMF(X, Ty, FMF);
2332 Value *Neg = Builder.CreateFNegFMF(InnerTrunc, FMF);
2333 return replaceInstUsesWith(FPT, Neg);
2334 }
2335
2336 // If we are truncating a select that has an extended operand, we can
2337 // narrow the other operand and do the select as a narrow op.
2338 Value *Cond, *X, *Y;
2340 X->getType() == Ty) {
2341 // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
2342 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2343 Value *Sel =
2344 Builder.CreateSelectFMF(Cond, X, NarrowY, FMF, "narrow.sel", Op);
2345 return replaceInstUsesWith(FPT, Sel);
2346 }
2348 X->getType() == Ty) {
2349 // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
2350 Value *NarrowY = Builder.CreateFPTruncFMF(Y, Ty, FMF);
2351 Value *Sel =
2352 Builder.CreateSelectFMF(Cond, NarrowY, X, FMF, "narrow.sel", Op);
2353 return replaceInstUsesWith(FPT, Sel);
2354 }
2355 }
2356
2357 if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
2358 switch (II->getIntrinsicID()) {
2359 default: break;
2360 case Intrinsic::ceil:
2361 case Intrinsic::fabs:
2362 case Intrinsic::floor:
2363 case Intrinsic::nearbyint:
2364 case Intrinsic::rint:
2365 case Intrinsic::round:
2366 case Intrinsic::roundeven:
2367 case Intrinsic::trunc: {
2368 Value *Src = II->getArgOperand(0);
2369 if (!Src->hasOneUse())
2370 break;
2371
2372 // Except for fabs, this transformation requires the input of the unary FP
2373 // operation to be itself an fpext from the type to which we're
2374 // truncating.
2375 if (II->getIntrinsicID() != Intrinsic::fabs) {
2376 FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
2377 if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
2378 break;
2379 }
2380
2381 // Do unary FP operation on smaller type.
2382 // (fptrunc (fabs x)) -> (fabs (fptrunc x))
2383 Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
2385 FPT.getModule(), II->getIntrinsicID(), Ty);
2387 II->getOperandBundlesAsDefs(OpBundles);
2388 CallInst *NewCI =
2389 CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
2390 // A normal value may be converted to an infinity. It means that we cannot
2391 // propagate ninf from the intrinsic. So we propagate FMF from fptrunc.
2392 NewCI->copyFastMathFlags(&FPT);
2393 return NewCI;
2394 }
2395 }
2396 }
2397
2398 if (Instruction *I = shrinkInsertElt(FPT, Builder))
2399 return I;
2400
2401 Value *Src = FPT.getOperand(0);
2402 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2403 auto *FPCast = cast<CastInst>(Src);
2404 if (isKnownExactCastIntToFP(*FPCast))
2405 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2406 }
2407
2408 return nullptr;
2409}
2410
2412 // If the source operand is a cast from integer to FP and known exact, then
2413 // cast the integer operand directly to the destination type.
2414 Type *Ty = FPExt.getType();
2415 Value *Src = FPExt.getOperand(0);
2416 if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
2417 auto *FPCast = cast<CastInst>(Src);
2418 if (isKnownExactCastIntToFP(*FPCast))
2419 return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
2420 }
2421
2422 return commonCastTransforms(FPExt);
2423}
2424
2425/// fpto{s/u}i[.sat]({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
2426/// This is safe if the intermediate type has enough bits in its mantissa to
2427/// accurately represent all values of X. For example, this won't work with
2428/// i64 -> float -> i64.
2429template <typename FPToIntTy>
2431 constexpr bool IsSaturating = std::is_same_v<FPToIntTy, IntrinsicInst>;
2432
2433 if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
2434 return nullptr;
2435
2436 auto *OpI = cast<CastInst>(FI.getOperand(0));
2437 Value *X = OpI->getOperand(0);
2438 Type *XType = X->getType();
2439 Type *DestType = FI.getType();
2440 bool IsInputSigned = isa<SIToFPInst>(OpI);
2441
2442 bool IsOutputSigned;
2443 if constexpr (IsSaturating)
2444 IsOutputSigned = FI.getIntrinsicID() == Intrinsic::fptosi_sat;
2445 else
2446 IsOutputSigned = isa<FPToSIInst>(FI);
2447
2448 // Since we can assume the conversion won't overflow, our decision as to
2449 // whether the input will fit in the float should depend on the minimum
2450 // of the input range and output range.
2451
2452 // This means this is also safe for a signed input and unsigned output, since
2453 // a negative input would lead to undefined behavior.
2454 if (!isKnownExactCastIntToFP(*OpI)) {
2455 if constexpr (!IsSaturating) {
2456 // The first cast may not round exactly based on the source integer width
2457 // and FP width, but the overflow UB rules can still allow this to fold.
2458 // If the destination type is narrow, that means the intermediate FP value
2459 // must be large enough to hold the source value exactly.
2460 //
2461 // For example, (uint8_t)((float)(uint32_t 16777217) is UB.
2462 int OutputSize = (int)DestType->getScalarSizeInBits();
2463 if (OutputSize > OpI->getType()->getFPMantissaWidth())
2464 return nullptr;
2465 } else {
2466 // Sat intrinsics produce a defined saturated value on overflow, so
2467 // the UB-based shortcut is invalid. Require exactness.
2468 return nullptr;
2469 }
2470 }
2471
2472 unsigned SrcWidth = XType->getScalarSizeInBits();
2473 unsigned DestWidth = DestType->getScalarSizeInBits();
2474
2475 if constexpr (IsSaturating) {
2476 // TODO: cross-sign and narrowing cases could be handled with range
2477 // analysis to prove the source fits in the destination.
2478 if (IsInputSigned != IsOutputSigned || DestWidth < SrcWidth)
2479 return nullptr;
2480 }
2481
2482 if (DestWidth > SrcWidth) {
2483 if (IsInputSigned && IsOutputSigned)
2484 return new SExtInst(X, DestType);
2485 return new ZExtInst(X, DestType);
2486 }
2487 if (DestWidth < SrcWidth)
2488 return new TruncInst(X, DestType);
2489
2490 assert(XType == DestType && "Unexpected types for int to FP to int casts");
2491 return replaceInstUsesWith(FI, X);
2492}
2493
2495template Instruction *
2497
2499 // fpto{u/s}i non-norm --> 0
2500 FPClassTest Mask =
2501 FI.getOpcode() == Instruction::FPToUI ? fcPosNormal : fcNormal;
2503 FI.getOperand(0), Mask, IC.getSimplifyQuery().getWithInstruction(&FI));
2504 if (FPClass.isKnownNever(Mask))
2506
2507 return nullptr;
2508}
2509
2511 if (Instruction *I = foldItoFPtoI(FI))
2512 return I;
2513
2514 if (Instruction *I = foldFPtoI(FI, *this))
2515 return I;
2516
2517 return commonCastTransforms(FI);
2518}
2519
2521 if (Instruction *I = foldItoFPtoI(FI))
2522 return I;
2523
2524 if (Instruction *I = foldFPtoI(FI, *this))
2525 return I;
2526
2527 return commonCastTransforms(FI);
2528}
2529
2531 if (Instruction *R = commonCastTransforms(CI))
2532 return R;
2533 if (!CI.hasNonNeg() && isKnownNonNegative(CI.getOperand(0), SQ)) {
2534 CI.setNonNeg();
2535 return &CI;
2536 }
2537 return nullptr;
2538}
2539
2541 if (Instruction *R = commonCastTransforms(CI))
2542 return R;
2543 if (isKnownNonNegative(CI.getOperand(0), SQ)) {
2544 auto *UI =
2545 CastInst::Create(Instruction::UIToFP, CI.getOperand(0), CI.getType());
2546 UI->setNonNeg(true);
2547 return UI;
2548 }
2549 return nullptr;
2550}
2551
2553 // If the source integer type is not the intptr_t type for this target, do a
2554 // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2555 // cast to be exposed to other transforms.
2556 unsigned AS = CI.getAddressSpace();
2557 if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2558 DL.getPointerSizeInBits(AS)) {
2559 Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2560 DL.getIntPtrType(CI.getContext(), AS));
2561 Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2562 return new IntToPtrInst(P, CI.getType());
2563 }
2564
2565 // Replace (inttoptr (add (ptrtoint %Base), %Offset)) with
2566 // (getelementptr i8, %Base, %Offset) if the pointer is only used as integer
2567 // value.
2568 Value *Base;
2569 Value *Offset;
2570 auto UsesPointerAsInt = [](User *U) {
2572 return true;
2573 if (auto *P = dyn_cast<PHINode>(U))
2574 return P->hasOneUse() && isa<ICmpInst, PtrToIntInst>(*P->user_begin());
2575 return false;
2576 };
2577 if (match(CI.getOperand(0),
2579 m_Value(Offset)))) &&
2581 Base->getType()->getPointerAddressSpace() &&
2582 all_of(CI.users(), UsesPointerAsInt)) {
2583 return GetElementPtrInst::Create(Builder.getInt8Ty(), Base, Offset);
2584 }
2585
2587 return I;
2588
2589 return nullptr;
2590}
2591
2593 // Look through chain of one-use GEPs.
2594 Type *PtrTy = Ptr->getType();
2596 while (true) {
2597 auto *GEP = dyn_cast<GEPOperator>(Ptr);
2598 if (!GEP || !GEP->hasOneUse())
2599 break;
2600 GEPs.push_back(GEP);
2601 Ptr = GEP->getPointerOperand();
2602 }
2603
2604 // Don't handle case where GEP converts from pointer to vector.
2605 if (GEPs.empty() || PtrTy != Ptr->getType())
2606 return nullptr;
2607
2608 // Check whether we know the integer value of the base pointer.
2609 Value *Res;
2610 Type *IdxTy = DL.getIndexType(PtrTy);
2611 if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
2612 Res->getType() == IntTy && IntTy == IdxTy) {
2613 // pass
2614 } else if (isa<ConstantPointerNull>(Ptr)) {
2615 Res = Constant::getNullValue(IdxTy);
2616 } else {
2617 return nullptr;
2618 }
2619
2620 // Perform the entire operation on integers instead.
2621 for (GEPOperator *GEP : reverse(GEPs)) {
2622 Value *Offset = EmitGEPOffset(GEP);
2623 Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
2624 }
2625 return Builder.CreateZExtOrTrunc(Res, IntTy);
2626}
2627
2629 // If the destination integer type is not the intptr_t type for this target,
2630 // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2631 // to be exposed to other transforms.
2633 Type *SrcTy = SrcOp->getType();
2634 Type *Ty = CI.getType();
2635 unsigned AS = CI.getPointerAddressSpace();
2636 unsigned TySize = Ty->getScalarSizeInBits();
2637 unsigned PtrSize = DL.getPointerSizeInBits(AS);
2638 if (TySize != PtrSize) {
2639 Type *IntPtrTy =
2640 SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2641 Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2642 return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2643 }
2644
2645 // (ptrtoint (ptrmask P, M))
2646 // -> (and (ptrtoint P), M)
2647 // This is generally beneficial as `and` is better supported than `ptrmask`.
2648 Value *Ptr, *Mask;
2650 m_Value(Mask)))) &&
2651 Mask->getType() == Ty)
2652 return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
2653
2654 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2655 return replaceInstUsesWith(CI, V);
2656
2657 Value *Vec, *Scalar, *Index;
2659 m_Value(Scalar), m_Value(Index)))) &&
2660 Vec->getType() == Ty) {
2661 assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2662 // Convert the scalar to int followed by insert to eliminate one cast:
2663 // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2664 Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2665 return InsertElementInst::Create(Vec, NewCast, Index);
2666 }
2667
2668 return commonCastTransforms(CI);
2669}
2670
2673 Type *Ty = CI.getType();
2674
2675 // (ptrtoaddr (ptrmask P, M))
2676 // -> (and (ptrtoaddr P), M)
2677 // This is generally beneficial as `and` is better supported than `ptrmask`.
2678 Value *Ptr, *Mask;
2680 m_Value(Mask)))) &&
2681 Mask->getType() == Ty)
2682 return BinaryOperator::CreateAnd(Builder.CreatePtrToAddr(Ptr), Mask);
2683
2684 if (Value *V = foldPtrToIntOrAddrOfGEP(Ty, SrcOp))
2685 return replaceInstUsesWith(CI, V);
2686
2687 // FIXME: Implement variants of ptrtoint folds.
2688 return commonCastTransforms(CI);
2689}
2690
2691/// This input value (which is known to have vector type) is being zero extended
2692/// or truncated to the specified vector type. Since the zext/trunc is done
2693/// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2694/// endianness will impact which end of the vector that is extended or
2695/// truncated.
2696///
2697/// A vector is always stored with index 0 at the lowest address, which
2698/// corresponds to the most significant bits for a big endian stored integer and
2699/// the least significant bits for little endian. A trunc/zext of an integer
2700/// impacts the big end of the integer. Thus, we need to add/remove elements at
2701/// the front of the vector for big endian targets, and the back of the vector
2702/// for little endian targets.
2703///
2704/// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2705///
2706/// The source and destination vector types may have different element types.
2707static Instruction *
2709 InstCombinerImpl &IC) {
2710 // We can only do this optimization if the output is a multiple of the input
2711 // element size, or the input is a multiple of the output element size.
2712 // Convert the input type to have the same element type as the output.
2713 VectorType *SrcTy = cast<VectorType>(InVal->getType());
2714
2715 if (SrcTy->getElementType() != DestTy->getElementType()) {
2716 // The input types don't need to be identical, but for now they must be the
2717 // same size. There is no specific reason we couldn't handle things like
2718 // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2719 // there yet.
2720 if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2721 DestTy->getElementType()->getPrimitiveSizeInBits())
2722 return nullptr;
2723
2724 SrcTy =
2725 FixedVectorType::get(DestTy->getElementType(),
2726 cast<FixedVectorType>(SrcTy)->getNumElements());
2727 InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2728 }
2729
2730 bool IsBigEndian = IC.getDataLayout().isBigEndian();
2731 unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2732 unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2733
2734 assert(SrcElts != DestElts && "Element counts should be different.");
2735
2736 // Now that the element types match, get the shuffle mask and RHS of the
2737 // shuffle to use, which depends on whether we're increasing or decreasing the
2738 // size of the input.
2739 auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2740 ArrayRef<int> ShuffleMask;
2741 Value *V2;
2742
2743 if (SrcElts > DestElts) {
2744 // If we're shrinking the number of elements (rewriting an integer
2745 // truncate), just shuffle in the elements corresponding to the least
2746 // significant bits from the input and use poison as the second shuffle
2747 // input.
2748 V2 = PoisonValue::get(SrcTy);
2749 // Make sure the shuffle mask selects the "least significant bits" by
2750 // keeping elements from back of the src vector for big endian, and from the
2751 // front for little endian.
2752 ShuffleMask = ShuffleMaskStorage;
2753 if (IsBigEndian)
2754 ShuffleMask = ShuffleMask.take_back(DestElts);
2755 else
2756 ShuffleMask = ShuffleMask.take_front(DestElts);
2757 } else {
2758 // If we're increasing the number of elements (rewriting an integer zext),
2759 // shuffle in all of the elements from InVal. Fill the rest of the result
2760 // elements with zeros from a constant zero.
2761 V2 = Constant::getNullValue(SrcTy);
2762 // Use first elt from V2 when indicating zero in the shuffle mask.
2763 uint32_t NullElt = SrcElts;
2764 // Extend with null values in the "most significant bits" by adding elements
2765 // in front of the src vector for big endian, and at the back for little
2766 // endian.
2767 unsigned DeltaElts = DestElts - SrcElts;
2768 if (IsBigEndian)
2769 ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2770 else
2771 ShuffleMaskStorage.append(DeltaElts, NullElt);
2772 ShuffleMask = ShuffleMaskStorage;
2773 }
2774
2775 return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2776}
2777
2778static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2779 return Value % Ty->getPrimitiveSizeInBits() == 0;
2780}
2781
2782static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2783 return Value / Ty->getPrimitiveSizeInBits();
2784}
2785
2786/// V is a value which is inserted into a vector of VecEltTy.
2787/// Look through the value to see if we can decompose it into
2788/// insertions into the vector. See the example in the comment for
2789/// OptimizeIntegerToVectorInsertions for the pattern this handles.
2790/// The type of V is always a non-zero multiple of VecEltTy's size.
2791/// Shift is the number of bits between the lsb of V and the lsb of
2792/// the vector.
2793///
2794/// This returns false if the pattern can't be matched or true if it can,
2795/// filling in Elements with the elements found here.
2796static bool collectInsertionElements(Value *V, unsigned Shift,
2797 SmallVectorImpl<Value *> &Elements,
2798 Type *VecEltTy, bool isBigEndian) {
2799 assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2800 "Shift should be a multiple of the element type size");
2801
2802 // Poison values never contribute useful bits to the result.
2803 if (match(V, m_Poison()))
2804 return true;
2805
2806 // If we got down to a value of the right type, we win, try inserting into the
2807 // right element.
2808 if (V->getType() == VecEltTy) {
2809 // Inserting null doesn't actually insert any elements.
2810 if (Constant *C = dyn_cast<Constant>(V))
2811 if (C->isNullValue())
2812 return true;
2813
2814 unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2815 if (isBigEndian)
2816 ElementIndex = Elements.size() - ElementIndex - 1;
2817
2818 // Fail if multiple elements are inserted into this slot.
2819 if (Elements[ElementIndex])
2820 return false;
2821
2822 Elements[ElementIndex] = V;
2823 return true;
2824 }
2825
2826 if (Constant *C = dyn_cast<Constant>(V)) {
2827 // Figure out the # elements this provides, and bitcast it or slice it up
2828 // as required.
2829 unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2830 VecEltTy);
2831 // If the constant is the size of a vector element, we just need to bitcast
2832 // it to the right type so it gets properly inserted.
2833 if (NumElts == 1)
2835 Shift, Elements, VecEltTy, isBigEndian);
2836
2837 // Okay, this is a constant that covers multiple elements. Slice it up into
2838 // pieces and insert each element-sized piece into the vector.
2839 if (!isa<IntegerType>(C->getType()))
2840 C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2841 C->getType()->getPrimitiveSizeInBits()));
2842 unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2843 Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2844
2845 for (unsigned i = 0; i != NumElts; ++i) {
2846 unsigned ShiftI = i * ElementSize;
2848 Instruction::LShr, C, ConstantInt::get(C->getType(), ShiftI));
2849 if (!Piece)
2850 return false;
2851
2852 Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2853 if (!collectInsertionElements(Piece, ShiftI + Shift, Elements, VecEltTy,
2854 isBigEndian))
2855 return false;
2856 }
2857 return true;
2858 }
2859
2860 if (!V->hasOneUse()) return false;
2861
2863 if (!I) return false;
2864 switch (I->getOpcode()) {
2865 default: return false; // Unhandled case.
2866 case Instruction::BitCast:
2867 if (I->getOperand(0)->getType()->isVectorTy())
2868 return false;
2869 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2870 isBigEndian);
2871 case Instruction::ZExt:
2873 I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2874 VecEltTy))
2875 return false;
2876 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2877 isBigEndian);
2878 case Instruction::Or:
2879 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2880 isBigEndian) &&
2881 collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2882 isBigEndian);
2883 case Instruction::Shl: {
2884 // Must be shifting by a constant that is a multiple of the element size.
2885 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2886 if (!CI) return false;
2887 Shift += CI->getZExtValue();
2888 if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2889 return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2890 isBigEndian);
2891 }
2892
2893 }
2894}
2895
2896
2897/// If the input is an 'or' instruction, we may be doing shifts and ors to
2898/// assemble the elements of the vector manually.
2899/// Try to rip the code out and replace it with insertelements. This is to
2900/// optimize code like this:
2901///
2902/// %tmp37 = bitcast float %inc to i32
2903/// %tmp38 = zext i32 %tmp37 to i64
2904/// %tmp31 = bitcast float %inc5 to i32
2905/// %tmp32 = zext i32 %tmp31 to i64
2906/// %tmp33 = shl i64 %tmp32, 32
2907/// %ins35 = or i64 %tmp33, %tmp38
2908/// %tmp43 = bitcast i64 %ins35 to <2 x float>
2909///
2910/// Into two insertelements that do "buildvector{%inc, %inc5}".
2912 InstCombinerImpl &IC) {
2913 auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2914 Value *IntInput = CI.getOperand(0);
2915
2916 // if the int input is just an undef value do not try to optimize to vector
2917 // insertions as it will prevent undef propagation
2918 if (isa<UndefValue>(IntInput))
2919 return nullptr;
2920
2921 SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2922 if (!collectInsertionElements(IntInput, 0, Elements,
2923 DestVecTy->getElementType(),
2924 IC.getDataLayout().isBigEndian()))
2925 return nullptr;
2926
2927 // If we succeeded, we know that all of the element are specified by Elements
2928 // or are zero if Elements has a null entry. Recast this as a set of
2929 // insertions.
2930 Value *Result = Constant::getNullValue(CI.getType());
2931 for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2932 if (!Elements[i]) continue; // Unset element.
2933
2934 Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2935 IC.Builder.getInt32(i));
2936 }
2937
2938 return Result;
2939}
2940
2941/// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2942/// vector followed by extract element. The backend tends to handle bitcasts of
2943/// vectors better than bitcasts of scalars because vector registers are
2944/// usually not type-specific like scalar integer or scalar floating-point.
2946 InstCombinerImpl &IC) {
2947 Value *VecOp, *Index;
2948 if (!match(BitCast.getOperand(0),
2949 m_OneUse(m_ExtractElt(m_Value(VecOp), m_Value(Index)))))
2950 return nullptr;
2951
2952 // The bitcast must be to a vectorizable type, otherwise we can't make a new
2953 // type to extract from.
2954 Type *DestType = BitCast.getType();
2955 VectorType *VecType = cast<VectorType>(VecOp->getType());
2956 if (VectorType::isValidElementType(DestType)) {
2957 auto *NewVecType = VectorType::get(DestType, VecType);
2958 auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2959 return ExtractElementInst::Create(NewBC, Index);
2960 }
2961
2962 // Only solve DestType is vector to avoid inverse transform in visitBitCast.
2963 // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2964 auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2965 if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2966 return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2967
2968 return nullptr;
2969}
2970
2971/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2973 InstCombiner::BuilderTy &Builder) {
2974 Type *DestTy = BitCast.getType();
2975 BinaryOperator *BO;
2976
2977 if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2978 !BO->isBitwiseLogicOp())
2979 return nullptr;
2980
2981 // FIXME: This transform is restricted to vector types to avoid backend
2982 // problems caused by creating potentially illegal operations. If a fix-up is
2983 // added to handle that situation, we can remove this check.
2984 if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2985 return nullptr;
2986
2987 if (DestTy->isFPOrFPVectorTy()) {
2988 Value *X, *Y;
2989 // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2990 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2992 if (X->getType()->isFPOrFPVectorTy() &&
2993 Y->getType()->isIntOrIntVectorTy()) {
2994 Value *CastedOp =
2995 Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2996 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2997 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2998 }
2999 if (X->getType()->isIntOrIntVectorTy() &&
3000 Y->getType()->isFPOrFPVectorTy()) {
3001 Value *CastedOp =
3002 Builder.CreateBitCast(BO->getOperand(1), X->getType());
3003 Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
3004 return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
3005 }
3006 }
3007 return nullptr;
3008 }
3009
3010 if (!DestTy->isIntOrIntVectorTy())
3011 return nullptr;
3012
3013 Value *X;
3014 if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
3015 X->getType() == DestTy && !isa<Constant>(X)) {
3016 // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
3017 Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
3018 return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
3019 }
3020
3021 if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
3022 X->getType() == DestTy && !isa<Constant>(X)) {
3023 // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
3024 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
3025 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
3026 }
3027
3028 // Canonicalize vector bitcasts to come before vector bitwise logic with a
3029 // constant. This eases recognition of special constants for later ops.
3030 // Example:
3031 // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
3032 Constant *C;
3033 if (match(BO->getOperand(1), m_Constant(C))) {
3034 // bitcast (logic X, C) --> logic (bitcast X, C')
3035 Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
3036 Value *CastedC = Builder.CreateBitCast(C, DestTy);
3037 return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
3038 }
3039
3040 return nullptr;
3041}
3042
3043/// Change the type of a select if we can eliminate a bitcast.
3045 InstCombiner::BuilderTy &Builder) {
3046 Value *Cond, *TVal, *FVal;
3047 if (!match(BitCast.getOperand(0),
3048 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
3049 return nullptr;
3050
3051 // A vector select must maintain the same number of elements in its operands.
3052 Type *CondTy = Cond->getType();
3053 Type *DestTy = BitCast.getType();
3054
3055 auto *DestVecTy = dyn_cast<VectorType>(DestTy);
3056
3057 if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
3058 if (!DestVecTy ||
3059 CondVTy->getElementCount() != DestVecTy->getElementCount())
3060 return nullptr;
3061
3062 auto *Sel = cast<Instruction>(BitCast.getOperand(0));
3063 auto *SrcVecTy = dyn_cast<VectorType>(TVal->getType());
3064
3065 if ((isa<Constant>(TVal) || isa<Constant>(FVal)) &&
3066 (!DestVecTy ||
3067 (SrcVecTy && ElementCount::isKnownLE(DestVecTy->getElementCount(),
3068 SrcVecTy->getElementCount())))) {
3069 // Avoid introducing select of vector (or select of vector with more
3070 // elements) until the backend can undo this transformation.
3071 Value *CastedTVal = Builder.CreateBitCast(TVal, DestTy);
3072 Value *CastedFVal = Builder.CreateBitCast(FVal, DestTy);
3073 return SelectInst::Create(Cond, CastedTVal, CastedFVal, "", nullptr, Sel);
3074 }
3075
3076 // FIXME: This transform is restricted from changing the select between
3077 // scalars and vectors to avoid backend problems caused by creating
3078 // potentially illegal operations. If a fix-up is added to handle that
3079 // situation, we can remove this check.
3080 if ((DestVecTy != nullptr) != (SrcVecTy != nullptr))
3081 return nullptr;
3082
3083 Value *X;
3084 if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
3085 !isa<Constant>(X)) {
3086 // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
3087 Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
3088 return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
3089 }
3090
3091 if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
3092 !isa<Constant>(X)) {
3093 // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
3094 Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
3095 return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
3096 }
3097
3098 return nullptr;
3099}
3100
3101/// Check if all users of CI are StoreInsts.
3102static bool hasStoreUsersOnly(CastInst &CI) {
3103 for (User *U : CI.users()) {
3104 if (!isa<StoreInst>(U))
3105 return false;
3106 }
3107 return true;
3108}
3109
3110/// This function handles following case
3111///
3112/// A -> B cast
3113/// PHI
3114/// B -> A cast
3115///
3116/// All the related PHI nodes can be replaced by new PHI nodes with type A.
3117/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
3118Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
3119 PHINode *PN) {
3120 // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
3121 if (hasStoreUsersOnly(CI))
3122 return nullptr;
3123
3124 Value *Src = CI.getOperand(0);
3125 Type *SrcTy = Src->getType(); // Type B
3126 Type *DestTy = CI.getType(); // Type A
3127
3128 SmallVector<PHINode *, 4> PhiWorklist;
3129 SmallSetVector<PHINode *, 4> OldPhiNodes;
3130
3131 // Find all of the A->B casts and PHI nodes.
3132 // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
3133 // OldPhiNodes is used to track all known PHI nodes, before adding a new
3134 // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
3135 PhiWorklist.push_back(PN);
3136 OldPhiNodes.insert(PN);
3137 while (!PhiWorklist.empty()) {
3138 auto *OldPN = PhiWorklist.pop_back_val();
3139 for (Value *IncValue : OldPN->incoming_values()) {
3140 if (isa<Constant>(IncValue))
3141 continue;
3142
3143 if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
3144 // If there is a sequence of one or more load instructions, each loaded
3145 // value is used as address of later load instruction, bitcast is
3146 // necessary to change the value type, don't optimize it. For
3147 // simplicity we give up if the load address comes from another load.
3148 Value *Addr = LI->getOperand(0);
3149 if (Addr == &CI || isa<LoadInst>(Addr))
3150 return nullptr;
3151 // Don't tranform "load <256 x i32>, <256 x i32>*" to
3152 // "load x86_amx, x86_amx*", because x86_amx* is invalid.
3153 // TODO: Remove this check when bitcast between vector and x86_amx
3154 // is replaced with a specific intrinsic.
3155 if (DestTy->isX86_AMXTy())
3156 return nullptr;
3157 if (LI->hasOneUse() && LI->isSimple())
3158 continue;
3159 // If a LoadInst has more than one use, changing the type of loaded
3160 // value may create another bitcast.
3161 return nullptr;
3162 }
3163
3164 if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
3165 if (OldPhiNodes.insert(PNode))
3166 PhiWorklist.push_back(PNode);
3167 continue;
3168 }
3169
3170 auto *BCI = dyn_cast<BitCastInst>(IncValue);
3171 // We can't handle other instructions.
3172 if (!BCI)
3173 return nullptr;
3174
3175 // Verify it's a A->B cast.
3176 Type *TyA = BCI->getOperand(0)->getType();
3177 Type *TyB = BCI->getType();
3178 if (TyA != DestTy || TyB != SrcTy)
3179 return nullptr;
3180 }
3181 }
3182
3183 // Check that each user of each old PHI node is something that we can
3184 // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
3185 for (auto *OldPN : OldPhiNodes) {
3186 for (User *V : OldPN->users()) {
3187 if (auto *SI = dyn_cast<StoreInst>(V)) {
3188 if (!SI->isSimple() || SI->getOperand(0) != OldPN)
3189 return nullptr;
3190 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3191 // Verify it's a B->A cast.
3192 Type *TyB = BCI->getOperand(0)->getType();
3193 Type *TyA = BCI->getType();
3194 if (TyA != DestTy || TyB != SrcTy)
3195 return nullptr;
3196 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3197 // As long as the user is another old PHI node, then even if we don't
3198 // rewrite it, the PHI web we're considering won't have any users
3199 // outside itself, so it'll be dead.
3200 if (!OldPhiNodes.contains(PHI))
3201 return nullptr;
3202 } else {
3203 return nullptr;
3204 }
3205 }
3206 }
3207
3208 // For each old PHI node, create a corresponding new PHI node with a type A.
3209 SmallDenseMap<PHINode *, PHINode *> NewPNodes;
3210 for (auto *OldPN : OldPhiNodes) {
3211 Builder.SetInsertPoint(OldPN);
3212 PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
3213 NewPNodes[OldPN] = NewPN;
3214 }
3215
3216 // Fill in the operands of new PHI nodes.
3217 for (auto *OldPN : OldPhiNodes) {
3218 PHINode *NewPN = NewPNodes[OldPN];
3219 for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
3220 Value *V = OldPN->getOperand(j);
3221 Value *NewV = nullptr;
3222 if (auto *C = dyn_cast<Constant>(V)) {
3223 NewV = ConstantExpr::getBitCast(C, DestTy);
3224 } else if (auto *LI = dyn_cast<LoadInst>(V)) {
3225 // Explicitly perform load combine to make sure no opposing transform
3226 // can remove the bitcast in the meantime and trigger an infinite loop.
3227 Builder.SetInsertPoint(LI);
3228 NewV = combineLoadToNewType(*LI, DestTy);
3229 // Remove the old load and its use in the old phi, which itself becomes
3230 // dead once the whole transform finishes.
3231 replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
3233 } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3234 NewV = BCI->getOperand(0);
3235 } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
3236 NewV = NewPNodes[PrevPN];
3237 }
3238 assert(NewV);
3239 NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
3240 }
3241 }
3242
3243 // Traverse all accumulated PHI nodes and process its users,
3244 // which are Stores and BitcCasts. Without this processing
3245 // NewPHI nodes could be replicated and could lead to extra
3246 // moves generated after DeSSA.
3247 // If there is a store with type B, change it to type A.
3248
3249
3250 // Replace users of BitCast B->A with NewPHI. These will help
3251 // later to get rid off a closure formed by OldPHI nodes.
3252 Instruction *RetVal = nullptr;
3253 for (auto *OldPN : OldPhiNodes) {
3254 PHINode *NewPN = NewPNodes[OldPN];
3255 for (User *V : make_early_inc_range(OldPN->users())) {
3256 if (auto *SI = dyn_cast<StoreInst>(V)) {
3257 assert(SI->isSimple() && SI->getOperand(0) == OldPN);
3258 Builder.SetInsertPoint(SI);
3259 auto *NewBC =
3260 cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
3261 SI->setOperand(0, NewBC);
3262 Worklist.push(SI);
3263 assert(hasStoreUsersOnly(*NewBC));
3264 }
3265 else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
3266 Type *TyB = BCI->getOperand(0)->getType();
3267 Type *TyA = BCI->getType();
3268 assert(TyA == DestTy && TyB == SrcTy);
3269 (void) TyA;
3270 (void) TyB;
3271 Instruction *I = replaceInstUsesWith(*BCI, NewPN);
3272 if (BCI == &CI)
3273 RetVal = I;
3274 } else if (auto *PHI = dyn_cast<PHINode>(V)) {
3275 assert(OldPhiNodes.contains(PHI));
3276 (void) PHI;
3277 } else {
3278 llvm_unreachable("all uses should be handled");
3279 }
3280 }
3281 }
3282
3283 return RetVal;
3284}
3285
3286/// Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to
3287/// copysign((bitcast Y to fp), X)
3289 InstCombiner::BuilderTy &Builder,
3290 const SimplifyQuery &SQ) {
3291 Value *X, *Y;
3292 Type *FTy = CI.getType();
3293 if (!FTy->isFPOrFPVectorTy())
3294 return nullptr;
3297 m_Value(Y)))))
3298 return nullptr;
3299 if (X->getType() != FTy)
3300 return nullptr;
3301 if (!isKnownNonNegative(Y, SQ))
3302 return nullptr;
3303
3304 return Builder.CreateCopySign(Builder.CreateBitCast(Y, FTy), X);
3305}
3306
3308 // If the operands are integer typed then apply the integer transforms,
3309 // otherwise just apply the common ones.
3310 Value *Src = CI.getOperand(0);
3311 Type *SrcTy = Src->getType();
3312 Type *DestTy = CI.getType();
3313
3314 // Get rid of casts from one type to the same type. These are useless and can
3315 // be replaced by the operand.
3316 if (DestTy == Src->getType())
3317 return replaceInstUsesWith(CI, Src);
3318
3319 if (isa<FixedVectorType>(DestTy)) {
3320 if (isa<IntegerType>(SrcTy)) {
3321 // If this is a cast from an integer to vector, check to see if the input
3322 // is a trunc or zext of a bitcast from vector. If so, we can replace all
3323 // the casts with a shuffle and (potentially) a bitcast.
3324 if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
3325 CastInst *SrcCast = cast<CastInst>(Src);
3326 if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
3327 if (isa<VectorType>(BCIn->getOperand(0)->getType()))
3329 BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
3330 return I;
3331 }
3332
3333 // If the input is an 'or' instruction, we may be doing shifts and ors to
3334 // assemble the elements of the vector manually. Try to rip the code out
3335 // and replace it with insertelements.
3336 if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
3337 return replaceInstUsesWith(CI, V);
3338 }
3339 }
3340
3341 if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
3342 if (SrcVTy->getNumElements() == 1) {
3343 // If our destination is not a vector, then make this a straight
3344 // scalar-scalar cast.
3345 if (!DestTy->isVectorTy()) {
3346 Value *Elem =
3347 Builder.CreateExtractElement(Src,
3349 return CastInst::Create(Instruction::BitCast, Elem, DestTy);
3350 }
3351
3352 // Otherwise, see if our source is an insert. If so, then use the scalar
3353 // component directly:
3354 // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
3355 if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
3356 return new BitCastInst(InsElt->getOperand(1), DestTy);
3357 }
3358
3359 // Convert an artificial vector insert into more analyzable bitwise logic.
3360 unsigned BitWidth = DestTy->getScalarSizeInBits();
3361 Value *X, *Y;
3362 uint64_t IndexC;
3364 m_Value(Y), m_ConstantInt(IndexC)))) &&
3365 DestTy->isIntegerTy() && X->getType() == DestTy &&
3366 Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
3367 // Adjust for big endian - the LSBs are at the high index.
3368 if (DL.isBigEndian())
3369 IndexC = SrcVTy->getNumElements() - 1 - IndexC;
3370
3371 // We only handle (endian-normalized) insert to index 0. Any other insert
3372 // would require a left-shift, so that is an extra instruction.
3373 if (IndexC == 0) {
3374 // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
3375 unsigned EltWidth = Y->getType()->getScalarSizeInBits();
3376 APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
3377 Value *AndX = Builder.CreateAnd(X, MaskC);
3378 Value *ZextY = Builder.CreateZExt(Y, DestTy);
3379 return BinaryOperator::CreateOr(AndX, ZextY);
3380 }
3381 }
3382 }
3383
3384 if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
3385 // Okay, we have (bitcast (shuffle ..)). Check to see if this is
3386 // a bitcast to a vector with the same # elts.
3387 Value *ShufOp0 = Shuf->getOperand(0);
3388 Value *ShufOp1 = Shuf->getOperand(1);
3389 auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
3390 auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
3391 if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
3392 cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
3393 ShufElts == SrcVecElts) {
3394 BitCastInst *Tmp;
3395 // If either of the operands is a cast from CI.getType(), then
3396 // evaluating the shuffle in the casted destination's type will allow
3397 // us to eliminate at least one cast.
3398 if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
3399 Tmp->getOperand(0)->getType() == DestTy) ||
3400 ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
3401 Tmp->getOperand(0)->getType() == DestTy)) {
3402 Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
3403 Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
3404 // Return a new shuffle vector. Use the same element ID's, as we
3405 // know the vector types match #elts.
3406 return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
3407 }
3408 }
3409
3410 // A bitcasted-to-scalar and byte/bit reversing shuffle is better recognized
3411 // as a byte/bit swap:
3412 // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) -> bswap (bitcast X)
3413 // bitcast <N x i1> (shuf X, undef, <N, N-1,...0>) -> bitreverse (bitcast X)
3414 if (DestTy->isIntegerTy() && ShufElts.getKnownMinValue() % 2 == 0 &&
3415 Shuf->hasOneUse() && Shuf->isReverse()) {
3416 unsigned IntrinsicNum = 0;
3417 if (DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
3418 SrcTy->getScalarSizeInBits() == 8) {
3419 IntrinsicNum = Intrinsic::bswap;
3420 } else if (SrcTy->getScalarSizeInBits() == 1) {
3421 IntrinsicNum = Intrinsic::bitreverse;
3422 }
3423 if (IntrinsicNum != 0) {
3424 assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
3425 assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
3426 Function *BswapOrBitreverse = Intrinsic::getOrInsertDeclaration(
3427 CI.getModule(), IntrinsicNum, DestTy);
3428 Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
3429 return CallInst::Create(BswapOrBitreverse, {ScalarX});
3430 }
3431 }
3432 }
3433
3434 // Handle the A->B->A cast, and there is an intervening PHI node.
3435 if (PHINode *PN = dyn_cast<PHINode>(Src))
3436 if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
3437 return I;
3438
3439 if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
3440 return I;
3441
3443 return I;
3444
3446 return I;
3447
3448 if (Value *V = foldCopySignIdioms(CI, Builder, SQ.getWithInstruction(&CI)))
3449 return replaceInstUsesWith(CI, V);
3450
3451 return commonCastTransforms(CI);
3452}
3453
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This file defines the DenseMap class.
static bool isSigned(unsigned Opcode)
Hexagon Common GEP
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
static Value * foldCopySignIdioms(BitCastInst &CI, InstCombiner::BuilderTy &Builder, const SimplifyQuery &SQ)
Fold (bitcast (or (and (bitcast X to int), signmask), nneg Y) to fp) to copysign((bitcast Y to fp),...
static Type * shrinkFPConstantVector(Value *V, bool PreferBFloat)
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
static Instruction * foldFPtoI(Instruction &FI, InstCombiner &IC)
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
static bool fitsInFPType(APFloat F, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
SmallDenseMap< Value *, Value *, 8 > EvaluatedMap
static Type * getMinimumFPType(Value *V, Type *PreferredTy, InstCombiner &IC)
Find the minimum FP type we can safely truncate to.
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
static Type * shrinkFPConstant(LLVMContext &Ctx, const APFloat &F, bool PreferBFloat)
static Instruction * foldVecExtTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Whenever an element is extracted from a vector, optionally shifted down, and then truncated,...
static Value * EvaluateInDifferentTypeImpl(Value *V, Type *Ty, bool isSigned, InstCombinerImpl &IC, EvaluatedMap &Processed)
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static unsigned getScalarSizeInBits(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
Value * RHS
Value * LHS
static const fltSemantics & IEEEsingle()
Definition APFloat.h:297
static const fltSemantics & BFloat()
Definition APFloat.h:296
static const fltSemantics & IEEEdouble()
Definition APFloat.h:298
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:345
static const fltSemantics & IEEEhalf()
Definition APFloat.h:295
static LLVM_ABI unsigned int semanticsIntSizeInBits(const fltSemantics &, bool)
Definition APFloat.cpp:243
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition APInt.cpp:1599
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1692
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
int32_t exactLogBase2() const
Definition APInt.h:1806
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition APInt.h:1662
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition APInt.h:287
unsigned countr_one() const
Count the number of trailing one bits.
Definition APInt.h:1679
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
This class represents a conversion between pointers from one address space to another.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
LLVM_ABI std::optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or std::nullopt when unknown.
BinaryOps getOpcode() const
Definition InstrTypes.h:409
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
This class represents a no-op cast from one type to another.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
Type * getSrcTy() const
Return the source type, as a convenience.
Definition InstrTypes.h:679
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition InstrTypes.h:674
static LLVM_ABI unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, const DataLayout *DL)
Determine how a pair of casts can be eliminated, if they can be at all.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
static LLVM_ABI CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Type * getDestTy() const
Return the destination type, as a convenience.
Definition InstrTypes.h:681
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
const APFloat & getValueAPF() const
Definition Constants.h:463
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
bool uge(uint64_t Num) const
This function will return true iff this constant represents a value with active bits bigger than 64 b...
Definition Constants.h:262
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
LLVM_ABI bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
bool isBigEndian() const
Definition DataLayout.h:218
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
Definition DenseMap.h:252
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This class represents an extension of floating point types.
This class represents a cast from floating point to signed integer.
This class represents a cast from floating point to unsigned integer.
This class represents a truncation of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:759
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:724
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2669
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition IRBuilder.h:534
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:529
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2284
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * visitZExt(ZExtInst &Zext)
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Instruction * foldExtractionOfVectorDeinterleave(ZExtInst &RootZExt)
Instruction * visitSExt(SExtInst &Sext)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Instruction * visitFPToSI(FPToSIInst &FI)
Instruction * visitTrunc(TruncInst &CI)
Instruction * visitUIToFP(CastInst &CI)
Instruction * visitPtrToInt(PtrToIntInst &CI)
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate type has enou...
Instruction * visitSIToFP(CastInst &CI)
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Instruction * visitFPTrunc(FPTruncInst &CI)
Value * foldPtrToIntOrAddrOfGEP(Type *IntTy, Value *Ptr)
Instruction * visitBitCast(BitCastInst &CI)
Instruction * visitIntToPtr(IntToPtrInst &CI)
Instruction * visitFPToUI(FPToUIInst &FI)
Instruction * visitPtrToAddr(PtrToAddrInst &CI)
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
Instruction * visitFPExt(CastInst &CI)
LoadInst * combineLoadToNewType(LoadInst &LI, Type *NewTy, const Twine &Suffix="")
Helper to combine a load to a new type.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
unsigned ComputeNumSignBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
LLVM_ABI bool canBeCastedExactlyIntToFP(Value *V, Type *FPTy, bool IsSigned, const Instruction *CxtI=nullptr) const
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
Instruction * InsertNewInstWith(Instruction *New, BasicBlock::iterator Old)
Same as InsertNewInstBefore, but also sets the debug loc.
const DataLayout & DL
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
LLVM_ABI bool isKnownExactCastIntToFP(CastInst &I) const
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
IRBuilder< TargetFolder, IRBuilderInstCombineInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
const SimplifyQuery & getSimplifyQuery() const
LLVM_ABI void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setNonNeg(bool b=true)
Set or clear the nneg flag on this instruction, which must be a zext instruction.
LLVM_ABI bool hasNonNeg() const LLVM_READONLY
Determine whether the the nneg flag is set.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
This class represents a cast from an integer to a pointer.
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
This class represents a cast from a pointer to an address (non-capturing ptrtoint).
Value * getPointerOperand()
Gets the pointer operand.
This class represents a cast from a pointer to an integer.
Value * getPointerOperand()
Gets the pointer operand.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
This class represents a sign extension of integer types.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This instruction constructs a fixed permutation of two input vectors.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class represents a truncation of integer types.
void setHasNoSignedWrap(bool B)
void setHasNoUnsignedWrap(bool B)
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition Type.h:202
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:287
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
LLVM_ABI int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition Type.cpp:237
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:284
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
This class represents zero extension of integer types.
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:237
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_lowbit_mask > m_LowBitMask()
Match an integer or vector with only the low bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
PtrToIntSameSize_match< OpTy > m_PtrToIntSameSize(const DataLayout &DL, const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
cst_pred_ty< is_sign_mask > m_SignMask()
Match an integer or vector with only the sign bit(s) set.
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
cst_pred_ty< is_power2 > m_Power2()
Match an integer or vector power-of-2.
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
specific_intval< true > m_SpecificIntAllowPoison(const APInt &V)
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
IntrinsicID_match m_VScale()
Matches a call to llvm.vscale().
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, UIToFPInst >, CastInst_match< OpTy, SIToFPInst > > m_IToFP(const OpTy &Op)
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoSignedWrap > m_NSWTrunc(const OpTy &Op)
Matches trunc nsw.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_or< CastInst_match< OpTy, FPToUIInst >, CastInst_match< OpTy, FPToSIInst > > m_FPToI(const OpTy &Op)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
BinOpPred_match< LHS, RHS, is_bitwiselogic_op, true > m_c_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations in either order.
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match a integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_Ctlz(const Opnd0 &Op0, const Opnd1 &Op1)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
CastOperator_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
LLVM_ABI Constant * ConstantFoldSelectInstruction(Constant *Cond, Constant *V1, Constant *V2)
Attempt to constant fold a select instruction with the specified operands.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
unsigned Log2_64_Ceil(uint64_t Value)
Return the ceil log base 2 of the specified value, 64 if the value is zero.
Definition MathExtras.h:350
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI Value * simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, const SimplifyQuery &Q)
Given operands for a CastInst, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI Constant * ConstantFoldCastOperand(unsigned Opcode, Constant *C, Type *DestTy, const DataLayout &DL)
Attempt to constant fold a cast with the specified operand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition Local.cpp:2440
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ SMax
Signed integer max implemented in terms of select(cmp()).
@ And
Bitwise or logical AND of integers.
@ SMin
Signed integer min implemented in terms of select(cmp()).
DWARFExpression::Operation Op
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the give value is known to be non-negative.
LLVM_ABI Constant * ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1, Constant *V2)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
bool isKnownNever(FPClassTest Mask) const
Return true if it's known this can never be one of the mask entries.
Matching combinators.
SimplifyQuery getWithInstruction(const Instruction *I) const