1//===- ValueTracking.cpp - Walk computations to compute properties --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains routines that help analyze properties that chains of
10// computations have.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Analysis/ValueTracking.h"
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/ScopeExit.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/StringRef.h"
32#include "llvm/Analysis/Loads.h"
38#include "llvm/IR/Argument.h"
39#include "llvm/IR/Attributes.h"
40#include "llvm/IR/BasicBlock.h"
41#include "llvm/IR/Constant.h"
43#include "llvm/IR/Constants.h"
46#include "llvm/IR/Dominators.h"
48#include "llvm/IR/Function.h"
50#include "llvm/IR/GlobalAlias.h"
51#include "llvm/IR/GlobalValue.h"
53#include "llvm/IR/InstrTypes.h"
54#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/IntrinsicsAArch64.h"
59#include "llvm/IR/IntrinsicsAMDGPU.h"
60#include "llvm/IR/IntrinsicsRISCV.h"
61#include "llvm/IR/IntrinsicsX86.h"
62#include "llvm/IR/LLVMContext.h"
63#include "llvm/IR/Metadata.h"
64#include "llvm/IR/Module.h"
65#include "llvm/IR/Operator.h"
67#include "llvm/IR/Type.h"
68#include "llvm/IR/User.h"
69#include "llvm/IR/Value.h"
77#include <algorithm>
78#include <cassert>
79#include <cstdint>
80#include <optional>
81#include <utility>
82
83using namespace llvm;
84using namespace llvm::PatternMatch;
85
86// Controls the number of uses of the value searched for possible
87// dominating comparisons.
88static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
89 cl::Hidden, cl::init(20));
90
91
92/// Returns the bitwidth of the given scalar or pointer type. For vector types,
93/// returns the element type's bitwidth.
94static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
95 if (unsigned BitWidth = Ty->getScalarSizeInBits())
96 return BitWidth;
97
98 return DL.getPointerTypeSizeInBits(Ty);
99}
100
101// Given the provided Value and, potentially, a context instruction, return
102// the preferred context instruction (if any).
103static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
104 // If we've been provided with a context instruction, then use that (provided
105 // it has been inserted).
106 if (CxtI && CxtI->getParent())
107 return CxtI;
108
109 // If the value is really an already-inserted instruction, then use that.
110 CxtI = dyn_cast<Instruction>(V);
111 if (CxtI && CxtI->getParent())
112 return CxtI;
113
114 return nullptr;
115}
116
117static const Instruction *safeCxtI(const Value *V1, const Value *V2, const Instruction *CxtI) {
118 // If we've been provided with a context instruction, then use that (provided
119 // it has been inserted).
120 if (CxtI && CxtI->getParent())
121 return CxtI;
122
123 // If the value is really an already-inserted instruction, then use that.
124 CxtI = dyn_cast<Instruction>(V1);
125 if (CxtI && CxtI->getParent())
126 return CxtI;
127
128 CxtI = dyn_cast<Instruction>(V2);
129 if (CxtI && CxtI->getParent())
130 return CxtI;
131
132 return nullptr;
133}
134
135static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
136 const APInt &DemandedElts,
137 APInt &DemandedLHS, APInt &DemandedRHS) {
138 if (isa<ScalableVectorType>(Shuf->getType())) {
139 assert(DemandedElts == APInt(1,1));
140 DemandedLHS = DemandedRHS = DemandedElts;
141 return true;
142 }
143
144 int NumElts =
145 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
146 return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
147 DemandedElts, DemandedLHS, DemandedRHS);
148}
149
150static void computeKnownBits(const Value *V, const APInt &DemandedElts,
151 KnownBits &Known, unsigned Depth,
152 const SimplifyQuery &Q);
153
154void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
155 const SimplifyQuery &Q) {
156 // Since the number of lanes in a scalable vector is unknown at compile time,
157 // we track one bit which is implicitly broadcast to all lanes. This means
158 // that all lanes in a scalable vector are considered demanded.
159 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
160 APInt DemandedElts =
161 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
162 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
163}
164
165void llvm::computeKnownBits(const Value *V, KnownBits &Known,
166 const DataLayout &DL, unsigned Depth,
167 AssumptionCache *AC, const Instruction *CxtI,
168 const DominatorTree *DT, bool UseInstrInfo) {
169 computeKnownBits(
170 V, Known, Depth,
171 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
172}
173
174KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
175 unsigned Depth, AssumptionCache *AC,
176 const Instruction *CxtI,
177 const DominatorTree *DT, bool UseInstrInfo) {
178 return computeKnownBits(
179 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
180}
181
182KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
183 const DataLayout &DL, unsigned Depth,
184 AssumptionCache *AC, const Instruction *CxtI,
185 const DominatorTree *DT, bool UseInstrInfo) {
186 return computeKnownBits(
187 V, DemandedElts, Depth,
188 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
189}
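// Illustrative usage sketch for the computeKnownBits() overloads above
// (assumes the default arguments declared in ValueTracking.h):
//   KnownBits Known = computeKnownBits(V, DL);
//   if (Known.countMinTrailingZeros() >= 2) {
//     // V is known to be a multiple of 4.
//   }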
190
191static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
192 const SimplifyQuery &SQ) {
193 // Look for an inverted mask: (X & ~M) op (Y & M).
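 // For example, with M = 0b0011: (X & ~M) can only have bits set above
 // bit 1, while (Y & M) can only have bits 0-1 set, so the two sides
 // share no set bits regardless of X and Y.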
194 {
195 Value *M;
196 if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
197 match(RHS, m_c_And(m_Specific(M), m_Value())) &&
198 isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
199 return true;
200 }
201
202 // X op (Y & ~X)
203 if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
204 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
205 return true;
206
207 // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
208 // for constant Y.
209 Value *Y;
210 if (match(RHS,
211 m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
212 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
213 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
214 return true;
215
216 // Peek through extends to find a 'not' of the other side:
217 // (ext Y) op ext(~Y)
218 if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
219 match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
220 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
221 return true;
222
223 // Look for: (A & B) op ~(A | B)
224 {
225 Value *A, *B;
226 if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
227 match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
228 isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
229 isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
230 return true;
231 }
232
233 return false;
234}
235
236bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
237 const WithCache<const Value *> &RHSCache,
238 const SimplifyQuery &SQ) {
239 const Value *LHS = LHSCache.getValue();
240 const Value *RHS = RHSCache.getValue();
241
242 assert(LHS->getType() == RHS->getType() &&
243 "LHS and RHS should have the same type");
245 "LHS and RHS should be integers");
246
249 return true;
250
252 RHSCache.getKnownBits(SQ));
253}
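// Illustrative consequence of haveNoCommonBitsSet(): when it returns true,
// an add of the two values never carries, so e.g. (x & 0xF0) + (y & 0x0F)
// computes the same result as (x & 0xF0) | (y & 0x0F) for every x and y.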
254
256 return !I->user_empty() && all_of(I->users(), [](const User *U) {
257 ICmpInst::Predicate P;
258 return match(U, m_ICmp(P, m_Value(), m_Zero()));
259 });
260}
261
262bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
263 return !I->user_empty() && all_of(I->users(), [](const User *U) {
264 ICmpInst::Predicate P;
265 return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
266 });
267}
268
269static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
270 const SimplifyQuery &Q);
271
272bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
273 bool OrZero, unsigned Depth,
274 AssumptionCache *AC, const Instruction *CxtI,
275 const DominatorTree *DT, bool UseInstrInfo) {
276 return ::isKnownToBeAPowerOfTwo(
277 V, OrZero, Depth,
278 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
279}
280
281static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
282 const SimplifyQuery &Q, unsigned Depth);
283
284bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
285 unsigned Depth) {
286 return computeKnownBits(V, Depth, SQ).isNonNegative();
287}
288
289bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
290 unsigned Depth) {
291 if (auto *CI = dyn_cast<ConstantInt>(V))
292 return CI->getValue().isStrictlyPositive();
293
294 // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
295 // this updated.
296 KnownBits Known = computeKnownBits(V, Depth, SQ);
297 return Known.isNonNegative() &&
298 (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
299}
300
301bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
302 unsigned Depth) {
303 return computeKnownBits(V, Depth, SQ).isNegative();
304}
305
306static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
307 const SimplifyQuery &Q);
308
309bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
310 const DataLayout &DL, AssumptionCache *AC,
311 const Instruction *CxtI, const DominatorTree *DT,
312 bool UseInstrInfo) {
313 return ::isKnownNonEqual(
314 V1, V2, 0,
315 SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
316}
317
318bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
319 const SimplifyQuery &SQ, unsigned Depth) {
320 KnownBits Known(Mask.getBitWidth());
321 computeKnownBits(V, Known, Depth, SQ);
322 return Mask.isSubsetOf(Known.Zero);
323}
324
325static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
326 unsigned Depth, const SimplifyQuery &Q);
327
328static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
329 const SimplifyQuery &Q) {
330 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
331 APInt DemandedElts =
332 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
333 return ComputeNumSignBits(V, DemandedElts, Depth, Q);
334}
335
336unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
337 unsigned Depth, AssumptionCache *AC,
338 const Instruction *CxtI,
339 const DominatorTree *DT, bool UseInstrInfo) {
340 return ::ComputeNumSignBits(
341 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
342}
343
344unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
345 unsigned Depth, AssumptionCache *AC,
346 const Instruction *CxtI,
347 const DominatorTree *DT) {
348 unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
349 return V->getType()->getScalarSizeInBits() - SignBits + 1;
350}
351
352static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
353 bool NSW, bool NUW,
354 const APInt &DemandedElts,
355 KnownBits &KnownOut, KnownBits &Known2,
356 unsigned Depth, const SimplifyQuery &Q) {
357 computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);
358
359 // If one operand is unknown and we have no nowrap information,
360 // the result will be unknown independently of the second operand.
361 if (KnownOut.isUnknown() && !NSW && !NUW)
362 return;
363
364 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
365 KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
366}
367
368static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
369 const APInt &DemandedElts, KnownBits &Known,
370 KnownBits &Known2, unsigned Depth,
371 const SimplifyQuery &Q) {
372 computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
373 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
374
375 bool isKnownNegative = false;
376 bool isKnownNonNegative = false;
377 // If the multiplication is known not to overflow, compute the sign bit.
378 if (NSW) {
379 if (Op0 == Op1) {
380 // The product of a number with itself is non-negative.
381 isKnownNonNegative = true;
382 } else {
383 bool isKnownNonNegativeOp1 = Known.isNonNegative();
384 bool isKnownNonNegativeOp0 = Known2.isNonNegative();
385 bool isKnownNegativeOp1 = Known.isNegative();
386 bool isKnownNegativeOp0 = Known2.isNegative();
387 // The product of two numbers with the same sign is non-negative.
388 isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
389 (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
390 // The product of a negative number and a non-negative number is either
391 // negative or zero.
392 if (!isKnownNonNegative)
393 isKnownNegative =
394 (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
395 Known2.isNonZero()) ||
396 (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
397 }
398 }
399
400 bool SelfMultiply = Op0 == Op1;
401 if (SelfMultiply)
402 SelfMultiply &=
403 isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
404 Known = KnownBits::mul(Known, Known2, SelfMultiply);
405
406 // Only make use of no-wrap flags if we failed to compute the sign bit
407 // directly. This matters if the multiplication always overflows, in
408 // which case we prefer to follow the result of the direct computation,
409 // though as the program is invoking undefined behaviour we can choose
410 // whatever we like here.
411 if (isKnownNonNegative && !Known.isNegative())
412 Known.makeNonNegative();
413 else if (isKnownNegative && !Known.isNonNegative())
414 Known.makeNegative();
415}
416
418 KnownBits &Known) {
419 unsigned BitWidth = Known.getBitWidth();
420 unsigned NumRanges = Ranges.getNumOperands() / 2;
421 assert(NumRanges >= 1);
422
423 Known.Zero.setAllBits();
424 Known.One.setAllBits();
425
426 for (unsigned i = 0; i < NumRanges; ++i) {
427 ConstantInt *Lower =
428 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
429 ConstantInt *Upper =
430 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
431 ConstantRange Range(Lower->getValue(), Upper->getValue());
432
433 // The first CommonPrefixBits of all values in Range are equal.
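 // For example, a single range [0x50, 0x60) on an i8 load has
 // UnsignedMax ^ UnsignedMin == 0x0F, so CommonPrefixBits == 4 and the
 // top nibble of the result is known to be 0101.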
434 unsigned CommonPrefixBits =
435 (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
436 APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
437 APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
438 Known.One &= UnsignedMax & Mask;
439 Known.Zero &= ~UnsignedMax & Mask;
440 }
441}
442
443static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
444 SmallVector<const Value *, 16> WorkSet(1, I);
445 SmallPtrSet<const Value *, 32> Visited;
446 SmallPtrSet<const Value *, 16> EphValues;
447
448 // The instruction defining an assumption's condition itself is always
449 // considered ephemeral to that assumption (even if it has other
450 // non-ephemeral users). See r246696's test case for an example.
451 if (is_contained(I->operands(), E))
452 return true;
453
454 while (!WorkSet.empty()) {
455 const Value *V = WorkSet.pop_back_val();
456 if (!Visited.insert(V).second)
457 continue;
458
459 // If all uses of this value are ephemeral, then so is this value.
460 if (llvm::all_of(V->users(), [&](const User *U) {
461 return EphValues.count(U);
462 })) {
463 if (V == E)
464 return true;
465
466 if (V == I || (isa<Instruction>(V) &&
467 !cast<Instruction>(V)->mayHaveSideEffects() &&
468 !cast<Instruction>(V)->isTerminator())) {
469 EphValues.insert(V);
470 if (const User *U = dyn_cast<User>(V))
471 append_range(WorkSet, U->operands());
472 }
473 }
474 }
475
476 return false;
477}
478
479// Is this an intrinsic that cannot be speculated but also cannot trap?
480bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
481 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
482 return CI->isAssumeLikeIntrinsic();
483
484 return false;
485}
486
487bool llvm::isValidAssumeForContext(const Instruction *Inv,
488 const Instruction *CxtI,
489 const DominatorTree *DT,
490 bool AllowEphemerals) {
491 // There are two restrictions on the use of an assume:
492 // 1. The assume must dominate the context (or the control flow must
493 // reach the assume whenever it reaches the context).
494 // 2. The context must not be in the assume's set of ephemeral values
495 // (otherwise we will use the assume to prove that the condition
496 // feeding the assume is trivially true, thus causing the removal of
497 // the assume).
498
499 if (Inv->getParent() == CxtI->getParent()) {
500 // If Inv and CtxI are in the same block, check if the assume (Inv) is first
501 // in the BB.
502 if (Inv->comesBefore(CxtI))
503 return true;
504
505 // Don't let an assume affect itself - this would cause the problems
506 // `isEphemeralValueOf` is trying to prevent, and it would also make
507 // the loop below go out of bounds.
508 if (!AllowEphemerals && Inv == CxtI)
509 return false;
510
511 // The context comes first, but they're both in the same block.
512 // Make sure there is nothing in between that might interrupt
513 // the control flow, not even CxtI itself.
514 // We limit the scan distance between the assume and its context instruction
515 // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
516 // it can be adjusted if needed (could be turned into a cl::opt).
517 auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
518 if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
519 return false;
520
521 return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
522 }
523
524 // Inv and CxtI are in different blocks.
525 if (DT) {
526 if (DT->dominates(Inv, CxtI))
527 return true;
528 } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
529 // We don't have a DT, but this trivially dominates.
530 return true;
531 }
532
533 return false;
534}
535
536// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
537// we still have enough information about `RHS` to conclude non-zero. For
538// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
539// so the extra compile time may not be worth it, but possibly a second API
540// should be created for use outside of loops.
541static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
542 // v u> y implies v != 0.
543 if (Pred == ICmpInst::ICMP_UGT)
544 return true;
545
546 // Special-case v != 0 to also handle v != null.
547 if (Pred == ICmpInst::ICMP_NE)
548 return match(RHS, m_Zero());
549
550 // All other predicates - rely on generic ConstantRange handling.
551 const APInt *C;
552 APInt Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
553 if (match(RHS, m_APInt(C))) {
554 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
555 return !TrueValues.contains(Zero);
556 }
557
558 auto *VC = dyn_cast<ConstantDataVector>(RHS);
559 if (VC == nullptr)
560 return false;
561
562 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
563 ++ElemIdx) {
564 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
565 Pred, VC->getElementAsAPInt(ElemIdx));
566 if (TrueValues.contains(Zero))
567 return false;
568 }
569 return true;
570}
571
572static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
573 // Use of assumptions is context-sensitive. If we don't have a context, we
574 // cannot use them!
575 if (!Q.AC || !Q.CxtI)
576 return false;
577
578 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
579 if (!Elem.Assume)
580 continue;
581
582 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
583 assert(I->getFunction() == Q.CxtI->getFunction() &&
584 "Got assumption for the wrong function!");
585
586 if (Elem.Index != AssumptionCache::ExprResultIdx) {
587 if (!V->getType()->isPointerTy())
588 continue;
590 *I, I->bundle_op_info_begin()[Elem.Index])) {
591 if (RK.WasOn == V &&
592 (RK.AttrKind == Attribute::NonNull ||
593 (RK.AttrKind == Attribute::Dereferenceable &&
595 V->getType()->getPointerAddressSpace()))) &&
597 return true;
598 }
599 continue;
600 }
601
602 // Warning: This loop can end up being somewhat performance sensitive.
603 // We're running this loop once for each value queried, resulting in a
604 // runtime of ~O(#assumes * #values).
605
606 Value *RHS;
607 CmpInst::Predicate Pred;
608 auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
609 if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
610 return false;
611
612 if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
613 return true;
614 }
615
616 return false;
617}
618
619static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
620 Value *LHS, Value *RHS, KnownBits &Known,
621 const SimplifyQuery &Q) {
622 if (RHS->getType()->isPointerTy()) {
623 // Handle comparison of pointer to null explicitly, as it will not be
624 // covered by the m_APInt() logic below.
625 if (LHS == V && match(RHS, m_Zero())) {
626 switch (Pred) {
627 case ICmpInst::ICMP_EQ:
628 Known.setAllZero();
629 break;
630 case ICmpInst::ICMP_SGE:
631 case ICmpInst::ICMP_SGT:
632 Known.makeNonNegative();
633 break;
634 case ICmpInst::ICMP_SLT:
635 Known.makeNegative();
636 break;
637 default:
638 break;
639 }
640 }
641 return;
642 }
643
644 unsigned BitWidth = Known.getBitWidth();
645 auto m_V =
646 m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));
647
648 Value *Y;
649 const APInt *Mask, *C;
650 uint64_t ShAmt;
651 switch (Pred) {
652 case ICmpInst::ICMP_EQ:
653 // assume(V = C)
654 if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
655 Known = Known.unionWith(KnownBits::makeConstant(*C));
656 // assume(V & Mask = C)
657 } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
658 match(RHS, m_APInt(C))) {
659 // For one bits in Mask, we can propagate bits from C to V.
660 Known.One |= *C;
661 if (match(Y, m_APInt(Mask)))
662 Known.Zero |= ~*C & *Mask;
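 // E.g. assume((V & 0b1100) == 0b0100) makes bit 2 of V known one and
 // bit 3 known zero, while bits 0-1 stay unknown.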
663 // assume(V | Mask = C)
664 } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
665 // For zero bits in Mask, we can propagate bits from C to V.
666 Known.Zero |= ~*C;
667 if (match(Y, m_APInt(Mask)))
668 Known.One |= *C & ~*Mask;
669 // assume(V ^ Mask = C)
670 } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
671 match(RHS, m_APInt(C))) {
672 // Equivalent to assume(V == Mask ^ C)
673 Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
674 // assume(V << ShAmt = C)
675 } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
676 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
677 // For those bits in C that are known, we can propagate them to known
678 // bits in V shifted to the right by ShAmt.
679 KnownBits RHSKnown = KnownBits::makeConstant(*C);
680 RHSKnown.Zero.lshrInPlace(ShAmt);
681 RHSKnown.One.lshrInPlace(ShAmt);
682 Known = Known.unionWith(RHSKnown);
683 // assume(V >> ShAmt = C)
684 } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
685 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
686 KnownBits RHSKnown = KnownBits::makeConstant(*C);
687 // For those bits in RHS that are known, we can propagate them to known
688 // bits in V shifted to the right by C.
689 Known.Zero |= RHSKnown.Zero << ShAmt;
690 Known.One |= RHSKnown.One << ShAmt;
691 }
692 break;
693 case ICmpInst::ICMP_NE: {
694 // assume (V & B != 0) where B is a power of 2
695 const APInt *BPow2;
696 if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
697 Known.One |= *BPow2;
698 break;
699 }
700 default:
701 if (match(RHS, m_APInt(C))) {
702 const APInt *Offset = nullptr;
703 if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
704 ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
705 if (Offset)
706 LHSRange = LHSRange.sub(*Offset);
707 Known = Known.unionWith(LHSRange.toKnownBits());
708 }
709 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
710 // X & Y u> C -> X u> C && Y u> C
711 // X nuw- Y u> C -> X u> C
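 // E.g. on i8, assume((X & Y) u> 0xF0) implies X u>= 0xF1, so the top
 // four bits of X (and of Y) become known ones.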
712 if (match(LHS, m_c_And(m_V, m_Value())) ||
713 match(LHS, m_NUWSub(m_V, m_Value())))
714 Known.One.setHighBits(
715 (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
716 }
717 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
718 // X | Y u< C -> X u< C && Y u< C
719 // X nuw+ Y u< C -> X u< C && Y u< C
720 if (match(LHS, m_c_Or(m_V, m_Value())) ||
721 match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
722 Known.Zero.setHighBits(
723 (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
724 }
725 }
726 }
727 break;
728 }
729}
730
731static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
732 KnownBits &Known,
733 const SimplifyQuery &SQ, bool Invert) {
734 ICmpInst::Predicate Pred =
735 Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
736 Value *LHS = Cmp->getOperand(0);
737 Value *RHS = Cmp->getOperand(1);
738
739 // Handle icmp pred (trunc V), C
740 if (match(LHS, m_Trunc(m_Specific(V)))) {
741 KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
742 computeKnownBitsFromCmp(LHS, Pred, LHS, RHS, DstKnown, SQ);
743 Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
744 return;
745 }
746
747 computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
748}
749
751 KnownBits &Known, unsigned Depth,
752 const SimplifyQuery &SQ, bool Invert) {
753 Value *A, *B;
754 if (Depth < MaxAnalysisRecursionDepth &&
755 match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
756 KnownBits Known2(Known.getBitWidth());
757 KnownBits Known3(Known.getBitWidth());
758 computeKnownBitsFromCond(V, A, Known2, Depth + 1, SQ, Invert);
759 computeKnownBitsFromCond(V, B, Known3, Depth + 1, SQ, Invert);
760 if (Invert ? match(Cond, m_LogicalOr(m_Value(), m_Value()))
761 : match(Cond, m_LogicalAnd(m_Value(), m_Value())))
762 Known2 = Known2.unionWith(Known3);
763 else
764 Known2 = Known2.intersectWith(Known3);
765 Known = Known.unionWith(Known2);
766 }
767
768 if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
769 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
770}
771
772void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
773 unsigned Depth, const SimplifyQuery &Q) {
774 if (!Q.CxtI)
775 return;
776
777 if (Q.DC && Q.DT) {
778 // Handle dominating conditions.
779 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
780 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
781 if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
782 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
783 /*Invert*/ false);
784
785 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
786 if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
787 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
788 /*Invert*/ true);
789 }
790
791 if (Known.hasConflict())
792 Known.resetAll();
793 }
794
795 if (!Q.AC)
796 return;
797
798 unsigned BitWidth = Known.getBitWidth();
799
800 // Note that the patterns below need to be kept in sync with the code
801 // in AssumptionCache::updateAffectedValues.
802
803 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
804 if (!Elem.Assume)
805 continue;
806
807 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
808 assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
809 "Got assumption for the wrong function!");
810
811 if (Elem.Index != AssumptionCache::ExprResultIdx) {
812 if (!V->getType()->isPointerTy())
813 continue;
815 *I, I->bundle_op_info_begin()[Elem.Index])) {
816 if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
817 isPowerOf2_64(RK.ArgValue) &&
818 isValidAssumeForContext(I, Q.CxtI, Q.DT))
819 Known.Zero.setLowBits(Log2_64(RK.ArgValue));
820 }
821 continue;
822 }
823
824 // Warning: This loop can end up being somewhat performance sensitive.
825 // We're running this loop once for each value queried, resulting in a
826 // runtime of ~O(#assumes * #values).
827
828 Value *Arg = I->getArgOperand(0);
829
830 if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
831 assert(BitWidth == 1 && "assume operand is not i1?");
832 (void)BitWidth;
833 Known.setAllOnes();
834 return;
835 }
836 if (match(Arg, m_Not(m_Specific(V))) &&
837 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
838 assert(BitWidth == 1 && "assume operand is not i1?");
839 (void)BitWidth;
840 Known.setAllZero();
841 return;
842 }
843
844 // The remaining tests are all recursive, so bail out if we hit the limit.
845 if (Depth == MaxAnalysisRecursionDepth)
846 continue;
847
848 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
849 if (!Cmp)
850 continue;
851
852 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
853 continue;
854
855 computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
856 }
857
858 // Conflicting assumption: Undefined behavior will occur on this execution
859 // path.
860 if (Known.hasConflict())
861 Known.resetAll();
862}
863
864/// Compute known bits from a shift operator, including those with a
865/// non-constant shift amount. Known is the output of this function. Known2 is a
866/// pre-allocated temporary with the same bit width as Known and on return
867/// contains the known bits of the shift value source. KF is an
868/// operator-specific function that, given the known bits and a shift amount,
869/// computes the implied known bits of the shift operator's result for that
870/// shift amount. The results from calling KF are conservatively
871/// combined for all permitted shift amounts.
872static void computeKnownBitsFromShiftOperator(
873 const Operator *I, const APInt &DemandedElts, KnownBits &Known,
874 KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
875 function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
876 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
877 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
878 // To limit compile-time impact, only query isKnownNonZero() if we know at
879 // least something about the shift amount.
880 bool ShAmtNonZero =
881 Known.isNonZero() ||
882 (Known.getMaxValue().ult(Known.getBitWidth()) &&
883 isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
884 Known = KF(Known2, Known, ShAmtNonZero);
885}
886
887static KnownBits
888getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
889 const KnownBits &KnownLHS, const KnownBits &KnownRHS,
890 unsigned Depth, const SimplifyQuery &Q) {
891 unsigned BitWidth = KnownLHS.getBitWidth();
892 KnownBits KnownOut(BitWidth);
893 bool IsAnd = false;
894 bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
895 Value *X = nullptr, *Y = nullptr;
896
897 switch (I->getOpcode()) {
898 case Instruction::And:
899 KnownOut = KnownLHS & KnownRHS;
900 IsAnd = true;
901 // and(x, -x) is a common idiom that clears all but the lowest set
902 // bit. If we have a single known bit in x, we can clear all bits
903 // above it.
904 // TODO: instcombine often reassociates independent `and` which can hide
905 // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
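 // E.g. for x = 0b01100, -x = 0b10100 and x & -x = 0b00100: only the
 // lowest set bit of x survives, which is what blsi() models.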
906 if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
907 // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
908 if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
909 KnownOut = KnownLHS.blsi();
910 else
911 KnownOut = KnownRHS.blsi();
912 }
913 break;
914 case Instruction::Or:
915 KnownOut = KnownLHS | KnownRHS;
916 break;
917 case Instruction::Xor:
918 KnownOut = KnownLHS ^ KnownRHS;
919 // xor(x, x-1) is a common idiom that clears all but the lowest set
920 // bit. If we have a single known bit in x, we can clear all bits
921 // above it.
922 // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
923 // -1, but for the purpose of demanded bits (xor(x, x-C) &
924 // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
925 // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
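 // E.g. for x = 0b01100, x-1 = 0b01011 and x ^ (x-1) = 0b00111: a mask
 // of the lowest set bit and everything below it, which blsmsk() models.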
926 if (HasKnownOne &&
928 const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
929 KnownOut = XBits.blsmsk();
930 }
931 break;
932 default:
933 llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
934 }
935
936 // and(x, add (x, -1)) is a common idiom that always clears the low bit;
937 // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
938 // here we handle the more general case of adding any odd number by
939 // matching the form and/xor/or(x, add(x, y)) where y is odd.
940 // TODO: This could be generalized to clearing any bit set in y where the
941 // following bit is known to be unset in y.
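 // E.g. if y is odd, the low bit of add(x, y) is always the complement of
 // the low bit of x, so and(x, add(x, y)) has a known-zero low bit and
 // xor/or(x, add(x, y)) has a known-one low bit.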
942 if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
943 match(I, m_c_BinOp(m_Value(X),
944 m_c_Add(m_Deferred(X),
945 m_Value(Y))))) {
946 KnownBits KnownY(BitWidth);
947 computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
948 if (KnownY.countMinTrailingOnes() > 0) {
949 if (IsAnd)
950 KnownOut.Zero.setBit(0);
951 else
952 KnownOut.One.setBit(0);
953 }
954 }
955 return KnownOut;
956}
957
958// Public so this can be used in `SimplifyDemandedUseBits`.
960 const KnownBits &KnownLHS,
961 const KnownBits &KnownRHS,
962 unsigned Depth,
963 const SimplifyQuery &SQ) {
964 auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
965 APInt DemandedElts =
966 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
967
968 return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
969 SQ);
970}
971
972ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
973 Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
974 // Without vscale_range, we only know that vscale is non-zero.
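 // (Expressed below as the wrapped range [1, 0), i.e. every value but zero.)
 // E.g. a vscale_range(2,4) attribute yields the range [2, 5), i.e. {2, 3, 4}.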
975 if (!Attr.isValid())
976 return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));
977
978 unsigned AttrMin = Attr.getVScaleRangeMin();
979 // Minimum is larger than vscale width, result is always poison.
980 if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
981 return ConstantRange::getEmpty(BitWidth);
982
983 APInt Min(BitWidth, AttrMin);
984 std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
985 if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
986 return ConstantRange::getNonEmpty(Min, APInt::getZero(BitWidth));
987
988 return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
989}
990
991static void computeKnownBitsFromOperator(const Operator *I,
992 const APInt &DemandedElts,
993 KnownBits &Known, unsigned Depth,
994 const SimplifyQuery &Q) {
995 unsigned BitWidth = Known.getBitWidth();
996
997 KnownBits Known2(BitWidth);
998 switch (I->getOpcode()) {
999 default: break;
1000 case Instruction::Load:
1001 if (MDNode *MD =
1002 Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
1003 computeKnownBitsFromRangeMetadata(*MD, Known);
1004 break;
1005 case Instruction::And:
1006 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1007 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1008
1009 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1010 break;
1011 case Instruction::Or:
1012 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1013 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1014
1015 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1016 break;
1017 case Instruction::Xor:
1018 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1019 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1020
1021 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1022 break;
1023 case Instruction::Mul: {
1024 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1025 computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
1026 Known, Known2, Depth, Q);
1027 break;
1028 }
1029 case Instruction::UDiv: {
1030 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1031 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1032 Known =
1033 KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1034 break;
1035 }
1036 case Instruction::SDiv: {
1037 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1038 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1039 Known =
1040 KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1041 break;
1042 }
1043 case Instruction::Select: {
1044 auto ComputeForArm = [&](Value *Arm, bool Invert) {
1045 KnownBits Res(Known.getBitWidth());
1046 computeKnownBits(Arm, Res, Depth + 1, Q);
1047 // If we have a constant arm, we are done.
1048 if (Res.isConstant())
1049 return Res;
1050
1051 // See what condition implies about the bits of the two select arms.
1052 KnownBits CondRes(Res.getBitWidth());
1053 computeKnownBitsFromCond(Arm, I->getOperand(0), CondRes, Depth + 1, Q,
1054 Invert);
1055 // If we don't get any information from the condition, no reason to
1056 // proceed.
1057 if (CondRes.isUnknown())
1058 return Res;
1059
1060 // We can have a conflict if the condition is dead. I.e., if we have
1061 // (x | 64) < 32 ? (x | 64) : y
1062 // we will have a conflict at bit 6 from the condition/the `or`.
1063 // In that case just return. It's not particularly important
1064 // what we do, as this select is going to be simplified soon.
1065 CondRes = CondRes.unionWith(Res);
1066 if (CondRes.hasConflict())
1067 return Res;
1068
1069 // Finally make sure the information we found is valid. This is relatively
1070 // expensive so it's left for the very end.
1071 if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
1072 return Res;
1073
1074 // Finally, we know we get information from the condition and it's valid,
1075 // so return it.
1076 return CondRes;
1077 };
1078 // Only known if known in both the LHS and RHS.
1079 Known =
1080 ComputeForArm(I->getOperand(1), /*Invert=*/false)
1081 .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
1082 break;
1083 }
1084 case Instruction::FPTrunc:
1085 case Instruction::FPExt:
1086 case Instruction::FPToUI:
1087 case Instruction::FPToSI:
1088 case Instruction::SIToFP:
1089 case Instruction::UIToFP:
1090 break; // Can't work with floating point.
1091 case Instruction::PtrToInt:
1092 case Instruction::IntToPtr:
1093 // Fall through and handle them the same as zext/trunc.
1094 [[fallthrough]];
1095 case Instruction::ZExt:
1096 case Instruction::Trunc: {
1097 Type *SrcTy = I->getOperand(0)->getType();
1098
1099 unsigned SrcBitWidth;
1100 // Note that we handle pointer operands here because of inttoptr/ptrtoint
1101 // which fall through here.
1102 Type *ScalarTy = SrcTy->getScalarType();
1103 SrcBitWidth = ScalarTy->isPointerTy() ?
1104 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1105 Q.DL.getTypeSizeInBits(ScalarTy);
1106
1107 assert(SrcBitWidth && "SrcBitWidth can't be zero");
1108 Known = Known.anyextOrTrunc(SrcBitWidth);
1109 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1110 if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
1111 Inst && Inst->hasNonNeg() && !Known.isNegative())
1112 Known.makeNonNegative();
1113 Known = Known.zextOrTrunc(BitWidth);
1114 break;
1115 }
1116 case Instruction::BitCast: {
1117 Type *SrcTy = I->getOperand(0)->getType();
1118 if (SrcTy->isIntOrPtrTy() &&
1119 // TODO: For now, not handling conversions like:
1120 // (bitcast i64 %x to <2 x i32>)
1121 !I->getType()->isVectorTy()) {
1122 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1123 break;
1124 }
1125
1126 const Value *V;
1127 // Handle bitcast from floating point to integer.
1128 if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
1129 V->getType()->isFPOrFPVectorTy()) {
1130 Type *FPType = V->getType()->getScalarType();
1131 KnownFPClass Result = computeKnownFPClass(V, fcAllFlags, Depth + 1, Q);
1132 FPClassTest FPClasses = Result.KnownFPClasses;
1133
1134 // TODO: Treat it as zero/poison if the use of I is unreachable.
1135 if (FPClasses == fcNone)
1136 break;
1137
1138 if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
1139 Known.Zero.setAllBits();
1140 Known.One.setAllBits();
1141
1142 if (FPClasses & fcInf)
1143 Known = Known.intersectWith(KnownBits::makeConstant(
1144 APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
1145
1146 if (FPClasses & fcZero)
1147 Known = Known.intersectWith(KnownBits::makeConstant(
1148 APInt::getZero(FPType->getScalarSizeInBits())));
1149
1150 Known.Zero.clearSignBit();
1151 Known.One.clearSignBit();
1152 }
1153
1154 if (Result.SignBit) {
1155 if (*Result.SignBit)
1156 Known.makeNegative();
1157 else
1158 Known.makeNonNegative();
1159 }
1160
1161 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1162 break;
1163 }
1164
1165 // Handle cast from vector integer type to scalar or vector integer.
1166 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
1167 if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1168 !I->getType()->isIntOrIntVectorTy() ||
1169 isa<ScalableVectorType>(I->getType()))
1170 break;
1171
1172 // Look through a cast from narrow vector elements to wider type.
1173 // Examples: v4i32 -> v2i64, v3i8 -> v24
1174 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1175 if (BitWidth % SubBitWidth == 0) {
1176 // Known bits are automatically intersected across demanded elements of a
1177 // vector. So for example, if a bit is computed as known zero, it must be
1178 // zero across all demanded elements of the vector.
1179 //
1180 // For this bitcast, each demanded element of the output is sub-divided
1181 // across a set of smaller vector elements in the source vector. To get
1182 // the known bits for an entire element of the output, compute the known
1183 // bits for each sub-element sequentially. This is done by shifting the
1184 // one-set-bit demanded elements parameter across the sub-elements for
1185 // consecutive calls to computeKnownBits. We are using the demanded
1186 // elements parameter as a mask operator.
1187 //
1188 // The known bits of each sub-element are then inserted into place
1189 // (dependent on endian) to form the full result of known bits.
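 // E.g. for a little-endian bitcast of <4 x i8> to <2 x i16>, output
 // element 0 is assembled from source elements 0 (bits 0-7) and 1
 // (bits 8-15), so demanding output element i demands source elements
 // 2*i and 2*i+1.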
1190 unsigned NumElts = DemandedElts.getBitWidth();
1191 unsigned SubScale = BitWidth / SubBitWidth;
1192 APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
1193 for (unsigned i = 0; i != NumElts; ++i) {
1194 if (DemandedElts[i])
1195 SubDemandedElts.setBit(i * SubScale);
1196 }
1197
1198 KnownBits KnownSrc(SubBitWidth);
1199 for (unsigned i = 0; i != SubScale; ++i) {
1200 computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
1201 Depth + 1, Q);
1202 unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1203 Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
1204 }
1205 }
1206 break;
1207 }
1208 case Instruction::SExt: {
1209 // Compute the bits in the result that are not present in the input.
1210 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1211
1212 Known = Known.trunc(SrcBitWidth);
1213 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1214 // If the sign bit of the input is known set or clear, then we know the
1215 // top bits of the result.
1216 Known = Known.sext(BitWidth);
1217 break;
1218 }
1219 case Instruction::Shl: {
1220 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1221 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1222 auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1223 bool ShAmtNonZero) {
1224 return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
1225 };
1226 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1227 KF);
1228 // Trailing zeros of a left-shifted constant never decrease.
1229 const APInt *C;
1230 if (match(I->getOperand(0), m_APInt(C)))
1231 Known.Zero.setLowBits(C->countr_zero());
1232 break;
1233 }
1234 case Instruction::LShr: {
1235 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1236 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1237 bool ShAmtNonZero) {
1238 return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1239 };
1240 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1241 KF);
1242 // Leading zeros of a right-shifted constant never decrease.
1243 const APInt *C;
1244 if (match(I->getOperand(0), m_APInt(C)))
1245 Known.Zero.setHighBits(C->countl_zero());
1246 break;
1247 }
1248 case Instruction::AShr: {
1249 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1250 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1251 bool ShAmtNonZero) {
1252 return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1253 };
1254 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1255 KF);
1256 break;
1257 }
1258 case Instruction::Sub: {
1259 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1260 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1261 computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, NUW,
1262 DemandedElts, Known, Known2, Depth, Q);
1263 break;
1264 }
1265 case Instruction::Add: {
1266 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1267 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1268 computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, NUW,
1269 DemandedElts, Known, Known2, Depth, Q);
1270 break;
1271 }
1272 case Instruction::SRem:
1273 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1274 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1275 Known = KnownBits::srem(Known, Known2);
1276 break;
1277
1278 case Instruction::URem:
1279 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1280 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1281 Known = KnownBits::urem(Known, Known2);
1282 break;
1283 case Instruction::Alloca:
1284 Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
1285 break;
1286 case Instruction::GetElementPtr: {
1287 // Analyze all of the subscripts of this getelementptr instruction
1288 // to determine if we can prove known low zero bits.
1289 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1290 // Accumulate the constant indices in a separate variable
1291 // to minimize the number of calls to computeForAddSub.
1292 APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
1293
1294 gep_type_iterator GTI = gep_type_begin(I);
1295 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1296 // TrailZ can only become smaller, short-circuit if we hit zero.
1297 if (Known.isUnknown())
1298 break;
1299
1300 Value *Index = I->getOperand(i);
1301
1302 // Handle case when index is zero.
1303 Constant *CIndex = dyn_cast<Constant>(Index);
1304 if (CIndex && CIndex->isZeroValue())
1305 continue;
1306
1307 if (StructType *STy = GTI.getStructTypeOrNull()) {
1308 // Handle struct member offset arithmetic.
1309
1310 assert(CIndex &&
1311 "Access to structure field must be known at compile time");
1312
1313 if (CIndex->getType()->isVectorTy())
1314 Index = CIndex->getSplatValue();
1315
1316 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1317 const StructLayout *SL = Q.DL.getStructLayout(STy);
1318 uint64_t Offset = SL->getElementOffset(Idx);
1319 AccConstIndices += Offset;
1320 continue;
1321 }
1322
1323 // Handle array index arithmetic.
1324 Type *IndexedTy = GTI.getIndexedType();
1325 if (!IndexedTy->isSized()) {
1326 Known.resetAll();
1327 break;
1328 }
1329
1330 unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
1331 KnownBits IndexBits(IndexBitWidth);
1332 computeKnownBits(Index, IndexBits, Depth + 1, Q);
1333 TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
1334 uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
1335 KnownBits ScalingFactor(IndexBitWidth);
1336 // Multiply by current sizeof type.
1337 // &A[i] == A + i * sizeof(*A[i]).
1338 if (IndexTypeSize.isScalable()) {
1339 // For scalable types the only thing we know about sizeof is
1340 // that this is a multiple of the minimum size.
1341 ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
1342 } else if (IndexBits.isConstant()) {
1343 APInt IndexConst = IndexBits.getConstant();
1344 APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
1345 IndexConst *= ScalingFactor;
1346 AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
1347 continue;
1348 } else {
1349 ScalingFactor =
1350 KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
1351 }
1352 IndexBits = KnownBits::mul(IndexBits, ScalingFactor);
1353
1354 // If the offsets have a different width from the pointer, according
1355 // to the language reference we need to sign-extend or truncate them
1356 // to the width of the pointer.
1357 IndexBits = IndexBits.sextOrTrunc(BitWidth);
1358
1359 // Note that inbounds does *not* guarantee nsw for the addition, as only
1360 // the offset is signed, while the base address is unsigned.
1361 Known = KnownBits::computeForAddSub(
1362 /*Add=*/true, /*NSW=*/false, /* NUW=*/false, Known, IndexBits);
1363 }
1364 if (!Known.isUnknown() && !AccConstIndices.isZero()) {
1365 KnownBits Index = KnownBits::makeConstant(AccConstIndices);
1366 Known = KnownBits::computeForAddSub(
1367 /*Add=*/true, /*NSW=*/false, /* NUW=*/false, Known, Index);
1368 }
1369 break;
1370 }
1371 case Instruction::PHI: {
1372 const PHINode *P = cast<PHINode>(I);
1373 BinaryOperator *BO = nullptr;
1374 Value *R = nullptr, *L = nullptr;
1375 if (matchSimpleRecurrence(P, BO, R, L)) {
1376 // Handle the case of a simple two-predecessor recurrence PHI.
1377 // There's a lot more that could theoretically be done here, but
1378 // this is sufficient to catch some interesting cases.
1379 unsigned Opcode = BO->getOpcode();
1380
1381 // If this is a shift recurrence, we know the bits being shifted in.
1382 // We can combine that with information about the start value of the
1383 // recurrence to conclude facts about the result.
1384 if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
1385 Opcode == Instruction::Shl) &&
1386 BO->getOperand(0) == I) {
1387
1388 // We have matched a recurrence of the form:
1389 // %iv = [R, %entry], [%iv.next, %backedge]
1390 // %iv.next = shift_op %iv, L
1391
1392 // Recurse with the phi context to avoid concern about whether facts
1393 // inferred hold at original context instruction. TODO: It may be
1394 // correct to use the original context. IF warranted, explore and
1395 // add sufficient tests to cover.
1396 SimplifyQuery RecQ = Q;
1397 RecQ.CxtI = P;
1398 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1399 switch (Opcode) {
1400 case Instruction::Shl:
1401 // A shl recurrence will only increase the trailing zeros
1402 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1403 break;
1404 case Instruction::LShr:
1405 // A lshr recurrence will preserve the leading zeros of the
1406 // start value
1407 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1408 break;
1409 case Instruction::AShr:
1410 // An ashr recurrence will extend the initial sign bit
1411 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1412 Known.One.setHighBits(Known2.countMinLeadingOnes());
1413 break;
1414 };
1415 }
1416
1417 // Check for operations that have the property that if
1418 // both their operands have low zero bits, the result
1419 // will have low zero bits.
1420 if (Opcode == Instruction::Add ||
1421 Opcode == Instruction::Sub ||
1422 Opcode == Instruction::And ||
1423 Opcode == Instruction::Or ||
1424 Opcode == Instruction::Mul) {
1425 // Change the context instruction to the "edge" that flows into the
1426 // phi. This is important because that is where the value is actually
1427 // "evaluated" even though it is used later somewhere else. (see also
1428 // D69571).
1429 SimplifyQuery RecQ = Q;
1430
1431 unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
1432 Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
1433 Instruction *LInst = P->getIncomingBlock(1-OpNum)->getTerminator();
1434
1435 // Ok, we have a PHI of the form L op= R. Check for low
1436 // zero bits.
1437 RecQ.CxtI = RInst;
1438 computeKnownBits(R, Known2, Depth + 1, RecQ);
1439
1440 // We need to take the minimum number of known bits
1441 KnownBits Known3(BitWidth);
1442 RecQ.CxtI = LInst;
1443 computeKnownBits(L, Known3, Depth + 1, RecQ);
1444
1445 Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1446 Known3.countMinTrailingZeros()));
1447
1448 auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
1449 if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
1450 // If initial value of recurrence is nonnegative, and we are adding
1451 // a nonnegative number with nsw, the result can only be nonnegative
1452 // or poison value regardless of the number of times we execute the
1453 // add in phi recurrence. If initial value is negative and we are
1454 // adding a negative number with nsw, the result can only be
1455 // negative or poison value. Similar arguments apply to sub and mul.
1456 //
1457 // (add non-negative, non-negative) --> non-negative
1458 // (add negative, negative) --> negative
1459 if (Opcode == Instruction::Add) {
1460 if (Known2.isNonNegative() && Known3.isNonNegative())
1461 Known.makeNonNegative();
1462 else if (Known2.isNegative() && Known3.isNegative())
1463 Known.makeNegative();
1464 }
1465
1466 // (sub nsw non-negative, negative) --> non-negative
1467 // (sub nsw negative, non-negative) --> negative
1468 else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) {
1469 if (Known2.isNonNegative() && Known3.isNegative())
1470 Known.makeNonNegative();
1471 else if (Known2.isNegative() && Known3.isNonNegative())
1472 Known.makeNegative();
1473 }
1474
1475 // (mul nsw non-negative, non-negative) --> non-negative
1476 else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
1477 Known3.isNonNegative())
1478 Known.makeNonNegative();
1479 }
1480
1481 break;
1482 }
1483 }
1484
1485 // Unreachable blocks may have zero-operand PHI nodes.
1486 if (P->getNumIncomingValues() == 0)
1487 break;
1488
1489 // Otherwise take the unions of the known bit sets of the operands,
1490 // taking conservative care to avoid excessive recursion.
1491 if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
1492 // Skip if every incoming value references the PHI itself.
1493 if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
1494 break;
1495
1496 Known.Zero.setAllBits();
1497 Known.One.setAllBits();
1498 for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
1499 Value *IncValue = P->getIncomingValue(u);
1500 // Skip direct self references.
1501 if (IncValue == P) continue;
1502
1503 // Change the context instruction to the "edge" that flows into the
1504 // phi. This is important because that is where the value is actually
1505 // "evaluated" even though it is used later somewhere else. (see also
1506 // D69571).
1507 SimplifyQuery RecQ = Q;
1508 RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
1509
1510 Known2 = KnownBits(BitWidth);
1511
1512 // Recurse, but cap the recursion to one level, because we don't
1513 // want to waste time spinning around in loops.
1514 // TODO: See if we can base recursion limiter on number of incoming phi
1515 // edges so we don't overly clamp analysis.
1516 computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);
1517
1518 // See if we can further use a conditional branch into the phi
1519 // to help us determine the range of the value.
1520 if (!Known2.isConstant()) {
1521 ICmpInst::Predicate Pred;
1522 const APInt *RHSC;
1523 BasicBlock *TrueSucc, *FalseSucc;
1524 // TODO: Use RHS Value and compute range from its known bits.
1525 if (match(RecQ.CxtI,
1526 m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
1527 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
1528 // Check for cases of duplicate successors.
1529 if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
1530 // If we're using the false successor, invert the predicate.
1531 if (FalseSucc == P->getParent())
1532 Pred = CmpInst::getInversePredicate(Pred);
1533 // Get the knownbits implied by the incoming phi condition.
1534 auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
1535 KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
1536 // We can have conflicts here if we are analyzing dead code (it's
1537 // impossible for us to reach this BB based on the icmp).
1538 if (KnownUnion.hasConflict()) {
1539 // No reason to continue analyzing in a known dead region, so
1540 // just resetAll and break. This will cause us to also exit the
1541 // outer loop.
1542 Known.resetAll();
1543 break;
1544 }
1545 Known2 = KnownUnion;
1546 }
1547 }
1548 }
1549
1550 Known = Known.intersectWith(Known2);
1551 // If all bits have been ruled out, there's no need to check
1552 // more operands.
1553 if (Known.isUnknown())
1554 break;
1555 }
1556 }
1557 break;
1558 }
1559 case Instruction::Call:
1560 case Instruction::Invoke: {
1561 // If range metadata is attached to this call, set known bits from that,
1562 // and then intersect with known bits based on other properties of the
1563 // function.
1564 if (MDNode *MD =
1565 Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1566 computeKnownBitsFromRangeMetadata(*MD, Known);
1567
1568 const auto *CB = cast<CallBase>(I);
1569
1570 if (std::optional<ConstantRange> Range = CB->getRange())
1571 Known = Known.unionWith(Range->toKnownBits());
1572
1573 if (const Value *RV = CB->getReturnedArgOperand()) {
1574 if (RV->getType() == I->getType()) {
1575 computeKnownBits(RV, Known2, Depth + 1, Q);
1576 Known = Known.unionWith(Known2);
1577 // If the function doesn't return properly for all input values
1578 // (e.g. unreachable exits) then there might be conflicts between the
1579 // argument value and the range metadata. Simply discard the known bits
1580 // in case of conflicts.
1581 if (Known.hasConflict())
1582 Known.resetAll();
1583 }
1584 }
1585 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1586 switch (II->getIntrinsicID()) {
1587 default: break;
1588 case Intrinsic::abs: {
1589 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1590 bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
1591 Known = Known2.abs(IntMinIsPoison);
1592 break;
1593 }
1594 case Intrinsic::bitreverse:
1595 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1596 Known.Zero |= Known2.Zero.reverseBits();
1597 Known.One |= Known2.One.reverseBits();
1598 break;
1599 case Intrinsic::bswap:
1600 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1601 Known.Zero |= Known2.Zero.byteSwap();
1602 Known.One |= Known2.One.byteSwap();
1603 break;
1604 case Intrinsic::ctlz: {
1605 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1606 // If we have a known 1, its position is our upper bound.
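 // E.g. for an i32 with bit 28 known one, countMaxLeadingZeros() is 3,
 // so the result is at most 3 and all bits from bit 2 upward are zero.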
1607 unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1608 // If this call is poison for 0 input, the result will be less than 2^n.
1609 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1610 PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1611 unsigned LowBits = llvm::bit_width(PossibleLZ);
1612 Known.Zero.setBitsFrom(LowBits);
1613 break;
1614 }
1615 case Intrinsic::cttz: {
1616 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1617 // If we have a known 1, its position is our upper bound.
1618 unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1619 // If this call is poison for 0 input, the result will be less than 2^n.
1620 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1621 PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1622 unsigned LowBits = llvm::bit_width(PossibleTZ);
1623 Known.Zero.setBitsFrom(LowBits);
1624 break;
1625 }
1626 case Intrinsic::ctpop: {
1627 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1628 // We can bound the space the count needs. Also, bits known to be zero
1629 // can't contribute to the population.
1630 unsigned BitsPossiblySet = Known2.countMaxPopulation();
1631 unsigned LowBits = llvm::bit_width(BitsPossiblySet);
1632 Known.Zero.setBitsFrom(LowBits);
1633 // TODO: we could bound KnownOne using the lower bound on the number
1634 // of bits which might be set provided by popcnt KnownOne2.
1635 break;
1636 }
1637 case Intrinsic::fshr:
1638 case Intrinsic::fshl: {
1639 const APInt *SA;
1640 if (!match(I->getOperand(2), m_APInt(SA)))
1641 break;
1642
1643 // Normalize to funnel shift left.
1644 uint64_t ShiftAmt = SA->urem(BitWidth);
1645 if (II->getIntrinsicID() == Intrinsic::fshr)
1646 ShiftAmt = BitWidth - ShiftAmt;
1647
1648 KnownBits Known3(BitWidth);
1649 computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1650 computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);
1651
1652 Known.Zero =
1653 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1654 Known.One =
1655 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1656 break;
1657 }
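// Worked example (illustrative): on i8, fshr(X, Y, 3) is normalized above to a
// funnel shift left by 8 - 3 = 5, i.e. the result is (X << 5) | (Y lshr 3), so
// the known bits of X shifted left by 5 are combined with the known bits of Y
// shifted right logically by 3.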
1658 case Intrinsic::uadd_sat:
1659 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1660 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1661 Known = KnownBits::uadd_sat(Known, Known2);
1662 break;
1663 case Intrinsic::usub_sat:
1664 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1665 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1666 Known = KnownBits::usub_sat(Known, Known2);
1667 break;
1668 case Intrinsic::sadd_sat:
1669 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1670 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1671 Known = KnownBits::sadd_sat(Known, Known2);
1672 break;
1673 case Intrinsic::ssub_sat:
1674 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1675 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1676 Known = KnownBits::ssub_sat(Known, Known2);
1677 break;
1678 // for min/max/and/or reduce, any bit common to each element in the
1679 // input vec is set in the output.
1680 case Intrinsic::vector_reduce_and:
1681 case Intrinsic::vector_reduce_or:
1682 case Intrinsic::vector_reduce_umax:
1683 case Intrinsic::vector_reduce_umin:
1684 case Intrinsic::vector_reduce_smax:
1685 case Intrinsic::vector_reduce_smin:
1686 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1687 break;
1688 case Intrinsic::vector_reduce_xor: {
1689 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1690 // The zeros common to all vecs are zero in the output.
1691 // If the number of elements is odd, then the common ones remain. If the
1692 // number of elements is even, then the common ones become zeros.
1693 auto *VecTy = cast<VectorType>(I->getOperand(0)->getType());
1694 // Even, so the ones become zeros.
1695 bool EvenCnt = VecTy->getElementCount().isKnownEven();
1696 if (EvenCnt)
1697 Known.Zero |= Known.One;
1698 // The element count may be even (always possible for scalable vectors), so clear the ones.
1699 if (VecTy->isScalableTy() || EvenCnt)
1700 Known.One.clearAllBits();
1701 break;
1702 }
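// Worked example (illustrative): if every element of a <4 x i8> input has bit 0
// known one, the xor of an even number of such elements clears that common one,
// so it becomes a known zero; for a <3 x i8> input the common one survives. For
// scalable vectors the element-count parity is unknown, so the common ones are
// conservatively dropped without being turned into zeros.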
1703 case Intrinsic::umin:
1704 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1705 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1706 Known = KnownBits::umin(Known, Known2);
1707 break;
1708 case Intrinsic::umax:
1709 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1710 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1711 Known = KnownBits::umax(Known, Known2);
1712 break;
1713 case Intrinsic::smin:
1714 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1715 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1716 Known = KnownBits::smin(Known, Known2);
1717 break;
1718 case Intrinsic::smax:
1719 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1720 computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1721 Known = KnownBits::smax(Known, Known2);
1722 break;
1723 case Intrinsic::ptrmask: {
1724 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1725
1726 const Value *Mask = I->getOperand(1);
1727 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1728 computeKnownBits(Mask, Known2, Depth + 1, Q);
1729 // TODO: 1-extend would be more precise.
1730 Known &= Known2.anyextOrTrunc(BitWidth);
1731 break;
1732 }
1733 case Intrinsic::x86_sse42_crc32_64_64:
1734 Known.Zero.setBitsFrom(32);
1735 break;
1736 case Intrinsic::riscv_vsetvli:
1737 case Intrinsic::riscv_vsetvlimax: {
1738 bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
1739 const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth);
1740 uint64_t SEW = RISCVVType::decodeVSEW(
1741 cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue());
1742 RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(
1743 cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue());
1744 // The Range is [Lower, Upper), so we need to subtract 1 here to get the
1745 // real upper value.
1746 uint64_t MaxVLEN =
1747 (Range.getUpper().getZExtValue() - 1) * RISCV::RVVBitsPerBlock;
1748 uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL);
1749
1750 // The result of vsetvli must not be larger than AVL.
1751 if (HasAVL)
1752 if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1753 MaxVL = std::min(MaxVL, CI->getZExtValue());
1754
1755 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
1756 if (BitWidth > KnownZeroFirstBit)
1757 Known.Zero.setBitsFrom(KnownZeroFirstBit);
1758 break;
1759 }
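// Illustrative example (hypothetical attribute values): with vscale_range(1,2)
// the upper bound of the range is 3, so MaxVLEN = (3 - 1) * 64 = 128 bits. For
// a vsetvli with SEW=32 and LMUL=1 the SEW/LMUL ratio is 32, giving
// MaxVL = 128 / 32 = 4, so Log2_32(4) + 1 = 3 and every result bit from bit 3
// upward is known zero.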
1760 case Intrinsic::vscale: {
1761 if (!II->getParent() || !II->getFunction())
1762 break;
1763
1764 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
1765 break;
1766 }
1767 }
1768 }
1769 break;
1770 }
1771 case Instruction::ShuffleVector: {
1772 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
1773 // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1774 if (!Shuf) {
1775 Known.resetAll();
1776 return;
1777 }
1778 // For undef elements, we don't know anything about the common state of
1779 // the shuffle result.
1780 APInt DemandedLHS, DemandedRHS;
1781 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
1782 Known.resetAll();
1783 return;
1784 }
1785 Known.One.setAllBits();
1786 Known.Zero.setAllBits();
1787 if (!!DemandedLHS) {
1788 const Value *LHS = Shuf->getOperand(0);
1789 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
1790 // If we don't know any bits, early out.
1791 if (Known.isUnknown())
1792 break;
1793 }
1794 if (!!DemandedRHS) {
1795 const Value *RHS = Shuf->getOperand(1);
1796 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
1797 Known = Known.intersectWith(Known2);
1798 }
1799 break;
1800 }
1801 case Instruction::InsertElement: {
1802 if (isa<ScalableVectorType>(I->getType())) {
1803 Known.resetAll();
1804 return;
1805 }
1806 const Value *Vec = I->getOperand(0);
1807 const Value *Elt = I->getOperand(1);
1808 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
1809 unsigned NumElts = DemandedElts.getBitWidth();
1810 APInt DemandedVecElts = DemandedElts;
1811 bool NeedsElt = true;
1812 // If we know the index we are inserting to, clear it from the Vec check.
1813 if (CIdx && CIdx->getValue().ult(NumElts)) {
1814 DemandedVecElts.clearBit(CIdx->getZExtValue());
1815 NeedsElt = DemandedElts[CIdx->getZExtValue()];
1816 }
1817
1818 Known.One.setAllBits();
1819 Known.Zero.setAllBits();
1820 if (NeedsElt) {
1821 computeKnownBits(Elt, Known, Depth + 1, Q);
1822 // If we don't know any bits, early out.
1823 if (Known.isUnknown())
1824 break;
1825 }
1826
1827 if (!DemandedVecElts.isZero()) {
1828 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
1829 Known = Known.intersectWith(Known2);
1830 }
1831 break;
1832 }
1833 case Instruction::ExtractElement: {
1834 // Look through extract element. If the index is non-constant or
1835 // out-of-range demand all elements, otherwise just the extracted element.
1836 const Value *Vec = I->getOperand(0);
1837 const Value *Idx = I->getOperand(1);
1838 auto *CIdx = dyn_cast<ConstantInt>(Idx);
1839 if (isa<ScalableVectorType>(Vec->getType())) {
1840 // FIXME: there's probably *something* we can do with scalable vectors
1841 Known.resetAll();
1842 break;
1843 }
1844 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1845 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
1846 if (CIdx && CIdx->getValue().ult(NumElts))
1847 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
1848 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
1849 break;
1850 }
1851 case Instruction::ExtractValue:
1852 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
1853 const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
1854 if (EVI->getNumIndices() != 1) break;
1855 if (EVI->getIndices()[0] == 0) {
1856 switch (II->getIntrinsicID()) {
1857 default: break;
1858 case Intrinsic::uadd_with_overflow:
1859 case Intrinsic::sadd_with_overflow:
1860 computeKnownBitsAddSub(
1861 true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
1862 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
1863 break;
1864 case Intrinsic::usub_with_overflow:
1865 case Intrinsic::ssub_with_overflow:
1866 computeKnownBitsAddSub(
1867 false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
1868 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
1869 break;
1870 case Intrinsic::umul_with_overflow:
1871 case Intrinsic::smul_with_overflow:
1872 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
1873 DemandedElts, Known, Known2, Depth, Q);
1874 break;
1875 }
1876 }
1877 }
1878 break;
1879 case Instruction::Freeze:
1880 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
1881 Depth + 1))
1882 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1883 break;
1884 }
1885}
1886
1887/// Determine which bits of V are known to be either zero or one and return
1888/// them.
1889KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
1890 unsigned Depth, const SimplifyQuery &Q) {
1891 KnownBits Known(getBitWidth(V->getType(), Q.DL));
1892 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
1893 return Known;
1894}
1895
1896/// Determine which bits of V are known to be either zero or one and return
1897/// them.
1898 KnownBits llvm::computeKnownBits(const Value *V, unsigned Depth,
1899 const SimplifyQuery &Q) {
1900 KnownBits Known(getBitWidth(V->getType(), Q.DL));
1901 computeKnownBits(V, Known, Depth, Q);
1902 return Known;
1903}
1904
1905/// Determine which bits of V are known to be either zero or one and return
1906/// them in the Known bit set.
1907///
1908/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
1909/// we cannot optimize based on the assumption that it is zero without changing
1910/// it to be an explicit zero. If we don't change it to zero, other code could
1911 /// be optimized based on the contradictory assumption that it is non-zero.
1912/// Because instcombine aggressively folds operations with undef args anyway,
1913/// this won't lose us code quality.
1914///
1915/// This function is defined on values with integer type, values with pointer
1916/// type, and vectors of integers. In the case
1917/// where V is a vector, known zero, and known one values are the
1918/// same width as the vector element, and the bit is set only if it is true
1919/// for all of the demanded elements in the vector specified by DemandedElts.
1920void computeKnownBits(const Value *V, const APInt &DemandedElts,
1921 KnownBits &Known, unsigned Depth,
1922 const SimplifyQuery &Q) {
1923 if (!DemandedElts) {
1924 // No demanded elts, better to assume we don't know anything.
1925 Known.resetAll();
1926 return;
1927 }
1928
1929 assert(V && "No Value?");
1930 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
1931
1932#ifndef NDEBUG
1933 Type *Ty = V->getType();
1934 unsigned BitWidth = Known.getBitWidth();
1935
1936 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
1937 "Not integer or pointer type!");
1938
1939 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
1940 assert(
1941 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
1942 "DemandedElt width should equal the fixed vector number of elements");
1943 } else {
1944 assert(DemandedElts == APInt(1, 1) &&
1945 "DemandedElt width should be 1 for scalars or scalable vectors");
1946 }
1947
1948 Type *ScalarTy = Ty->getScalarType();
1949 if (ScalarTy->isPointerTy()) {
1950 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
1951 "V and Known should have same BitWidth");
1952 } else {
1953 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
1954 "V and Known should have same BitWidth");
1955 }
1956#endif
1957
1958 const APInt *C;
1959 if (match(V, m_APInt(C))) {
1960 // We know all of the bits for a scalar constant or a splat vector constant!
1961 Known = KnownBits::makeConstant(*C);
1962 return;
1963 }
1964 // Null and aggregate-zero are all-zeros.
1965 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
1966 Known.setAllZero();
1967 return;
1968 }
1969 // Handle a constant vector by taking the intersection of the known bits of
1970 // each element.
1971 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
1972 assert(!isa<ScalableVectorType>(V->getType()));
1973 // We know that CDV must be a vector of integers. Take the intersection of
1974 // each element.
1975 Known.Zero.setAllBits(); Known.One.setAllBits();
1976 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
1977 if (!DemandedElts[i])
1978 continue;
1979 APInt Elt = CDV->getElementAsAPInt(i);
1980 Known.Zero &= ~Elt;
1981 Known.One &= Elt;
1982 }
1983 if (Known.hasConflict())
1984 Known.resetAll();
1985 return;
1986 }
1987
1988 if (const auto *CV = dyn_cast<ConstantVector>(V)) {
1989 assert(!isa<ScalableVectorType>(V->getType()));
1990 // We know that CV must be a vector of integers. Take the intersection of
1991 // each element.
1992 Known.Zero.setAllBits(); Known.One.setAllBits();
1993 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
1994 if (!DemandedElts[i])
1995 continue;
1996 Constant *Element = CV->getAggregateElement(i);
1997 if (isa<PoisonValue>(Element))
1998 continue;
1999 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
2000 if (!ElementCI) {
2001 Known.resetAll();
2002 return;
2003 }
2004 const APInt &Elt = ElementCI->getValue();
2005 Known.Zero &= ~Elt;
2006 Known.One &= Elt;
2007 }
2008 if (Known.hasConflict())
2009 Known.resetAll();
2010 return;
2011 }
2012
2013 // Start out not knowing anything.
2014 Known.resetAll();
2015
2016 // We can't imply anything about undefs.
2017 if (isa<UndefValue>(V))
2018 return;
2019
2020 // There's no point in looking through other users of ConstantData for
2021 // assumptions. Confirm that we've handled them all.
2022 assert(!isa<ConstantData>(V) && "Unhandled constant data!");
2023
2024 if (const auto *A = dyn_cast<Argument>(V))
2025 if (std::optional<ConstantRange> Range = A->getRange())
2026 Known = Range->toKnownBits();
2027
2028 // All recursive calls that increase depth must come after this.
2029 if (Depth == MaxAnalysisRecursionDepth)
2030 return;
2031
2032 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
2033 // the bits of its aliasee.
2034 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2035 if (!GA->isInterposable())
2036 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
2037 return;
2038 }
2039
2040 if (const Operator *I = dyn_cast<Operator>(V))
2041 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
2042 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2043 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
2044 Known = CR->toKnownBits();
2045 }
2046
2047 // Aligned pointers have trailing zeros - refine Known.Zero set
2048 if (isa<PointerType>(V->getType())) {
2049 Align Alignment = V->getPointerAlignment(Q.DL);
2050 Known.Zero.setLowBits(Log2(Alignment));
2051 }
2052
2053 // computeKnownBitsFromContext strictly refines Known.
2054 // Therefore, we run them after computeKnownBitsFromOperator.
2055
2056 // Check whether we can determine known bits from context such as assumes.
2057 computeKnownBitsFromContext(V, Known, Depth, Q);
2058
2059 assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
2060}
2061
2062/// Try to detect a recurrence that the value of the induction variable is
2063/// always a power of two (or zero).
2064static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2065 unsigned Depth, SimplifyQuery &Q) {
2066 BinaryOperator *BO = nullptr;
2067 Value *Start = nullptr, *Step = nullptr;
2068 if (!matchSimpleRecurrence(PN, BO, Start, Step))
2069 return false;
2070
2071 // Initial value must be a power of two.
2072 for (const Use &U : PN->operands()) {
2073 if (U.get() == Start) {
2074 // Initial value comes from a different BB, need to adjust context
2075 // instruction for analysis.
2076 Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2077 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
2078 return false;
2079 }
2080 }
2081
2082 // Except for Mul, the induction variable must be on the left side of the
2083 // increment expression, otherwise its value can be arbitrary.
2084 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
2085 return false;
2086
2087 Q.CxtI = BO->getParent()->getTerminator();
2088 switch (BO->getOpcode()) {
2089 case Instruction::Mul:
2090 // Power of two is closed under multiplication.
2091 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
2092 Q.IIQ.hasNoSignedWrap(BO)) &&
2093 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
2094 case Instruction::SDiv:
2095 // Start value must not be signmask for signed division, so simply being a
2096 // power of two is not sufficient, and it has to be a constant.
2097 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2098 return false;
2099 [[fallthrough]];
2100 case Instruction::UDiv:
2101 // Divisor must be a power of two.
2102 // If OrZero is false, cannot guarantee induction variable is non-zero after
2103 // division, same for Shr, unless it is exact division.
2104 return (OrZero || Q.IIQ.isExact(BO)) &&
2105 isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
2106 case Instruction::Shl:
2107 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
2108 case Instruction::AShr:
2109 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2110 return false;
2111 [[fallthrough]];
2112 case Instruction::LShr:
2113 return OrZero || Q.IIQ.isExact(BO);
2114 default:
2115 return false;
2116 }
2117}
2118
2119/// Return true if the given value is known to have exactly one
2120/// bit set when defined. For vectors return true if every element is known to
2121/// be a power of two when defined. Supports values with integer or pointer
2122/// types and vectors of integers.
2123bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
2124 const SimplifyQuery &Q) {
2125 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2126
2127 if (isa<Constant>(V))
2128 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
2129
2130 // i1 is by definition a power of 2 or zero.
2131 if (OrZero && V->getType()->getScalarSizeInBits() == 1)
2132 return true;
2133
2134 auto *I = dyn_cast<Instruction>(V);
2135 if (!I)
2136 return false;
2137
2138 if (Q.CxtI && match(V, m_VScale())) {
2139 const Function *F = Q.CxtI->getFunction();
2140 // The vscale_range indicates vscale is a power-of-two.
2141 return F->hasFnAttribute(Attribute::VScaleRange);
2142 }
2143
2144 // 1 << X is clearly a power of two if the one is not shifted off the end. If
2145 // it is shifted off the end then the result is undefined.
2146 if (match(I, m_Shl(m_One(), m_Value())))
2147 return true;
2148
2149 // (signmask) >>l X is clearly a power of two if the one is not shifted off
2150 // the bottom. If it is shifted off the bottom then the result is undefined.
2151 if (match(I, m_LShr(m_SignMask(), m_Value())))
2152 return true;
2153
2154 // The remaining tests are all recursive, so bail out if we hit the limit.
2155 if (Depth++ == MaxAnalysisRecursionDepth)
2156 return false;
2157
2158 switch (I->getOpcode()) {
2159 case Instruction::ZExt:
2160 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2161 case Instruction::Trunc:
2162 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2163 case Instruction::Shl:
2164 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
2165 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2166 return false;
2167 case Instruction::LShr:
2168 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
2169 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2170 return false;
2171 case Instruction::UDiv:
2172 if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
2173 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2174 return false;
2175 case Instruction::Mul:
2176 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2177 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
2178 (OrZero || isKnownNonZero(I, Q, Depth));
2179 case Instruction::And:
2180 // A power of two and'd with anything is a power of two or zero.
2181 if (OrZero &&
2182 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) ||
2183 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)))
2184 return true;
2185 // X & (-X) is always a power of two or zero.
2186 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
2187 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
2188 return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth);
2189 return false;
2190 case Instruction::Add: {
2191 // Adding a power-of-two or zero to the same power-of-two or zero yields
2192 // either the original power-of-two, a larger power-of-two or zero.
2193 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
2194 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
2195 Q.IIQ.hasNoSignedWrap(VOBO)) {
2196 if (match(I->getOperand(0),
2197 m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
2198 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
2199 return true;
2200 if (match(I->getOperand(1),
2201 m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
2202 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
2203 return true;
2204
2205 unsigned BitWidth = V->getType()->getScalarSizeInBits();
2206 KnownBits LHSBits(BitWidth);
2207 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);
2208
2209 KnownBits RHSBits(BitWidth);
2210 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
2211 // If i8 V is a power of two or zero:
2212 // ZeroBits: 1 1 1 0 1 1 1 1
2213 // ~ZeroBits: 0 0 0 1 0 0 0 0
2214 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2215 // If OrZero isn't set, we cannot give back a zero result.
2216 // Make sure either the LHS or RHS has a bit set.
2217 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2218 return true;
2219 }
2220
2221 // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
2222 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
2223 if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
2224 return true;
2225 return false;
2226 }
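// Illustrative example for the (X & M) + X pattern above: when X is a power of
// two or zero, X & M is either 0 or X, so the sum is either X or 2 * X, which
// is again a power of two or zero under the nuw/nsw (or OrZero) conditions
// checked above.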
2227 case Instruction::Select:
2228 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2229 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
2230 case Instruction::PHI: {
2231 // A PHI node is power of two if all incoming values are power of two, or if
2232 // it is an induction variable where in each step its value is a power of
2233 // two.
2234 auto *PN = cast<PHINode>(I);
2235 SimplifyQuery RecQ = Q;
2236
2237 // Check if it is an induction variable and always power of two.
2238 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
2239 return true;
2240
2241 // Recursively check all incoming values. Limit recursion to 2 levels, so
2242 // that search complexity is limited to number of operands^2.
2243 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2244 return llvm::all_of(PN->operands(), [&](const Use &U) {
2245 // Value is power of 2 if it is coming from PHI node itself by induction.
2246 if (U.get() == PN)
2247 return true;
2248
2249 // Change the context instruction to the incoming block where it is
2250 // evaluated.
2251 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2252 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
2253 });
2254 }
2255 case Instruction::Invoke:
2256 case Instruction::Call: {
2257 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2258 switch (II->getIntrinsicID()) {
2259 case Intrinsic::umax:
2260 case Intrinsic::smax:
2261 case Intrinsic::umin:
2262 case Intrinsic::smin:
2263 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
2264 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2265 // bswap/bitreverse just move around bits, but don't change any 1s/0s
2266 // and thus don't change pow2/non-pow2 status.
2267 case Intrinsic::bitreverse:
2268 case Intrinsic::bswap:
2269 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2270 case Intrinsic::fshr:
2271 case Intrinsic::fshl:
2272 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
2273 if (II->getArgOperand(0) == II->getArgOperand(1))
2274 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2275 break;
2276 default:
2277 break;
2278 }
2279 }
2280 return false;
2281 }
2282 default:
2283 return false;
2284 }
2285}
2286
2287/// Test whether a GEP's result is known to be non-null.
2288///
2289/// Uses properties inherent in a GEP to try to determine whether it is known
2290/// to be non-null.
2291///
2292/// Currently this routine does not support vector GEPs.
2293static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2294 const SimplifyQuery &Q) {
2295 const Function *F = nullptr;
2296 if (const Instruction *I = dyn_cast<Instruction>(GEP))
2297 F = I->getFunction();
2298
2299 if (!GEP->isInBounds() ||
2300 NullPointerIsDefined(F, GEP->getPointerAddressSpace()))
2301 return false;
2302
2303 // FIXME: Support vector-GEPs.
2304 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2305
2306 // If the base pointer is non-null, we cannot walk to a null address with an
2307 // inbounds GEP in address space zero.
2308 if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
2309 return true;
2310
2311 // Walk the GEP operands and see if any operand introduces a non-zero offset.
2312 // If so, then the GEP cannot produce a null pointer, as doing so would
2313 // inherently violate the inbounds contract within address space zero.
2314 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2315 GTI != GTE; ++GTI) {
2316 // Struct types are easy -- they must always be indexed by a constant.
2317 if (StructType *STy = GTI.getStructTypeOrNull()) {
2318 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2319 unsigned ElementIdx = OpC->getZExtValue();
2320 const StructLayout *SL = Q.DL.getStructLayout(STy);
2321 uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2322 if (ElementOffset > 0)
2323 return true;
2324 continue;
2325 }
2326
2327 // If we have a zero-sized type, the index doesn't matter. Keep looping.
2328 if (GTI.getSequentialElementStride(Q.DL).isZero())
2329 continue;
2330
2331 // Fast path the constant operand case both for efficiency and so we don't
2332 // increment Depth when just zipping down an all-constant GEP.
2333 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2334 if (!OpC->isZero())
2335 return true;
2336 continue;
2337 }
2338
2339 // We post-increment Depth here because while isKnownNonZero increments it
2340 // as well, when we pop back up that increment won't persist. We don't want
2341 // to recurse 10k times just because we have 10k GEP operands. We don't
2342 // bail completely out because we want to handle constant GEPs regardless
2343 // of depth.
2344 if (Depth++ >= MaxAnalysisRecursionDepth)
2345 continue;
2346
2347 if (isKnownNonZero(GTI.getOperand(), Q, Depth))
2348 return true;
2349 }
2350
2351 return false;
2352}
2353
2354 static bool isKnownNonNullFromDominatingCondition(const Value *V,
2355 const Instruction *CtxI,
2356 const DominatorTree *DT) {
2357 assert(!isa<Constant>(V) && "Called for constant?");
2358
2359 if (!CtxI || !DT)
2360 return false;
2361
2362 unsigned NumUsesExplored = 0;
2363 for (const auto *U : V->users()) {
2364 // Avoid massive lists
2365 if (NumUsesExplored >= DomConditionsMaxUses)
2366 break;
2367 NumUsesExplored++;
2368
2369 // If the value is used as an argument to a call or invoke, then argument
2370 // attributes may provide an answer about null-ness.
2371 if (const auto *CB = dyn_cast<CallBase>(U))
2372 if (auto *CalledFunc = CB->getCalledFunction())
2373 for (const Argument &Arg : CalledFunc->args())
2374 if (CB->getArgOperand(Arg.getArgNo()) == V &&
2375 Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
2376 DT->dominates(CB, CtxI))
2377 return true;
2378
2379 // If the value is used as a load/store, then the pointer must be non null.
2380 if (V == getLoadStorePointerOperand(U)) {
2381 const Instruction *I = cast<Instruction>(U);
2382 if (!NullPointerIsDefined(I->getFunction(),
2383 V->getType()->getPointerAddressSpace()) &&
2384 DT->dominates(I, CtxI))
2385 return true;
2386 }
2387
2388 if ((match(U, m_IDiv(m_Value(), m_Specific(V))) ||
2389 match(U, m_IRem(m_Value(), m_Specific(V)))) &&
2390 isValidAssumeForContext(cast<Instruction>(U), CtxI, DT))
2391 return true;
2392
2393 // Consider only compare instructions uniquely controlling a branch
2394 Value *RHS;
2395 CmpInst::Predicate Pred;
2396 if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
2397 continue;
2398
2399 bool NonNullIfTrue;
2400 if (cmpExcludesZero(Pred, RHS))
2401 NonNullIfTrue = true;
2402 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
2403 NonNullIfTrue = false;
2404 else
2405 continue;
2406
2407 SmallVector<const User *, 4> WorkList;
2408 SmallPtrSet<const User *, 4> Visited;
2409 for (const auto *CmpU : U->users()) {
2410 assert(WorkList.empty() && "Should be!");
2411 if (Visited.insert(CmpU).second)
2412 WorkList.push_back(CmpU);
2413
2414 while (!WorkList.empty()) {
2415 auto *Curr = WorkList.pop_back_val();
2416
2417 // If a user is an AND, add all its users to the work list. We only
2418 // propagate "pred != null" condition through AND because it is only
2419 // correct to assume that all conditions of AND are met in true branch.
2420 // TODO: Support similar logic of OR and EQ predicate?
2421 if (NonNullIfTrue)
2422 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
2423 for (const auto *CurrU : Curr->users())
2424 if (Visited.insert(CurrU).second)
2425 WorkList.push_back(CurrU);
2426 continue;
2427 }
2428
2429 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
2430 assert(BI->isConditional() && "uses a comparison!");
2431
2432 BasicBlock *NonNullSuccessor =
2433 BI->getSuccessor(NonNullIfTrue ? 0 : 1);
2434 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2435 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
2436 return true;
2437 } else if (NonNullIfTrue && isGuard(Curr) &&
2438 DT->dominates(cast<Instruction>(Curr), CtxI)) {
2439 return true;
2440 }
2441 }
2442 }
2443 }
2444
2445 return false;
2446}
2447
2448/// Does the 'Range' metadata (which must be a valid MD_range operand list)
2449/// ensure that the value it's attached to is never Value? 'RangeType' is
2450 /// the type of the value described by the range.
2451static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2452 const unsigned NumRanges = Ranges->getNumOperands() / 2;
2453 assert(NumRanges >= 1);
2454 for (unsigned i = 0; i < NumRanges; ++i) {
2455 ConstantInt *Lower =
2456 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2457 ConstantInt *Upper =
2458 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2459 ConstantRange Range(Lower->getValue(), Upper->getValue());
2460 if (Range.contains(Value))
2461 return false;
2462 }
2463 return true;
2464}
2465
2466/// Try to detect a recurrence that monotonically increases/decreases from a
2467/// non-zero starting value. These are common as induction variables.
2468static bool isNonZeroRecurrence(const PHINode *PN) {
2469 BinaryOperator *BO = nullptr;
2470 Value *Start = nullptr, *Step = nullptr;
2471 const APInt *StartC, *StepC;
2472 if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2473 !match(Start, m_APInt(StartC)) || StartC->isZero())
2474 return false;
2475
2476 switch (BO->getOpcode()) {
2477 case Instruction::Add:
2478 // Starting from non-zero and stepping away from zero can never wrap back
2479 // to zero.
2480 return BO->hasNoUnsignedWrap() ||
2481 (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2482 StartC->isNegative() == StepC->isNegative());
2483 case Instruction::Mul:
2484 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2485 match(Step, m_APInt(StepC)) && !StepC->isZero();
2486 case Instruction::Shl:
2487 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2488 case Instruction::AShr:
2489 case Instruction::LShr:
2490 return BO->isExact();
2491 default:
2492 return false;
2493 }
2494}
2495
2496static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
2497 ICmpInst::Predicate Pred;
2498 return (match(Op0, m_ZExtOrSExt(m_ICmp(Pred, m_Specific(Op1), m_Zero()))) ||
2499 match(Op1, m_ZExtOrSExt(m_ICmp(Pred, m_Specific(Op0), m_Zero())))) &&
2500 Pred == ICmpInst::ICMP_EQ;
2501}
2502
2503static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
2504 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2505 Value *Y, bool NSW, bool NUW) {
2506 // (X + (X != 0)) is non zero
2507 if (matchOpWithOpEqZero(X, Y))
2508 return true;
2509
2510 if (NUW)
2511 return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2512 isKnownNonZero(X, DemandedElts, Q, Depth);
2513
2514 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2515 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2516
2517 // If X and Y are both non-negative (as signed values) then their sum is not
2518 // zero unless both X and Y are zero.
2519 if (XKnown.isNonNegative() && YKnown.isNonNegative())
2520 if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2521 isKnownNonZero(X, DemandedElts, Q, Depth))
2522 return true;
2523
2524 // If X and Y are both negative (as signed values) then their sum is not
2525 // zero unless both X and Y equal INT_MIN.
2526 if (XKnown.isNegative() && YKnown.isNegative()) {
2527 APInt Mask = APInt::getSignedMaxValue(BitWidth);
2528 // The sign bit of X is set. If some other bit is set then X is not equal
2529 // to INT_MIN.
2530 if (XKnown.One.intersects(Mask))
2531 return true;
2532 // The sign bit of Y is set. If some other bit is set then Y is not equal
2533 // to INT_MIN.
2534 if (YKnown.One.intersects(Mask))
2535 return true;
2536 }
2537
2538 // The sum of a non-negative number and a power of two is not zero.
2539 if (XKnown.isNonNegative() &&
2540 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2541 return true;
2542 if (YKnown.isNonNegative() &&
2543 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2544 return true;
2545
2546 return KnownBits::computeForAddSub(/*Add=*/true, NSW, NUW, XKnown, YKnown)
2547 .isNonZero();
2548}
2549
2550static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
2551 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2552 Value *Y) {
2553 // (X - (X != 0)) is non zero
2554 // ((X != 0) - X) is non zero
2555 if (matchOpWithOpEqZero(X, Y))
2556 return true;
2557
2558 // TODO: Move this case into isKnownNonEqual().
2559 if (auto *C = dyn_cast<Constant>(X))
2560 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
2561 return true;
2562
2563 return ::isKnownNonEqual(X, Y, Depth, Q);
2564}
2565
2566static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
2567 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2568 Value *Y, bool NSW, bool NUW) {
2569 // If X and Y are non-zero then so is X * Y as long as the multiplication
2570 // does not overflow.
2571 if (NSW || NUW)
2572 return isKnownNonZero(X, DemandedElts, Q, Depth) &&
2573 isKnownNonZero(Y, DemandedElts, Q, Depth);
2574
2575 // If either X or Y is odd, then if the other is non-zero the result can't
2576 // be zero.
2577 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2578 if (XKnown.One[0])
2579 return isKnownNonZero(Y, DemandedElts, Q, Depth);
2580
2581 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2582 if (YKnown.One[0])
2583 return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);
2584
2585 // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
2586 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2587 // the lowest known One of X and Y. If they are non-zero, the result
2588 // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
2589 // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
2590 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2591 BitWidth;
2592}
2593
2594static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
2595 unsigned Depth, const SimplifyQuery &Q,
2596 const KnownBits &KnownVal) {
2597 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2598 switch (I->getOpcode()) {
2599 case Instruction::Shl:
2600 return Lhs.shl(Rhs);
2601 case Instruction::LShr:
2602 return Lhs.lshr(Rhs);
2603 case Instruction::AShr:
2604 return Lhs.ashr(Rhs);
2605 default:
2606 llvm_unreachable("Unknown Shift Opcode");
2607 }
2608 };
2609
2610 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2611 switch (I->getOpcode()) {
2612 case Instruction::Shl:
2613 return Lhs.lshr(Rhs);
2614 case Instruction::LShr:
2615 case Instruction::AShr:
2616 return Lhs.shl(Rhs);
2617 default:
2618 llvm_unreachable("Unknown Shift Opcode");
2619 }
2620 };
2621
2622 if (KnownVal.isUnknown())
2623 return false;
2624
2625 KnownBits KnownCnt =
2626 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2627 APInt MaxShift = KnownCnt.getMaxValue();
2628 unsigned NumBits = KnownVal.getBitWidth();
2629 if (MaxShift.uge(NumBits))
2630 return false;
2631
2632 if (!ShiftOp(KnownVal.One, MaxShift).isZero())
2633 return true;
2634
2635 // If all of the bits shifted out are known to be zero, and Val is known
2636 // non-zero then at least one non-zero bit must remain.
2637 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
2638 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
2639 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
2640 return true;
2641
2642 return false;
2643}
2644
2645 static bool isKnownNonZeroFromOperator(const Operator *I,
2646 const APInt &DemandedElts,
2647 unsigned Depth, const SimplifyQuery &Q) {
2648 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
2649 switch (I->getOpcode()) {
2650 case Instruction::Alloca:
2651 // Alloca never returns null, malloc might.
2652 return I->getType()->getPointerAddressSpace() == 0;
2653 case Instruction::GetElementPtr:
2654 if (I->getType()->isPointerTy())
2655 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
2656 break;
2657 case Instruction::BitCast: {
2658 // We need to be a bit careful here. We can only peek through the bitcast
2659 // if the scalar size of elements in the operand is smaller than, and evenly
2660 // divides, the scalar size they are being cast to. Take three cases:
2661 //
2662 // 1) Unsafe:
2663 // bitcast <2 x i16> %NonZero to <4 x i8>
2664 //
2665 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
2666 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't
2667 // guaranteed (imagine just the sign bit set in the 2 i16 elements).
2668 //
2669 // 2) Unsafe:
2670 // bitcast <4 x i3> %NonZero to <3 x i4>
2671 //
2672 // Even though the scalar size of the src (`i3`) is smaller than the
2673 // scalar size of the dst `i4`, because `i4` is not a multiple of `i3`
2674 // it's possible for the `3 x i4` elements to be zero because there are
2675 // some elements in the destination that don't contain any full src
2676 // element.
2677 //
2678 // 3) Safe:
2679 // bitcast <4 x i8> %NonZero to <2 x i16>
2680 //
2681 // This is always safe as non-zero in the 4 i8 elements implies
2682 // non-zero in the combination of any two adjacent ones. Since i16 is a
2683 // multiple of i8, each i16 is guaranteed to contain 2 full i8 elements.
2684 // This all implies the 2 i16 elements are non-zero.
2685 Type *FromTy = I->getOperand(0)->getType();
2686 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
2687 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
2688 return isKnownNonZero(I->getOperand(0), Q, Depth);
2689 } break;
2690 case Instruction::IntToPtr:
2691 // Note that we have to take special care to avoid looking through
2692 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2693 // as casts that can alter the value, e.g., AddrSpaceCasts.
2694 if (!isa<ScalableVectorType>(I->getType()) &&
2695 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2696 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2697 return isKnownNonZero(I->getOperand(0), Q, Depth);
2698 break;
2699 case Instruction::PtrToInt:
2700 // Similar to int2ptr above, we can look through ptr2int here if the cast
2701 // is a no-op or an extend and not a truncate.
2702 if (!isa<ScalableVectorType>(I->getType()) &&
2703 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2704 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2705 return isKnownNonZero(I->getOperand(0), Q, Depth);
2706 break;
2707 case Instruction::Trunc:
2708 // nuw/nsw trunc preserves zero/non-zero status of input.
2709 if (auto *TI = dyn_cast<TruncInst>(I))
2710 if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
2711 return isKnownNonZero(TI->getOperand(0), Q, Depth);
2712 break;
2713
2714 case Instruction::Sub:
2715 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2716 I->getOperand(1));
2717 case Instruction::Xor:
2718 // (X ^ (X != 0)) is non zero
2719 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2720 return true;
2721 break;
2722 case Instruction::Or:
2723 // (X | (X != 0)) is non zero
2724 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2725 return true;
2726 // X | Y != 0 if X != 0 or Y != 0.
2727 return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
2728 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2729 case Instruction::SExt:
2730 case Instruction::ZExt:
2731 // ext X != 0 if X != 0.
2732 return isKnownNonZero(I->getOperand(0), Q, Depth);
2733
2734 case Instruction::Shl: {
2735 // shl nsw/nuw can't remove any non-zero bits.
2736 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2737 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
2738 return isKnownNonZero(I->getOperand(0), Q, Depth);
2739
2740 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
2741 // if the lowest bit is shifted off the end.
2742 KnownBits Known(BitWidth);
2743 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
2744 if (Known.One[0])
2745 return true;
2746
2747 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2748 }
2749 case Instruction::LShr:
2750 case Instruction::AShr: {
2751 // shr exact can only shift out zero bits.
2752 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
2753 if (BO->isExact())
2754 return isKnownNonZero(I->getOperand(0), Q, Depth);
2755
2756 // shr X, Y != 0 if X is negative. Note that the value of the shift is not
2757 // defined if the sign bit is shifted off the end.
2758 KnownBits Known =
2759 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2760 if (Known.isNegative())
2761 return true;
2762
2763 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2764 }
2765 case Instruction::UDiv:
2766 case Instruction::SDiv: {
2767 // X / Y
2768 // div exact can only produce a zero if the dividend is zero.
2769 if (cast<PossiblyExactOperator>(I)->isExact())
2770 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2771
2772 KnownBits XKnown =
2773 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
2774 // If X is fully unknown we won't be able to figure anything out so don't
2775 // bother computing known bits for Y.
2776 if (XKnown.isUnknown())
2777 return false;
2778
2779 KnownBits YKnown =
2780 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2781 if (I->getOpcode() == Instruction::SDiv) {
2782 // For signed division need to compare abs value of the operands.
2783 XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
2784 YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
2785 }
2786 // If X u>= Y then div is non zero (0/0 is UB).
2787 std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown);
2788 // If X is totally unknown or X u< Y, we won't be able to prove non-zero
2789 // with compute known bits so just return early.
2790 return XUgeY && *XUgeY;
2791 }
2792 case Instruction::Add: {
2793 // X + Y.
2794
2795 // If Add has nuw wrap flag, then if either X or Y is non-zero the result is
2796 // non-zero.
2797 auto *BO = cast<OverflowingBinaryOperator>(I);
2798 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2799 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
2800 Q.IIQ.hasNoUnsignedWrap(BO));
2801 }
2802 case Instruction::Mul: {
2803 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2804 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2805 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
2806 Q.IIQ.hasNoUnsignedWrap(BO));
2807 }
2808 case Instruction::Select: {
2809 // (C ? X : Y) != 0 if X != 0 and Y != 0.
2810
2811 // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
2812 // then see if the select condition implies the arm is non-zero. For example
2813 // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
2814 // dominated by `X != 0`.
2815 auto SelectArmIsNonZero = [&](bool IsTrueArm) {
2816 Value *Op;
2817 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
2818 // Op is trivially non-zero.
2819 if (isKnownNonZero(Op, DemandedElts, Q, Depth))
2820 return true;
2821
2822 // The condition of the select dominates the true/false arm. Check if the
2823 // condition implies that a given arm is non-zero.
2824 Value *X;
2825 CmpInst::Predicate Pred;
2826 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
2827 return false;
2828
2829 if (!IsTrueArm)
2830 Pred = ICmpInst::getInversePredicate(Pred);
2831
2832 return cmpExcludesZero(Pred, X);
2833 };
2834
2835 if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
2836 SelectArmIsNonZero(/* IsTrueArm */ false))
2837 return true;
2838 break;
2839 }
2840 case Instruction::PHI: {
2841 auto *PN = cast<PHINode>(I);
2842 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
2843 return true;
2844
2845 // Check if all incoming values are non-zero using recursion.
2846 SimplifyQuery RecQ = Q;
2847 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2848 return llvm::all_of(PN->operands(), [&](const Use &U) {
2849 if (U.get() == PN)
2850 return true;
2851 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2852 // Check if the branch on the phi excludes zero.
2853 ICmpInst::Predicate Pred;
2854 Value *X;
2855 BasicBlock *TrueSucc, *FalseSucc;
2856 if (match(RecQ.CxtI,
2857 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
2858 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
2859 // Check for cases of duplicate successors.
2860 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
2861 // If we're using the false successor, invert the predicate.
2862 if (FalseSucc == PN->getParent())
2863 Pred = CmpInst::getInversePredicate(Pred);
2864 if (cmpExcludesZero(Pred, X))
2865 return true;
2866 }
2867 }
2868 // Finally recurse on the edge and check it directly.
2869 return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth);
2870 });
2871 }
2872 case Instruction::InsertElement: {
2873 if (isa<ScalableVectorType>(I->getType()))
2874 break;
2875
2876 const Value *Vec = I->getOperand(0);
2877 const Value *Elt = I->getOperand(1);
2878 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
2879
2880 unsigned NumElts = DemandedElts.getBitWidth();
2881 APInt DemandedVecElts = DemandedElts;
2882 bool SkipElt = false;
2883 // If we know the index we are inserting to, clear it from the Vec check.
2884 if (CIdx && CIdx->getValue().ult(NumElts)) {
2885 DemandedVecElts.clearBit(CIdx->getZExtValue());
2886 SkipElt = !DemandedElts[CIdx->getZExtValue()];
2887 }
2888
2889 // Result is non-zero if Elt is non-zero and the rest of the demanded elts
2890 // in Vec are non-zero.
2891 return (SkipElt || isKnownNonZero(Elt, Q, Depth)) &&
2892 (DemandedVecElts.isZero() ||
2893 isKnownNonZero(Vec, DemandedVecElts, Q, Depth));
2894 }
2895 case Instruction::ExtractElement:
2896 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
2897 const Value *Vec = EEI->getVectorOperand();
2898 const Value *Idx = EEI->getIndexOperand();
2899 auto *CIdx = dyn_cast<ConstantInt>(Idx);
2900 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
2901 unsigned NumElts = VecTy->getNumElements();
2902 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
2903 if (CIdx && CIdx->getValue().ult(NumElts))
2904 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
2905 return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
2906 }
2907 }
2908 break;
2909 case Instruction::ShuffleVector: {
2910 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
2911 if (!Shuf)
2912 break;
2913 APInt DemandedLHS, DemandedRHS;
2914 // For undef elements, we don't know anything about the common state of
2915 // the shuffle result.
2916 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
2917 break;
2918 // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
2919 return (DemandedRHS.isZero() ||
2920 isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) &&
2921 (DemandedLHS.isZero() ||
2922 isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
2923 }
2924 case Instruction::Freeze:
2925 return isKnownNonZero(I->getOperand(0), Q, Depth) &&
2926 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
2927 Depth);
2928 case Instruction::Load: {
2929 auto *LI = cast<LoadInst>(I);
2930 // A Load tagged with nonnull or dereferenceable with null pointer undefined
2931 // is never null.
2932 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) {
2933 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
2934 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) &&
2935 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())))
2936 return true;
2937 } else if (MDNode *Ranges = Q.IIQ.getMetadata(LI, LLVMContext::MD_range)) {
2938 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
2939 }
2940
2941 // No need to fall through to computeKnownBits as range metadata is already
2942 // handled in isKnownNonZero.
2943 return false;
2944 }
2945 case Instruction::ExtractValue: {
2946 const WithOverflowInst *WO;
2947 if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) {
2948 switch (WO->getBinaryOp()) {
2949 default:
2950 break;
2951 case Instruction::Add:
2952 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
2953 WO->getArgOperand(0), WO->getArgOperand(1),
2954 /*NSW=*/false,
2955 /*NUW=*/false);
2956 case Instruction::Sub:
2957 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
2958 WO->getArgOperand(0), WO->getArgOperand(1));
2959 case Instruction::Mul:
2960 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth,
2961 WO->getArgOperand(0), WO->getArgOperand(1),
2962 /*NSW=*/false, /*NUW=*/false);
2963 break;
2964 }
2965 }
2966 break;
2967 }
2968 case Instruction::Call:
2969 case Instruction::Invoke: {
2970 const auto *Call = cast<CallBase>(I);
2971 if (I->getType()->isPointerTy()) {
2972 if (Call->isReturnNonNull())
2973 return true;
2974 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
2975 return isKnownNonZero(RP, Q, Depth);
2976 } else {
2977 if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range))
2979 if (std::optional<ConstantRange> Range = Call->getRange()) {
2980 const APInt ZeroValue(Range->getBitWidth(), 0);
2981 if (!Range->contains(ZeroValue))
2982 return true;
2983 }
2984 if (const Value *RV = Call->getReturnedArgOperand())
2985 if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth))
2986 return true;
2987 }
2988
2989 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2990 switch (II->getIntrinsicID()) {
2991 case Intrinsic::sshl_sat:
2992 case Intrinsic::ushl_sat:
2993 case Intrinsic::abs:
2994 case Intrinsic::bitreverse:
2995 case Intrinsic::bswap:
2996 case Intrinsic::ctpop:
2997 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
2998 // NB: We don't handle usub_sat here because in any case where we can
2999 // prove it is non-zero, we will fold it to `sub nuw` in InstCombine.
3000 case Intrinsic::ssub_sat:
3001 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3002 II->getArgOperand(0), II->getArgOperand(1));
3003 case Intrinsic::sadd_sat:
3004 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3005 II->getArgOperand(0), II->getArgOperand(1),
3006 /*NSW=*/true, /* NUW=*/false);
3007 // An or/umax/umin/smax/smin reduction of all-non-zero elements is always non-zero.
3008 case Intrinsic::vector_reduce_or:
3009 case Intrinsic::vector_reduce_umax:
3010 case Intrinsic::vector_reduce_umin:
3011 case Intrinsic::vector_reduce_smax:
3012 case Intrinsic::vector_reduce_smin:
3013 return isKnownNonZero(II->getArgOperand(0), Q, Depth);
3014 case Intrinsic::umax:
3015 case Intrinsic::uadd_sat:
3016 // umax(X, (X != 0)) is non zero
3017 // X +usat (X != 0) is non zero
3018 if (matchOpWithOpEqZero(II->getArgOperand(0), II->getArgOperand(1)))
3019 return true;
3020
3021 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) ||
3022 isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3023 case Intrinsic::smax: {
3024 // If either arg is strictly positive the result is non-zero. Otherwise
3025 // the result is non-zero if both ops are non-zero.
3026 auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
3027 const KnownBits &OpKnown) {
3028 if (!OpNonZero.has_value())
3029 OpNonZero = OpKnown.isNonZero() ||
3030 isKnownNonZero(Op, DemandedElts, Q, Depth);
3031 return *OpNonZero;
3032 };
3033 // Avoid re-computing isKnownNonZero.
3034 std::optional<bool> Op0NonZero, Op1NonZero;
3035 KnownBits Op1Known =
3036 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3037 if (Op1Known.isNonNegative() &&
3038 IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known))
3039 return true;
3040 KnownBits Op0Known =
3041 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3042 if (Op0Known.isNonNegative() &&
3043 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known))
3044 return true;
3045 return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) &&
3046 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known);
3047 }
3048 case Intrinsic::smin: {
3049 // If either arg is negative the result is non-zero. Otherwise
3050 // the result is non-zero if both ops are non-zero.
3051 KnownBits Op1Known =
3052 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3053 if (Op1Known.isNegative())
3054 return true;
3055 KnownBits Op0Known =
3056 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3057 if (Op0Known.isNegative())
3058 return true;
3059
3060 if (Op1Known.isNonZero() && Op0Known.isNonZero())
3061 return true;
3062 }
3063 [[fallthrough]];
3064 case Intrinsic::umin:
3065 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) &&
3066 isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth);
3067 case Intrinsic::cttz:
3068 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3069 .Zero[0];
3070 case Intrinsic::ctlz:
3071 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3072 .isNonNegative();
3073 case Intrinsic::fshr:
3074 case Intrinsic::fshl:
3075 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
3076 if (II->getArgOperand(0) == II->getArgOperand(1))
3077 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3078 break;
3079 case Intrinsic::vscale:
3080 return true;
3081 case Intrinsic::experimental_get_vector_length:
3082 return isKnownNonZero(I->getOperand(0), Q, Depth);
3083 default:
3084 break;
3085 }
3086 break;
3087 }
3088
3089 return false;
3090 }
3091 }
3092
3093 KnownBits Known(BitWidth);
3094 computeKnownBits(I, DemandedElts, Known, Depth, Q);
3095 return Known.One != 0;
3096}
3097
3098/// Return true if the given value is known to be non-zero when defined. For
3099/// vectors, return true if every demanded element is known to be non-zero when
3100/// defined. For pointers, if the context instruction and dominator tree are
3101/// specified, perform context-sensitive analysis and return true if the
3102/// pointer couldn't possibly be null at the specified instruction.
3103/// Supports values with integer or pointer type and vectors of integers.
3104bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3105 const SimplifyQuery &Q, unsigned Depth) {
3106 Type *Ty = V->getType();
3107
3108#ifndef NDEBUG
3109 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3110
3111 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3112 assert(
3113 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3114 "DemandedElt width should equal the fixed vector number of elements");
3115 } else {
3116 assert(DemandedElts == APInt(1, 1) &&
3117 "DemandedElt width should be 1 for scalars");
3118 }
3119#endif
3120
3121 if (auto *C = dyn_cast<Constant>(V)) {
3122 if (C->isNullValue())
3123 return false;
3124 if (isa<ConstantInt>(C))
3125 // Must be non-zero due to null test above.
3126 return true;
3127
3128 // For constant vectors, check that all elements are poison or known
3129 // non-zero to determine that the whole vector is known non-zero.
3130 if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
3131 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
3132 if (!DemandedElts[i])
3133 continue;
3134 Constant *Elt = C->getAggregateElement(i);
3135 if (!Elt || Elt->isNullValue())
3136 return false;
3137 if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt))
3138 return false;
3139 }
3140 return true;
3141 }
3142
3143 // A global variable in address space 0 is non null unless extern weak
3144 // or an absolute symbol reference. Other address spaces may have null as a
3145 // valid address for a global, so we can't assume anything.
3146 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3147 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3148 GV->getType()->getAddressSpace() == 0)
3149 return true;
3150 }
3151
3152 // For constant expressions, fall through to the Operator code below.
3153 if (!isa<ConstantExpr>(V))
3154 return false;
3155 }
3156
3157 if (const auto *A = dyn_cast<Argument>(V))
3158 if (std::optional<ConstantRange> Range = A->getRange()) {
3159 const APInt ZeroValue(Range->getBitWidth(), 0);
3160 if (!Range->contains(ZeroValue))
3161 return true;
3162 }
3163
3164 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
3165 return true;
3166
3167 // Some of the tests below are recursive, so bail out if we hit the limit.
3168 if (Depth++ >= MaxAnalysisRecursionDepth)
3169 return false;
3170
3171 // Check for pointer simplifications.
3172
3173 if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
3174 // A byval or inalloca argument is never null, unless null is a valid
3175 // address in its address space. A nonnull argument is assumed never 0.
3176 if (const Argument *A = dyn_cast<Argument>(V)) {
3177 if (((A->hasPassPointeeByValueCopyAttr() &&
3178 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
3179 A->hasNonNullAttr()))
3180 return true;
3181 }
3182 }
3183
3184 if (const auto *I = dyn_cast<Operator>(V))
3185 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
3186 return true;
3187
3188 if (!isa<Constant>(V) &&
3190 return true;
3191
3192 return false;
3193}
3194
3195bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3196 unsigned Depth) {
3197 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
3198 APInt DemandedElts =
3199 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
3200 return ::isKnownNonZero(V, DemandedElts, Q, Depth);
3201}
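// Illustrative usage sketch (assumes a caller that already has DL, TLI, DT,
// AC and a context instruction in scope; not code from this file): a pass
// that wants to prove a divisor cannot be zero might query:
//
//   SimplifyQuery SQ(DL, &TLI, &DT, &AC, /*CxtI=*/&Div);
//   if (isKnownNonZero(Div.getOperand(1), SQ)) {
//     // safe to assume the divisor is non-zero here
//   }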
3202
3203/// If the pair of operators are the same invertible function, return the
3204 /// operands of the function corresponding to each input. Otherwise,
3205/// return std::nullopt. An invertible function is one that is 1-to-1 and maps
3206/// every input value to exactly one output value. This is equivalent to
3207/// saying that Op1 and Op2 are equal exactly when the specified pair of
3208/// operands are equal, (except that Op1 and Op2 may be poison more often.)
3209static std::optional<std::pair<Value*, Value*>>
3210getInvertibleOperands(const Operator *Op1,
3211 const Operator *Op2) {
3212 if (Op1->getOpcode() != Op2->getOpcode())
3213 return std::nullopt;
3214
3215 auto getOperands = [&](unsigned OpNum) -> auto {
3216 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
3217 };
3218
3219 switch (Op1->getOpcode()) {
3220 default:
3221 break;
3222 case Instruction::Or:
3223 if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
3224 !cast<PossiblyDisjointInst>(Op2)->isDisjoint())
3225 break;
3226 [[fallthrough]];
3227 case Instruction::Xor:
3228 case Instruction::Add: {
3229 Value *Other;
3230 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
3231 return std::make_pair(Op1->getOperand(1), Other);
3232 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
3233 return std::make_pair(Op1->getOperand(0), Other);
3234 break;
3235 }
3236 case Instruction::Sub:
3237 if (Op1->getOperand(0) == Op2->getOperand(0))
3238 return getOperands(1);
3239 if (Op1->getOperand(1) == Op2->getOperand(1))
3240 return getOperands(0);
3241 break;
3242 case Instruction::Mul: {
3243 // Invertible if A * B == (A * B) mod 2^N, where A and B are integers
3244 // and N is the bitwidth. The nsw case is non-obvious, but proven by
3245 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
3246 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3247 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3248 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3249 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3250 break;
3251
3252 // Assume operand order has been canonicalized
3253 if (Op1->getOperand(1) == Op2->getOperand(1) &&
3254 isa<ConstantInt>(Op1->getOperand(1)) &&
3255 !cast<ConstantInt>(Op1->getOperand(1))->isZero())
3256 return getOperands(0);
3257 break;
3258 }
3259 case Instruction::Shl: {
3260 // Same as multiplies, with the difference that we don't need to check
3261 // for a non-zero multiply. Shifts always multiply by non-zero.
3262 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3263 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3264 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3265 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3266 break;
3267
3268 if (Op1->getOperand(1) == Op2->getOperand(1))
3269 return getOperands(0);
3270 break;
3271 }
3272 case Instruction::AShr:
3273 case Instruction::LShr: {
3274 auto *PEO1 = cast<PossiblyExactOperator>(Op1);
3275 auto *PEO2 = cast<PossiblyExactOperator>(Op2);
3276 if (!PEO1->isExact() || !PEO2->isExact())
3277 break;
3278
3279 if (Op1->getOperand(1) == Op2->getOperand(1))
3280 return getOperands(0);
3281 break;
3282 }
3283 case Instruction::SExt:
3284 case Instruction::ZExt:
3285 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
3286 return getOperands(0);
3287 break;
3288 case Instruction::PHI: {
3289 const PHINode *PN1 = cast<PHINode>(Op1);
3290 const PHINode *PN2 = cast<PHINode>(Op2);
3291
3292 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3293 // are a single invertible function of the start values? Note that repeated
3294 // application of an invertible function is also invertible
3295 BinaryOperator *BO1 = nullptr;
3296 Value *Start1 = nullptr, *Step1 = nullptr;
3297 BinaryOperator *BO2 = nullptr;
3298 Value *Start2 = nullptr, *Step2 = nullptr;
3299 if (PN1->getParent() != PN2->getParent() ||
3300 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
3301 !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
3302 break;
3303
3304 auto Values = getInvertibleOperands(cast<Operator>(BO1),
3305 cast<Operator>(BO2));
3306 if (!Values)
3307 break;
3308
3309 // We have to be careful of mutually defined recurrences here. Ex:
3310 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3311 // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3312 // The invertibility of these is complicated, and not worth reasoning
3313 // about (yet?).
3314 if (Values->first != PN1 || Values->second != PN2)
3315 break;
3316
3317 return std::make_pair(Start1, Start2);
3318 }
3319 }
3320 return std::nullopt;
3321}
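// For illustration (a sketch, not exercised directly here): addition of the
// same value is invertible, so proving the results distinct reduces to
// proving the remaining operands distinct, e.g.
//
//   %a = add i32 %x, %c
//   %b = add i32 %y, %c
//   ; getInvertibleOperands returns {%x, %y}, so %a != %b whenever %x != %y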
3322
3323/// Return true if V1 == (binop V2, X), where X is known non-zero.
3324/// Only handle a small subset of binops where (binop V2, X) with non-zero X
3325/// implies V2 != V1.
3326static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3327 unsigned Depth, const SimplifyQuery &Q) {
3328 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
3329 if (!BO)
3330 return false;
3331 switch (BO->getOpcode()) {
3332 default:
3333 break;
3334 case Instruction::Or:
3335 if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
3336 break;
3337 [[fallthrough]];
3338 case Instruction::Xor:
3339 case Instruction::Add:
3340 Value *Op = nullptr;
3341 if (V2 == BO->getOperand(0))
3342 Op = BO->getOperand(1);
3343 else if (V2 == BO->getOperand(1))
3344 Op = BO->getOperand(0);
3345 else
3346 return false;
3347 return isKnownNonZero(Op, Q, Depth + 1);
3348 }
3349 return false;
3350}
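// For illustration: with
//
//   %v1 = add i32 %v2, %n   ; %n known non-zero
//
// the sum can never equal %v2, because adding a non-zero value always changes
// an integer modulo 2^N. The same argument covers xor and disjoint or above.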
3351
3352/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3353/// the multiplication is nuw or nsw.
3354static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
3355 const SimplifyQuery &Q) {
3356 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3357 const APInt *C;
3358 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
3359 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3360 !C->isZero() && !C->isOne() && isKnownNonZero(V1, Q, Depth + 1);
3361 }
3362 return false;
3363}
3364
3365/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3366/// the shift is nuw or nsw.
3367static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
3368 const SimplifyQuery &Q) {
3369 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3370 const APInt *C;
3371 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
3372 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3373 !C->isZero() && isKnownNonZero(V1, Q, Depth + 1);
3374 }
3375 return false;
3376}
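// For illustration: both helpers above encode that scaling a known non-zero
// value by a non-trivial constant cannot reproduce it when the operation does
// not wrap, e.g.
//
//   %v2 = mul nuw i32 %v1, 3   ; %v1 known non-zero  =>  %v2 != %v1
//   %v2 = shl nsw i32 %v1, 1   ; %v1 known non-zero  =>  %v2 != %v1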
3377
3378static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3379 unsigned Depth, const SimplifyQuery &Q) {
3380 // Check two PHIs are in same block.
3381 if (PN1->getParent() != PN2->getParent())
3382 return false;
3383
3384 SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3385 bool UsedFullRecursion = false;
3386 for (const BasicBlock *IncomBB : PN1->blocks()) {
3387 if (!VisitedBBs.insert(IncomBB).second)
3388 continue; // Don't reprocess blocks that we have dealt with already.
3389 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
3390 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
3391 const APInt *C1, *C2;
3392 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
3393 continue;
3394
3395 // Only one pair of phi operands is allowed for full recursion.
3396 if (UsedFullRecursion)
3397 return false;
3398
3399 SimplifyQuery RecQ = Q;
3400 RecQ.CxtI = IncomBB->getTerminator();
3401 if (!isKnownNonEqual(IV1, IV2, Depth + 1, RecQ))
3402 return false;
3403 UsedFullRecursion = true;
3404 }
3405 return true;
3406}
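// For illustration: two PHIs in the same block compare non-equal if every
// pair of incoming values does, with at most one pair needing the full
// recursive query, e.g.
//
//   %p = phi i32 [ 0, %a ], [ %x, %b ]
//   %q = phi i32 [ 1, %a ], [ %y, %b ]
//   ; (0, 1) differ as constants; only (%x, %y) consumes the recursion budget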
3407
3408static bool isNonEqualSelect(const Value *V1, const Value *V2, unsigned Depth,
3409 const SimplifyQuery &Q) {
3410 const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
3411 if (!SI1)
3412 return false;
3413
3414 if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
3415 const Value *Cond1 = SI1->getCondition();
3416 const Value *Cond2 = SI2->getCondition();
3417 if (Cond1 == Cond2)
3418 return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
3419 Depth + 1, Q) &&
3420 isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
3421 Depth + 1, Q);
3422 }
3423 return isKnownNonEqual(SI1->getTrueValue(), V2, Depth + 1, Q) &&
3424 isKnownNonEqual(SI1->getFalseValue(), V2, Depth + 1, Q);
3425}
3426
3427 // Check to see if A is both a GEP and the incoming value for a PHI in a
3428 // loop, and B is either a ptr or another GEP. If the PHI has 2 incoming
3429 // values, one being the recursive GEP A and the other a ptr with the same
3430 // base as B at the same or a higher offset, then the loop only moves the
3431 // pointer further away from B while the recursive GEP's offset is positive.
3432static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3433 const SimplifyQuery &Q) {
3434 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
3435 return false;
3436
3437 auto *GEPA = dyn_cast<GEPOperator>(A);
3438 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
3439 return false;
3440
3441 // Handle 2 incoming PHI values with one being a recursive GEP.
3442 auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
3443 if (!PN || PN->getNumIncomingValues() != 2)
3444 return false;
3445
3446 // Search for the recursive GEP as an incoming operand, and record that as
3447 // Step.
3448 Value *Start = nullptr;
3449 Value *Step = const_cast<Value *>(A);
3450 if (PN->getIncomingValue(0) == Step)
3451 Start = PN->getIncomingValue(1);
3452 else if (PN->getIncomingValue(1) == Step)
3453 Start = PN->getIncomingValue(0);
3454 else
3455 return false;
3456
3457 // Other incoming node base should match the B base.
3458 // StartOffset >= OffsetB && StepOffset > 0?
3459 // StartOffset <= OffsetB && StepOffset < 0?
3460 // Is non-equal if above are true.
3461 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3462 // optimisation to inbounds GEPs only.
3463 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
3464 APInt StartOffset(IndexWidth, 0);
3465 Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
3466 APInt StepOffset(IndexWidth, 0);
3467 Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);
3468
3469 // Check if Base Pointer of Step matches the PHI.
3470 if (Step != PN)
3471 return false;
3472 APInt OffsetB(IndexWidth, 0);
3473 B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
3474 return Start == B &&
3475 ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
3476 (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
3477}
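// For illustration (a sketch of the recognized shape): a pointer that strides
// away from a fixed start in a loop, e.g.
//
//   loop:
//     %p      = phi ptr [ %start, %entry ], [ %p.next, %loop ]
//     %p.next = getelementptr inbounds i8, ptr %p, i64 4
//
// If %start shares a base with B at the same or a higher offset and the step
// is positive (or the symmetric negative case), %p.next can never equal B.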
3478
3479/// Return true if it is known that V1 != V2.
3480static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
3481 const SimplifyQuery &Q) {
3482 if (V1 == V2)
3483 return false;
3484 if (V1->getType() != V2->getType())
3485 // We can't look through casts yet.
3486 return false;
3487
3488 if (Depth >= MaxAnalysisRecursionDepth)
3489 return false;
3490
3491 // See if we can recurse through (exactly one of) our operands. This
3492 // requires our operation be 1-to-1 and map every input value to exactly
3493 // one output value. Such an operation is invertible.
3494 auto *O1 = dyn_cast<Operator>(V1);
3495 auto *O2 = dyn_cast<Operator>(V2);
3496 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
3497 if (auto Values = getInvertibleOperands(O1, O2))
3498 return isKnownNonEqual(Values->first, Values->second, Depth + 1, Q);
3499
3500 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
3501 const PHINode *PN2 = cast<PHINode>(V2);
3502 // FIXME: This is missing a generalization to handle the case where one is
3503 // a PHI and another one isn't.
3504 if (isNonEqualPHIs(PN1, PN2, Depth, Q))
3505 return true;
3506 };
3507 }
3508
3509 if (isModifyingBinopOfNonZero(V1, V2, Depth, Q) ||
3510 isModifyingBinopOfNonZero(V2, V1, Depth, Q))
3511 return true;
3512
3513 if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V2, V1, Depth, Q))
3514 return true;
3515
3516 if (isNonEqualShl(V1, V2, Depth, Q) || isNonEqualShl(V2, V1, Depth, Q))
3517 return true;
3518
3519 if (V1->getType()->isIntOrIntVectorTy()) {
3520 // Are any known bits in V1 contradictory to known bits in V2? If V1
3521 // has a known zero where V2 has a known one, they must not be equal.
3522 KnownBits Known1 = computeKnownBits(V1, Depth, Q);
3523 if (!Known1.isUnknown()) {
3524 KnownBits Known2 = computeKnownBits(V2, Depth, Q);
3525 if (Known1.Zero.intersects(Known2.One) ||
3526 Known2.Zero.intersects(Known1.One))
3527 return true;
3528 }
3529 }
3530
3531 if (isNonEqualSelect(V1, V2, Depth, Q) || isNonEqualSelect(V2, V1, Depth, Q))
3532 return true;
3533
3534 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
3535 isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
3536 return true;
3537
3538 Value *A, *B;
3539 // PtrToInts are NonEqual if their Ptrs are NonEqual.
3540 // Check PtrToInt type matches the pointer size.
3541 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
3542 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
3543 return isKnownNonEqual(A, B, Depth + 1, Q);
3544
3545 return false;
3546}
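// For illustration: the known-bits fallback above proves inequality whenever
// the two values disagree on a bit that is known in both, e.g.
//
//   %a = or i32 %x, 1    ; bit 0 known one
//   %b = shl i32 %y, 1   ; bit 0 known zero
//   ; Known1.One intersects Known2.Zero, so %a != %b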
3547
3548// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
3549// Returns the input and lower/upper bounds.
3550static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
3551 const APInt *&CLow, const APInt *&CHigh) {
3552 assert(isa<Operator>(Select) &&
3553 cast<Operator>(Select)->getOpcode() == Instruction::Select &&
3554 "Input should be a Select!");
3555
3556 const Value *LHS = nullptr, *RHS = nullptr;
3557 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
3558 if (SPF != SPF_SMAX && SPF != SPF_SMIN)
3559 return false;
3560
3561 if (!match(RHS, m_APInt(CLow)))
3562 return false;
3563
3564 const Value *LHS2 = nullptr, *RHS2 = nullptr;
3565 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
3566 if (getInverseMinMaxFlavor(SPF) != SPF2)
3567 return false;
3568
3569 if (!match(RHS2, m_APInt(CHigh)))
3570 return false;
3571
3572 if (SPF == SPF_SMIN)
3573 std::swap(CLow, CHigh);
3574
3575 In = LHS2;
3576 return CLow->sle(*CHigh);
3577}
3578
3579static bool isSignedMinMaxIntrinsicClamp(const IntrinsicInst *II,
3580 const APInt *&CLow,
3581 const APInt *&CHigh) {
3582 assert((II->getIntrinsicID() == Intrinsic::smin ||
3583 II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");
3584
3585 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
3586 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
3587 if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
3588 !match(II->getArgOperand(1), m_APInt(CLow)) ||
3589 !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
3590 return false;
3591
3592 if (II->getIntrinsicID() == Intrinsic::smin)
3593 std::swap(CLow, CHigh);
3594 return CLow->sle(*CHigh);
3595}
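// For illustration: both clamp matchers recognize the same idiom, whether it
// is written with selects or with the min/max intrinsics, e.g.
//
//   %lo = call i32 @llvm.smax.i32(i32 %c, i32 -128)
//   %v  = call i32 @llvm.smin.i32(i32 %lo, i32 127)
//   ; In = %c, CLow = -128, CHigh = 127
//
// An i32 clamped to [-128, 127] has at least 25 sign bits, which is how the
// sign-bit computation below uses this information.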
3596
3597/// For vector constants, loop over the elements and find the constant with the
3598/// minimum number of sign bits. Return 0 if the value is not a vector constant
3599/// or if any element was not analyzed; otherwise, return the count for the
3600/// element with the minimum number of sign bits.
3601static unsigned computeNumSignBitsVectorConstant(const Value *V,
3602 const APInt &DemandedElts,
3603 unsigned TyBits) {
3604 const auto *CV = dyn_cast<Constant>(V);
3605 if (!CV || !isa<FixedVectorType>(CV->getType()))
3606 return 0;
3607
3608 unsigned MinSignBits = TyBits;
3609 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3610 for (unsigned i = 0; i != NumElts; ++i) {
3611 if (!DemandedElts[i])
3612 continue;
3613 // If we find a non-ConstantInt, bail out.
3614 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3615 if (!Elt)
3616 return 0;
3617
3618 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3619 }
3620
3621 return MinSignBits;
3622}
3623
3624static unsigned ComputeNumSignBitsImpl(const Value *V,
3625 const APInt &DemandedElts,
3626 unsigned Depth, const SimplifyQuery &Q);
3627
3628static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3629 unsigned Depth, const SimplifyQuery &Q) {
3630 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3631 assert(Result > 0 && "At least one sign bit needs to be present!");
3632 return Result;
3633}
3634
3635/// Return the number of times the sign bit of the register is replicated into
3636/// the other bits. We know that at least 1 bit is always equal to the sign bit
3637/// (itself), but other cases can give us information. For example, immediately
3638/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3639/// other, so we return 3. For vectors, return the number of sign bits for the
3640/// vector element with the minimum number of known sign bits of the demanded
3641/// elements in the vector specified by DemandedElts.
3642static unsigned ComputeNumSignBitsImpl(const Value *V,
3643 const APInt &DemandedElts,
3644 unsigned Depth, const SimplifyQuery &Q) {
3645 Type *Ty = V->getType();
3646#ifndef NDEBUG
3647 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3648
3649 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3650 assert(
3651 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3652 "DemandedElt width should equal the fixed vector number of elements");
3653 } else {
3654 assert(DemandedElts == APInt(1, 1) &&
3655 "DemandedElt width should be 1 for scalars");
3656 }
3657#endif
3658
3659 // We return the minimum number of sign bits that are guaranteed to be present
3660 // in V, so for undef we have to conservatively return 1. We don't have the
3661 // same behavior for poison though -- that's a FIXME today.
3662
3663 Type *ScalarTy = Ty->getScalarType();
3664 unsigned TyBits = ScalarTy->isPointerTy() ?
3665 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
3666 Q.DL.getTypeSizeInBits(ScalarTy);
3667
3668 unsigned Tmp, Tmp2;
3669 unsigned FirstAnswer = 1;
3670
3671 // Note that ConstantInt is handled by the general computeKnownBits case
3672 // below.
3673
3674 if (Depth == MaxAnalysisRecursionDepth)
3675 return 1;
3676
3677 if (auto *U = dyn_cast<Operator>(V)) {
3678 switch (Operator::getOpcode(V)) {
3679 default: break;
3680 case Instruction::SExt:
3681 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
3682 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
3683
3684 case Instruction::SDiv: {
3685 const APInt *Denominator;
3686 // sdiv X, C -> adds log(C) sign bits.
3687 if (match(U->getOperand(1), m_APInt(Denominator))) {
3688
3689 // Ignore non-positive denominator.
3690 if (!Denominator->isStrictlyPositive())
3691 break;
3692
3693 // Calculate the incoming numerator bits.
3694 unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3695
3696 // Add floor(log(C)) bits to the numerator bits.
3697 return std::min(TyBits, NumBits + Denominator->logBase2());
3698 }
3699 break;
3700 }
3701
3702 case Instruction::SRem: {
3703 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3704
3705 const APInt *Denominator;
3706 // srem X, C -> we know that the result is within [-C+1,C) when C is a
3707 // positive constant. This lets us put a lower bound on the number of sign
3708 // bits.
3709 if (match(U->getOperand(1), m_APInt(Denominator))) {
3710
3711 // Ignore non-positive denominator.
3712 if (Denominator->isStrictlyPositive()) {
3713 // Calculate the leading sign bit constraints by examining the
3714 // denominator. Given that the denominator is positive, there are two
3715 // cases:
3716 //
3717 // 1. The numerator is positive. The result range is [0,C) and
3718 // [0,C) u< (1 << ceilLogBase2(C)).
3719 //
3720 // 2. The numerator is negative. Then the result range is (-C,0] and
3721 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3722 //
3723 // Thus a lower bound on the number of sign bits is `TyBits -
3724 // ceilLogBase2(C)`.
3725
3726 unsigned ResBits = TyBits - Denominator->ceilLogBase2();
3727 Tmp = std::max(Tmp, ResBits);
3728 }
3729 }
3730 return Tmp;
3731 }
3732
3733 case Instruction::AShr: {
3734 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3735 // ashr X, C -> adds C sign bits. Vectors too.
3736 const APInt *ShAmt;
3737 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3738 if (ShAmt->uge(TyBits))
3739 break; // Bad shift.
3740 unsigned ShAmtLimited = ShAmt->getZExtValue();
3741 Tmp += ShAmtLimited;
3742 if (Tmp > TyBits) Tmp = TyBits;
3743 }
3744 return Tmp;
3745 }
3746 case Instruction::Shl: {
3747 const APInt *ShAmt;
3748 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3749 // shl destroys sign bits.
3750 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3751 if (ShAmt->uge(TyBits) || // Bad shift.
3752 ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
3753 Tmp2 = ShAmt->getZExtValue();
3754 return Tmp - Tmp2;
3755 }
3756 break;
3757 }
3758 case Instruction::And:
3759 case Instruction::Or:
3760 case Instruction::Xor: // NOT is handled here.
3761 // Logical binary ops preserve the number of sign bits at the worst.
3762 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3763 if (Tmp != 1) {
3764 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3765 FirstAnswer = std::min(Tmp, Tmp2);
3766 // We computed what we know about the sign bits as our first
3767 // answer. Now proceed to the generic code that uses
3768 // computeKnownBits, and pick whichever answer is better.
3769 }
3770 break;
3771
3772 case Instruction::Select: {
3773 // If we have a clamp pattern, we know that the number of sign bits will
3774 // be the minimum of the clamp min/max range.
3775 const Value *X;
3776 const APInt *CLow, *CHigh;
3777 if (isSignedMinMaxClamp(U, X, CLow, CHigh))
3778 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
3779
3780 Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3781 if (Tmp == 1) break;
3782 Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
3783 return std::min(Tmp, Tmp2);
3784 }
3785
3786 case Instruction::Add:
3787 // Add can have at most one carry bit. Thus we know that the output
3788 // is, at worst, one more bit than the inputs.
3789 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3790 if (Tmp == 1) break;
3791
3792 // Special case decrementing a value (ADD X, -1):
3793 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
3794 if (CRHS->isAllOnesValue()) {
3795 KnownBits Known(TyBits);
3796 computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
3797
3798 // If the input is known to be 0 or 1, the output is 0/-1, which is
3799 // all sign bits set.
3800 if ((Known.Zero | 1).isAllOnes())
3801 return TyBits;
3802
3803 // If we are subtracting one from a positive number, there is no carry
3804 // out of the result.
3805 if (Known.isNonNegative())
3806 return Tmp;
3807 }
3808
3809 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3810 if (Tmp2 == 1) break;
3811 return std::min(Tmp, Tmp2) - 1;
3812
3813 case Instruction::Sub:
3814 Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3815 if (Tmp2 == 1) break;
3816
3817 // Handle NEG.
3818 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
3819 if (CLHS->isNullValue()) {
3820 KnownBits Known(TyBits);
3821 computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
3822 // If the input is known to be 0 or 1, the output is 0/-1, which is
3823 // all sign bits set.
3824 if ((Known.Zero | 1).isAllOnes())
3825 return TyBits;
3826
3827 // If the input is known to be positive (the sign bit is known clear),
3828 // the output of the NEG has the same number of sign bits as the
3829 // input.
3830 if (Known.isNonNegative())
3831 return Tmp2;
3832
3833 // Otherwise, we treat this like a SUB.
3834 }
3835
3836 // Sub can have at most one carry bit. Thus we know that the output
3837 // is, at worst, one more bit than the inputs.
3838 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3839 if (Tmp == 1) break;
3840 return std::min(Tmp, Tmp2) - 1;
3841
3842 case Instruction::Mul: {
3843 // The output of the Mul can be at most twice the valid bits in the
3844 // inputs.
3845 unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3846 if (SignBitsOp0 == 1) break;
3847 unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3848 if (SignBitsOp1 == 1) break;
3849 unsigned OutValidBits =
3850 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
3851 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
3852 }
3853
3854 case Instruction::PHI: {
3855 const PHINode *PN = cast<PHINode>(U);
3856 unsigned NumIncomingValues = PN->getNumIncomingValues();
3857 // Don't analyze large in-degree PHIs.
3858 if (NumIncomingValues > 4) break;
3859 // Unreachable blocks may have zero-operand PHI nodes.
3860 if (NumIncomingValues == 0) break;
3861
3862 // Take the minimum of all incoming values. This can't infinitely loop
3863 // because of our depth threshold.
3864 SimplifyQuery RecQ = Q;
3865 Tmp = TyBits;
3866 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
3867 if (Tmp == 1) return Tmp;
3868 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
3869 Tmp = std::min(
3870 Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ));
3871 }
3872 return Tmp;
3873 }
3874
3875 case Instruction::Trunc: {
3876 // If the input contained enough sign bits that some remain after the
3877 // truncation, then we can make use of that. Otherwise we don't know
3878 // anything.
3879 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3880 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits();
3881 if (Tmp > (OperandTyBits - TyBits))
3882 return Tmp - (OperandTyBits - TyBits);
3883
3884 return 1;
3885 }
3886
3887 case Instruction::ExtractElement:
3888 // Look through extract element. At the moment we keep this simple and
3889 // skip tracking the specific element. But at least we might find
3890 // information valid for all elements of the vector (for example if vector
3891 // is sign extended, shifted, etc).
3892 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3893
3894 case Instruction::ShuffleVector: {
3895 // Collect the minimum number of sign bits that are shared by every vector
3896 // element referenced by the shuffle.
3897 auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
3898 if (!Shuf) {
3899 // FIXME: Add support for shufflevector constant expressions.
3900 return 1;
3901 }
3902 APInt DemandedLHS, DemandedRHS;
3903 // For undef elements, we don't know anything about the common state of
3904 // the shuffle result.
3905 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3906 return 1;
3907 Tmp = std::numeric_limits<unsigned>::max();
3908 if (!!DemandedLHS) {
3909 const Value *LHS = Shuf->getOperand(0);
3910 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
3911 }
3912 // If we don't know anything, early out and try computeKnownBits
3913 // fall-back.
3914 if (Tmp == 1)
3915 break;
3916 if (!!DemandedRHS) {
3917 const Value *RHS = Shuf->getOperand(1);
3918 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
3919 Tmp = std::min(Tmp, Tmp2);
3920 }
3921 // If we don't know anything, early out and try computeKnownBits
3922 // fall-back.
3923 if (Tmp == 1)
3924 break;
3925 assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
3926 return Tmp;
3927 }
3928 case Instruction::Call: {
3929 if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
3930 switch (II->getIntrinsicID()) {
3931 default: break;
3932 case Intrinsic::abs:
3933 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3934 if (Tmp == 1) break;
3935
3936 // Absolute value reduces number of sign bits by at most 1.
3937 return Tmp - 1;
3938 case Intrinsic::smin:
3939 case Intrinsic::smax: {
3940 const APInt *CLow, *CHigh;
3941 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
3942 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
3943 }
3944 }
3945 }
3946 }
3947 }
3948 }
3949
3950 // Finally, if we can prove that the top bits of the result are 0's or 1's,
3951 // use this information.
3952
3953 // If we can examine all elements of a vector constant successfully, we're
3954 // done (we can't do any better than that). If not, keep trying.
3955 if (unsigned VecSignBits =
3956 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
3957 return VecSignBits;
3958
3959 KnownBits Known(TyBits);
3960 computeKnownBits(V, DemandedElts, Known, Depth, Q);
3961
3962 // If we know that the sign bit is either zero or one, determine the number of
3963 // identical bits in the top of the input value.
3964 return std::max(FirstAnswer, Known.countMinSignBits());
3965}
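// Worked example for the AShr case above (illustrative only): given
//
//   %s = ashr i32 %x, 3
//
// the shift amount is added to the operand's count, so if %x already has 2
// known sign bits the result has min(2 + 3, 32) = 5, i.e. the top five bits
// of %s are copies of its sign bit.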
3966
3967Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
3968 const TargetLibraryInfo *TLI) {
3969 const Function *F = CB.getCalledFunction();
3970 if (!F)
3971 return Intrinsic::not_intrinsic;
3972
3973 if (F->isIntrinsic())
3974 return F->getIntrinsicID();
3975
3976 // We are going to infer semantics of a library function based on mapping it
3977 // to an LLVM intrinsic. Check that the library function is available from
3978 // this callbase and in this environment.
3979 LibFunc Func;
3980 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
3981 !CB.onlyReadsMemory())
3982 return Intrinsic::not_intrinsic;
3983
3984 switch (Func) {
3985 default:
3986 break;
3987 case LibFunc_sin:
3988 case LibFunc_sinf:
3989 case LibFunc_sinl:
3990 return Intrinsic::sin;
3991 case LibFunc_cos:
3992 case LibFunc_cosf:
3993 case LibFunc_cosl:
3994 return Intrinsic::cos;
3995 case LibFunc_exp:
3996 case LibFunc_expf:
3997 case LibFunc_expl:
3998 return Intrinsic::exp;
3999 case LibFunc_exp2:
4000 case LibFunc_exp2f:
4001 case LibFunc_exp2l:
4002 return Intrinsic::exp2;
4003 case LibFunc_log:
4004 case LibFunc_logf:
4005 case LibFunc_logl:
4006 return Intrinsic::log;
4007 case LibFunc_log10:
4008 case LibFunc_log10f:
4009 case LibFunc_log10l:
4010 return Intrinsic::log10;
4011 case LibFunc_log2:
4012 case LibFunc_log2f:
4013 case LibFunc_log2l:
4014 return Intrinsic::log2;
4015 case LibFunc_fabs:
4016 case LibFunc_fabsf:
4017 case LibFunc_fabsl:
4018 return Intrinsic::fabs;
4019 case LibFunc_fmin:
4020 case LibFunc_fminf:
4021 case LibFunc_fminl:
4022 return Intrinsic::minnum;
4023 case LibFunc_fmax:
4024 case LibFunc_fmaxf:
4025 case LibFunc_fmaxl:
4026 return Intrinsic::maxnum;
4027 case LibFunc_copysign:
4028 case LibFunc_copysignf:
4029 case LibFunc_copysignl:
4030 return Intrinsic::copysign;
4031 case LibFunc_floor:
4032 case LibFunc_floorf:
4033 case LibFunc_floorl:
4034 return Intrinsic::floor;
4035 case LibFunc_ceil:
4036 case LibFunc_ceilf:
4037 case LibFunc_ceill:
4038 return Intrinsic::ceil;
4039 case LibFunc_trunc:
4040 case LibFunc_truncf:
4041 case LibFunc_truncl:
4042 return Intrinsic::trunc;
4043 case LibFunc_rint:
4044 case LibFunc_rintf:
4045 case LibFunc_rintl:
4046 return Intrinsic::rint;
4047 case LibFunc_nearbyint:
4048 case LibFunc_nearbyintf:
4049 case LibFunc_nearbyintl:
4050 return Intrinsic::nearbyint;
4051 case LibFunc_round:
4052 case LibFunc_roundf:
4053 case LibFunc_roundl:
4054 return Intrinsic::round;
4055 case LibFunc_roundeven:
4056 case LibFunc_roundevenf:
4057 case LibFunc_roundevenl:
4058 return Intrinsic::roundeven;
4059 case LibFunc_pow:
4060 case LibFunc_powf:
4061 case LibFunc_powl:
4062 return Intrinsic::pow;
4063 case LibFunc_sqrt:
4064 case LibFunc_sqrtf:
4065 case LibFunc_sqrtl:
4066 return Intrinsic::sqrt;
4067 }
4068
4069 return Intrinsic::not_intrinsic;
4070}
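// Illustrative usage sketch (Call and TLI stand for a caller's CallBase and
// TargetLibraryInfo; not code from this file):
//
//   if (getIntrinsicForCallSite(*Call, &TLI) == Intrinsic::fabs) {
//     // the call behaves like llvm.fabs: its result never has the sign bit set
//   }
//
// Without TLI, or for calls that may write memory, the mapping conservatively
// yields Intrinsic::not_intrinsic.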
4071
4072/// Return true if it's possible to assume IEEE treatment of input denormals in
4073/// \p F for \p Val.
4074static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
4075 Ty = Ty->getScalarType();
4076 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
4077}
4078
4079static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4080 Ty = Ty->getScalarType();
4081 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4082 return Mode.Input == DenormalMode::IEEE ||
4083 Mode.Input == DenormalMode::PositiveZero;
4084}
4085
4086static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4087 Ty = Ty->getScalarType();
4088 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4089 return Mode.Output == DenormalMode::IEEE ||
4090 Mode.Output == DenormalMode::PositiveZero;
4091}
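// For illustration: these helpers read the per-function "denormal-fp-math"
// mode, e.g. under
//
//   attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
//
// inputDenormalIsIEEE returns false, so a subnormal input may be flushed to
// zero and the "logical zero" queries below have to account for that.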
4092
4093bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const {
4094 return isKnownNeverZero() &&
4095 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty));
4096}
4097
4098bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F,
4099 Type *Ty) const {
4100 return isKnownNeverNegZero() &&
4101 (isKnownNeverSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty));
4102}
4103
4104bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F,
4105 Type *Ty) const {
4106 if (!isKnownNeverPosZero())
4107 return false;
4108
4109 // If we know there are no denormals, nothing can be flushed to zero.
4110 if (isKnownNeverSubnormal())
4111 return true;
4112
4113 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4114 switch (Mode.Input) {
4115 case DenormalMode::IEEE:
4116 return true;
4117 case DenormalMode::PreserveSign:
4118 // Negative subnormal won't flush to +0
4119 return isKnownNeverPosSubnormal();
4120 case DenormalMode::PositiveZero:
4121 default:
4122 // Both positive and negative subnormal could flush to +0
4123 return false;
4124 }
4125
4126 llvm_unreachable("covered switch over denormal mode");
4127}
4128
4129void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F,
4130 Type *Ty) {
4131 KnownFPClasses = Src.KnownFPClasses;
4132 // If we aren't assuming the source can't be a zero, we don't have to check if
4133 // a denormal input could be flushed.
4134 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero())
4135 return;
4136
4137 // If we know the input can't be a denormal, it can't be flushed to 0.
4138 if (Src.isKnownNeverSubnormal())
4139 return;
4140
4141 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4142
4143 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE())
4144 KnownFPClasses |= fcPosZero;
4145
4146 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) {
4147 if (Mode != DenormalMode::getPositiveZero())
4148 KnownFPClasses |= fcNegZero;
4149
4150 if (Mode.Input == DenormalMode::PositiveZero ||
4151 Mode.Output == DenormalMode::PositiveZero ||
4152 Mode.Input == DenormalMode::Dynamic ||
4153 Mode.Output == DenormalMode::Dynamic)
4154 KnownFPClasses |= fcPosZero;
4155 }
4156}
4157
4158void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
4159 const Function &F, Type *Ty) {
4160 propagateDenormal(Src, F, Ty);
4161 propagateNaN(Src, /*PreserveSign=*/true);
4162}
4163
4164/// Given an exploded icmp instruction, return true if the comparison only
4165/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if
4166/// the result of the comparison is true when the input value is signed.
4167static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4168 bool &TrueIfSigned) {
4169 switch (Pred) {
4170 case ICmpInst::ICMP_SLT: // True if LHS s< 0
4171 TrueIfSigned = true;
4172 return RHS.isZero();
4173 case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4174 TrueIfSigned = true;
4175 return RHS.isAllOnes();
4176 case ICmpInst::ICMP_SGT: // True if LHS s> -1
4177 TrueIfSigned = false;
4178 return RHS.isAllOnes();
4179 case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4180 TrueIfSigned = false;
4181 return RHS.isZero();
4182 case ICmpInst::ICMP_UGT:
4183 // True if LHS u> RHS and RHS == sign-bit-mask - 1
4184 TrueIfSigned = true;
4185 return RHS.isMaxSignedValue();
4186 case ICmpInst::ICMP_UGE:
4187 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4188 TrueIfSigned = true;
4189 return RHS.isMinSignedValue();
4190 case ICmpInst::ICMP_ULT:
4191 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4192 TrueIfSigned = false;
4193 return RHS.isMinSignedValue();
4194 case ICmpInst::ICMP_ULE:
4195 // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4196 TrueIfSigned = false;
4197 return RHS.isMaxSignedValue();
4198 default:
4199 return false;
4200 }
4201}
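// For illustration: the table above folds all equivalent sign-bit idioms into
// one form; for i8 both of these are recognized with TrueIfSigned == true:
//
//   %t0 = icmp slt i8 %x, 0     ; RHS == 0
//   %t1 = icmp ugt i8 %x, 127   ; RHS == sign-bit-mask - 1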
4202
4203/// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
4204/// same result as an fcmp with the given operands.
4205std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
4206 const Function &F,
4207 Value *LHS, Value *RHS,
4208 bool LookThroughSrc) {
4209 const APFloat *ConstRHS;
4210 if (!match(RHS, m_APFloatAllowPoison(ConstRHS)))
4211 return {nullptr, fcAllFlags};
4212
4213 return fcmpToClassTest(Pred, F, LHS, ConstRHS, LookThroughSrc);
4214}
4215
4216std::pair<Value *, FPClassTest>
4217llvm::fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS,
4218 const APFloat *ConstRHS, bool LookThroughSrc) {
4219
4220 auto [Src, ClassIfTrue, ClassIfFalse] =
4221 fcmpImpliesClass(Pred, F, LHS, *ConstRHS, LookThroughSrc);
4222 if (Src && ClassIfTrue == ~ClassIfFalse)
4223 return {Src, ClassIfTrue};
4224 return {nullptr, fcAllFlags};
4225}
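// For illustration: when the compare is an exact class test, the returned
// pair lets a caller rewrite it as an is.fpclass query, e.g. the classic
// isinf idiom
//
//   %f = call float @llvm.fabs.f32(float %x)
//   %c = fcmp oeq float %f, 0x7FF0000000000000   ; +infinity
//
// yields {%x, fcInf}: %c is true exactly when %x is +/- infinity.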
4226
4227/// Return the return value for fcmpImpliesClass for a compare that produces an
4228/// exact class test.
4229static std::tuple<Value *, FPClassTest, FPClassTest> exactClass(Value *V,
4230 FPClassTest M) {
4231 return {V, M, ~M};
4232}
4233
4234std::tuple<Value *, FPClassTest, FPClassTest>
4235llvm::fcmpImpliesClass(FCmpInst::Predicate Pred, const Function &F, Value *LHS,
4236 FPClassTest RHSClass, bool LookThroughSrc) {
4237 assert(RHSClass != fcNone);
4238 Value *Src = LHS;
4239
4240 if (Pred == FCmpInst::FCMP_TRUE)
4241 return exactClass(Src, fcAllFlags);
4242
4243 if (Pred == FCmpInst::FCMP_FALSE)
4244 return exactClass(Src, fcNone);
4245
4246 const FPClassTest OrigClass = RHSClass;
4247
4248 const bool IsNegativeRHS = (RHSClass & fcNegative) == RHSClass;
4249 const bool IsPositiveRHS = (RHSClass & fcPositive) == RHSClass;
4250 const bool IsNaN = (RHSClass & ~fcNan) == fcNone;
4251
4252 if (IsNaN) {
4253 // fcmp o__ x, nan -> false
4254 // fcmp u__ x, nan -> true
4255 return exactClass(Src, CmpInst::isOrdered(Pred) ? fcNone : fcAllFlags);
4256 }
4257
4258 // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan
4259 if (Pred == FCmpInst::FCMP_ORD)
4260 return exactClass(Src, ~fcNan);
4261
4262 // fcmp uno x, zero|normal|subnormal|inf -> fcNan
4263 if (Pred == FCmpInst::FCMP_UNO)
4264 return exactClass(Src, fcNan);
4265
4266 const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src)));
4267 if (IsFabs)
4268 RHSClass = llvm::inverse_fabs(RHSClass);
4269
4270 const bool IsZero = (OrigClass & fcZero) == OrigClass;
4271 if (IsZero) {
4272 assert(Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO);
4273 // Compares with fcNone are only exactly equal to fcZero if input denormals
4274 // are not flushed.
4275 // TODO: Handle DAZ by expanding masks to cover subnormal cases.
4276 if (!inputDenormalIsIEEE(F, LHS->getType()))
4277 return {nullptr, fcAllFlags, fcAllFlags};
4278
4279 switch (Pred) {
4280 case FCmpInst::FCMP_OEQ: // Match x == 0.0
4281 return exactClass(Src, fcZero);
4282 case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0)
4283 return exactClass(Src, fcZero | fcNan);
4284 case FCmpInst::FCMP_UNE: // Match (x != 0.0)
4285 return exactClass(Src, ~fcZero);
4286 case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0
4287 return exactClass(Src, ~fcNan & ~fcZero);
4288 case FCmpInst::FCMP_ORD:
4289 // Canonical form of ord/uno is with a zero. We could also handle
4290 // non-canonical other non-NaN constants or LHS == RHS.
4291 return exactClass(Src, ~fcNan);
4292 case FCmpInst::FCMP_UNO:
4293 return exactClass(Src, fcNan);
4294 case FCmpInst::FCMP_OGT: // x > 0
4295 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf);
4296 case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
4297 return exactClass(Src, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan);
4298 case FCmpInst::FCMP_OGE: // x >= 0
4299 return exactClass(Src, fcPositive | fcNegZero);
4300 case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
4301 return exactClass(Src, fcPositive | fcNegZero | fcNan);
4302 case FCmpInst::FCMP_OLT: // x < 0
4303 return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf);
4304 case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
4305 return exactClass(Src, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan);
4306 case FCmpInst::FCMP_OLE: // x <= 0
4307 return exactClass(Src, fcNegative | fcPosZero);
4308 case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
4309 return exactClass(Src, fcNegative | fcPosZero | fcNan);
4310 default:
4311 llvm_unreachable("all compare types are handled");
4312 }
4313
4314 return {nullptr, fcAllFlags, fcAllFlags};
4315 }
4316
4317 const bool IsDenormalRHS = (OrigClass & fcSubnormal) == OrigClass;
4318
4319 const bool IsInf = (OrigClass & fcInf) == OrigClass;
4320 if (IsInf) {
4321 FPClassTest Mask = fcAllFlags;
4322
4323 switch (Pred) {
4324 case FCmpInst::FCMP_OEQ:
4325 case FCmpInst::FCMP_UNE: {
4326 // Match __builtin_isinf patterns
4327 //
4328 // fcmp oeq x, +inf -> is_fpclass x, fcPosInf
4329 // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf
4330 // fcmp oeq x, -inf -> is_fpclass x, fcNegInf
4331 // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false
4332 //
4333 // fcmp une x, +inf -> is_fpclass x, ~fcPosInf
4334 // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf
4335 // fcmp une x, -inf -> is_fpclass x, ~fcNegInf
4336 // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true
4337 if (IsNegativeRHS) {
4338 Mask = fcNegInf;
4339 if (IsFabs)
4340 Mask = fcNone;
4341 } else {
4342 Mask = fcPosInf;
4343 if (IsFabs)
4344 Mask |= fcNegInf;
4345 }
4346 break;
4347 }
4348 case FCmpInst::FCMP_ONE:
4349 case FCmpInst::FCMP_UEQ: {
4350 // Match __builtin_isinf patterns
4351 // fcmp one x, -inf -> is_fpclass x, ~fcNegInf & ~fcNan
4352 // fcmp one fabs(x), -inf -> is_fpclass x, ~fcNan
4353 // fcmp one x, +inf -> is_fpclass x, ~fcPosInf & ~fcNan
4354 // fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & ~fcNan
4355 //
4356 // fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan
4357 // fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan
4358 // fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan
4359 // fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan
4360 if (IsNegativeRHS) {
4361 Mask = ~fcNegInf & ~fcNan;
4362 if (IsFabs)
4363 Mask = ~fcNan;
4364 } else {
4365 Mask = ~fcPosInf & ~fcNan;
4366 if (IsFabs)
4367 Mask &= ~fcNegInf;
4368 }
4369
4370 break;
4371 }
4372 case FCmpInst::FCMP_OLT:
4373 case FCmpInst::FCMP_UGE: {
4374 if (IsNegativeRHS) {
4375 // No value is ordered and less than negative infinity.
4376 // All values are unordered with or at least negative infinity.
4377 // fcmp olt x, -inf -> false
4378 // fcmp uge x, -inf -> true
4379 Mask = fcNone;
4380 break;
4381 }
4382
4383 // fcmp olt fabs(x), +inf -> fcFinite
4384 // fcmp uge fabs(x), +inf -> ~fcFinite
4385 // fcmp olt x, +inf -> fcFinite|fcNegInf
4386 // fcmp uge x, +inf -> ~(fcFinite|fcNegInf)
4387 Mask = fcFinite;
4388 if (!IsFabs)
4389 Mask |= fcNegInf;
4390 break;
4391 }
4392 case FCmpInst::FCMP_OGE:
4393 case FCmpInst::FCMP_ULT: {
4394 if (IsNegativeRHS) {
4395 // fcmp oge x, -inf -> ~fcNan
4396 // fcmp oge fabs(x), -inf -> ~fcNan
4397 // fcmp ult x, -inf -> fcNan
4398 // fcmp ult fabs(x), -inf -> fcNan
4399 Mask = ~fcNan;
4400 break;
4401 }
4402
4403 // fcmp oge fabs(x), +inf -> fcInf
4404 // fcmp oge x, +inf -> fcPosInf
4405 // fcmp ult fabs(x), +inf -> ~fcInf
4406 // fcmp ult x, +inf -> ~fcPosInf
4407 Mask = fcPosInf;
4408 if (IsFabs)
4409 Mask |= fcNegInf;
4410 break;
4411 }
4412 case FCmpInst::FCMP_OGT:
4413 case FCmpInst::FCMP_ULE: {
4414 if (IsNegativeRHS) {
4415 // fcmp ogt x, -inf -> fcmp one x, -inf
4416 // fcmp ogt fabs(x), -inf -> fcmp ord x, x
4417 // fcmp ule x, -inf -> fcmp ueq x, -inf
4418 // fcmp ule fabs(x), -inf -> fcmp uno x, x
4419 Mask = IsFabs ? ~fcNan : ~(fcNegInf | fcNan);
4420 break;
4421 }
4422
4423 // No value is ordered and greater than infinity.
4424 Mask = fcNone;
4425 break;
4426 }
4427 case FCmpInst::FCMP_OLE:
4428 case FCmpInst::FCMP_UGT: {
4429 if (IsNegativeRHS) {
4430 Mask = IsFabs ? fcNone : fcNegInf;
4431 break;
4432 }
4433
4434 // fcmp ole x, +inf -> fcmp ord x, x
4435 // fcmp ole fabs(x), +inf -> fcmp ord x, x
4436 // fcmp ole x, -inf -> fcmp oeq x, -inf
4437 // fcmp ole fabs(x), -inf -> false
4438 Mask = ~fcNan;
4439 break;
4440 }
4441 default:
4442 llvm_unreachable("all compare types are handled");
4443 }
4444
4445 // Invert the comparison for the unordered cases.
4446 if (FCmpInst::isUnordered(Pred))
4447 Mask = ~Mask;
4448
4449 return exactClass(Src, Mask);
4450 }
4451
4452 if (Pred == FCmpInst::FCMP_OEQ)
4453 return {Src, RHSClass, fcAllFlags};
4454
4455 if (Pred == FCmpInst::FCMP_UEQ) {
4456 FPClassTest Class = RHSClass | fcNan;
4457 return {Src, Class, ~fcNan};
4458 }
4459
4460 if (Pred == FCmpInst::FCMP_ONE)
4461 return {Src, ~fcNan, RHSClass | fcNan};
4462
4463 if (Pred == FCmpInst::FCMP_UNE)
4464 return {Src, fcAllFlags, RHSClass};
4465
4466 assert((RHSClass == fcNone || RHSClass == fcPosNormal ||
4467 RHSClass == fcNegNormal || RHSClass == fcNormal ||
4468 RHSClass == fcPosSubnormal || RHSClass == fcNegSubnormal ||
4469 RHSClass == fcSubnormal) &&
4470 "should have been recognized as an exact class test");
4471
4472 if (IsNegativeRHS) {
4473 // TODO: Handle fneg(fabs)
4474 if (IsFabs) {
4475 // fabs(x) o> -k -> fcmp ord x, x
4476 // fabs(x) u> -k -> true
4477 // fabs(x) o< -k -> false
4478 // fabs(x) u< -k -> fcmp uno x, x
4479 switch (Pred) {
4480 case FCmpInst::FCMP_OGT:
4481 case FCmpInst::FCMP_OGE:
4482 return {Src, ~fcNan, fcNan};
4483 case FCmpInst::FCMP_UGT:
4484 case FCmpInst::FCMP_UGE:
4485 return {Src, fcAllFlags, fcNone};
4486 case FCmpInst::FCMP_OLT:
4487 case FCmpInst::FCMP_OLE:
4488 return {Src, fcNone, fcAllFlags};
4489 case FCmpInst::FCMP_ULT:
4490 case FCmpInst::FCMP_ULE:
4491 return {Src, fcNan, ~fcNan};
4492 default:
4493 break;
4494 }
4495
4496 return {nullptr, fcAllFlags, fcAllFlags};
4497 }
4498
4499 FPClassTest ClassesLE = fcNegInf | fcNegNormal;
4500 FPClassTest ClassesGE = fcPositive | fcNegZero | fcNegSubnormal;
4501
4502 if (IsDenormalRHS)
4503 ClassesLE |= fcNegSubnormal;
4504