ValueTracking.cpp
1//===- ValueTracking.cpp - Walk computations to compute properties --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains routines that help analyze properties that chains of
10// computations have.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/APFloat.h"
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/ScopeExit.h"
21#include "llvm/ADT/SmallSet.h"
23#include "llvm/ADT/StringRef.h"
32#include "llvm/Analysis/Loads.h"
37#include "llvm/IR/Argument.h"
38#include "llvm/IR/Attributes.h"
39#include "llvm/IR/BasicBlock.h"
40#include "llvm/IR/Constant.h"
42#include "llvm/IR/Constants.h"
45#include "llvm/IR/Dominators.h"
47#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalValue.h"
52#include "llvm/IR/InstrTypes.h"
53#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Intrinsics.h"
57#include "llvm/IR/IntrinsicsAArch64.h"
58#include "llvm/IR/IntrinsicsAMDGPU.h"
59#include "llvm/IR/IntrinsicsRISCV.h"
60#include "llvm/IR/IntrinsicsX86.h"
61#include "llvm/IR/LLVMContext.h"
62#include "llvm/IR/Metadata.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/Operator.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/User.h"
68#include "llvm/IR/Value.h"
76#include <algorithm>
77#include <cassert>
78#include <cstdint>
79#include <optional>
80#include <utility>
81
82using namespace llvm;
83using namespace llvm::PatternMatch;
84
85// Controls the number of uses of the value searched for possible
86// dominating comparisons.
87static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
88 cl::Hidden, cl::init(20));
89
90
91/// Returns the bitwidth of the given scalar or pointer type. For vector types,
92/// returns the element type's bitwidth.
93static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
94 if (unsigned BitWidth = Ty->getScalarSizeInBits())
95 return BitWidth;
96
97 return DL.getPointerTypeSizeInBits(Ty);
98}
99
100// Given the provided Value and, potentially, a context instruction, return
101// the preferred context instruction (if any).
102static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
103 // If we've been provided with a context instruction, then use that (provided
104 // it has been inserted).
105 if (CxtI && CxtI->getParent())
106 return CxtI;
107
108 // If the value is really an already-inserted instruction, then use that.
109 CxtI = dyn_cast<Instruction>(V);
110 if (CxtI && CxtI->getParent())
111 return CxtI;
112
113 return nullptr;
114}
115
116static const Instruction *safeCxtI(const Value *V1, const Value *V2, const Instruction *CxtI) {
117 // If we've been provided with a context instruction, then use that (provided
118 // it has been inserted).
119 if (CxtI && CxtI->getParent())
120 return CxtI;
121
122 // If the value is really an already-inserted instruction, then use that.
123 CxtI = dyn_cast<Instruction>(V1);
124 if (CxtI && CxtI->getParent())
125 return CxtI;
126
127 CxtI = dyn_cast<Instruction>(V2);
128 if (CxtI && CxtI->getParent())
129 return CxtI;
130
131 return nullptr;
132}
133
134static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
135 const APInt &DemandedElts,
136 APInt &DemandedLHS, APInt &DemandedRHS) {
137 if (isa<ScalableVectorType>(Shuf->getType())) {
138 assert(DemandedElts == APInt(1,1));
139 DemandedLHS = DemandedRHS = DemandedElts;
140 return true;
141 }
142
143 int NumElts =
144 cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
145 return llvm::getShuffleDemandedElts(NumElts, Shuf->getShuffleMask(),
146 DemandedElts, DemandedLHS, DemandedRHS);
147}
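// Illustrative example (not in the upstream source): for
//   %s = shufflevector <4 x i32> %a, <4 x i32> %b,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// with DemandedElts = 0b1010 (elements 1 and 3 of %s), the demanded outputs
// read lanes 5 and 7 of the concatenated inputs, i.e. lanes 1 and 3 of %b,
// so DemandedLHS = 0b0000 and DemandedRHS = 0b1010.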
148
149static void computeKnownBits(const Value *V, const APInt &DemandedElts,
150 KnownBits &Known, unsigned Depth,
151 const SimplifyQuery &Q);
152
153void llvm::computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
154 const SimplifyQuery &Q) {
155 // Since the number of lanes in a scalable vector is unknown at compile time,
156 // we track one bit which is implicitly broadcast to all lanes. This means
157 // that all lanes in a scalable vector are considered demanded.
158 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
159 APInt DemandedElts =
160 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
161 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
162}
163
164void llvm::computeKnownBits(const Value *V, KnownBits &Known,
165 const DataLayout &DL, unsigned Depth,
166 AssumptionCache *AC, const Instruction *CxtI,
167 const DominatorTree *DT, bool UseInstrInfo) {
168 computeKnownBits(
169 V, Known, Depth,
170 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
171}
172
173KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
174 unsigned Depth, AssumptionCache *AC,
175 const Instruction *CxtI,
176 const DominatorTree *DT, bool UseInstrInfo) {
177 return computeKnownBits(
178 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
179}
180
181KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
182 const DataLayout &DL, unsigned Depth,
183 AssumptionCache *AC, const Instruction *CxtI,
184 const DominatorTree *DT, bool UseInstrInfo) {
185 return computeKnownBits(
186 V, DemandedElts, Depth,
187 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
188}
189
190static bool haveNoCommonBitsSetSpecialCases(const Value *LHS, const Value *RHS,
191 const SimplifyQuery &SQ) {
192 // Look for an inverted mask: (X & ~M) op (Y & M).
193 {
194 Value *M;
195 if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
196 match(RHS, m_c_And(m_Specific(M), m_Value())) &&
197 isGuaranteedNotToBeUndef(M, SQ.AC, SQ.CxtI, SQ.DT))
198 return true;
199 }
200
201 // X op (Y & ~X)
202 if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) &&
203 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT))
204 return true;
205
206 // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
207 // for constant Y.
208 Value *Y;
209 if (match(RHS,
210 m_c_Xor(m_c_And(m_Specific(LHS), m_Value(Y)), m_Deferred(Y))) &&
211 isGuaranteedNotToBeUndef(LHS, SQ.AC, SQ.CxtI, SQ.DT) &&
212 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
213 return true;
214
215 // Peek through extends to find a 'not' of the other side:
216 // (ext Y) op ext(~Y)
217 if (match(LHS, m_ZExtOrSExt(m_Value(Y))) &&
218 match(RHS, m_ZExtOrSExt(m_Not(m_Specific(Y)))) &&
219 isGuaranteedNotToBeUndef(Y, SQ.AC, SQ.CxtI, SQ.DT))
220 return true;
221
222 // Look for: (A & B) op ~(A | B)
223 {
224 Value *A, *B;
225 if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
226 match(RHS, m_Not(m_c_Or(m_Specific(A), m_Specific(B)))) &&
227 isGuaranteedNotToBeUndef(A, SQ.AC, SQ.CxtI, SQ.DT) &&
228 isGuaranteedNotToBeUndef(B, SQ.AC, SQ.CxtI, SQ.DT))
229 return true;
230 }
231
232 return false;
233}
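// Illustrative example (not in the upstream source): with M = 0x0F, the
// inverted-mask case matches LHS = X & 0xF0 and RHS = Y & 0x0F. The two sides
// select disjoint bit positions, so e.g. an 'add' of them can never carry and
// behaves like a disjoint 'or'.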
234
235bool llvm::haveNoCommonBitsSet(const WithCache<const Value *> &LHSCache,
236 const WithCache<const Value *> &RHSCache,
237 const SimplifyQuery &SQ) {
238 const Value *LHS = LHSCache.getValue();
239 const Value *RHS = RHSCache.getValue();
240
241 assert(LHS->getType() == RHS->getType() &&
242 "LHS and RHS should have the same type");
244 "LHS and RHS should be integers");
245
246 if (haveNoCommonBitsSetSpecialCases(LHS, RHS, SQ) ||
247 haveNoCommonBitsSetSpecialCases(RHS, LHS, SQ))
248 return true;
249
250 return KnownBits::haveNoCommonBitsSet(LHSCache.getKnownBits(SQ),
251 RHSCache.getKnownBits(SQ));
252}
253
254bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
255 return !I->user_empty() && all_of(I->users(), [](const User *U) {
256 return match(U, m_ICmp(m_Value(), m_Zero()));
257 });
258}
259
260bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
261 return !I->user_empty() && all_of(I->users(), [](const User *U) {
262 CmpPredicate P;
263 return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
264 });
265}
266
267bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
268 bool OrZero, unsigned Depth,
269 AssumptionCache *AC, const Instruction *CxtI,
270 const DominatorTree *DT, bool UseInstrInfo) {
271 return ::isKnownToBeAPowerOfTwo(
272 V, OrZero, Depth,
273 SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
274}
275
276static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
277 const SimplifyQuery &Q, unsigned Depth);
278
279bool llvm::isKnownNonNegative(const Value *V, const SimplifyQuery &SQ,
280 unsigned Depth) {
281 return computeKnownBits(V, Depth, SQ).isNonNegative();
282}
283
284bool llvm::isKnownPositive(const Value *V, const SimplifyQuery &SQ,
285 unsigned Depth) {
286 if (auto *CI = dyn_cast<ConstantInt>(V))
287 return CI->getValue().isStrictlyPositive();
288
289 // If `isKnownNonNegative` ever becomes more sophisticated, make sure to keep
290 // this updated.
291 KnownBits Known = computeKnownBits(V, Depth, SQ);
292 return Known.isNonNegative() &&
293 (Known.isNonZero() || isKnownNonZero(V, SQ, Depth));
294}
295
296bool llvm::isKnownNegative(const Value *V, const SimplifyQuery &SQ,
297 unsigned Depth) {
298 return computeKnownBits(V, Depth, SQ).isNegative();
299}
300
301static bool isKnownNonEqual(const Value *V1, const Value *V2,
302 const APInt &DemandedElts, unsigned Depth,
303 const SimplifyQuery &Q);
304
305bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
306 const DataLayout &DL, AssumptionCache *AC,
307 const Instruction *CxtI, const DominatorTree *DT,
308 bool UseInstrInfo) {
309 // We don't support looking through casts.
310 if (V1 == V2 || V1->getType() != V2->getType())
311 return false;
312 auto *FVTy = dyn_cast<FixedVectorType>(V1->getType());
313 APInt DemandedElts =
314 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
315 return ::isKnownNonEqual(
316 V1, V2, DemandedElts, 0,
317 SimplifyQuery(DL, DT, AC, safeCxtI(V2, V1, CxtI), UseInstrInfo));
318}
319
320bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
321 const SimplifyQuery &SQ, unsigned Depth) {
322 KnownBits Known(Mask.getBitWidth());
323 computeKnownBits(V, Known, Depth, SQ);
324 return Mask.isSubsetOf(Known.Zero);
325}
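// Illustrative example (not in the upstream source): if computeKnownBits
// proves Known.Zero = 0xFF00 for V, then MaskedValueIsZero(V, 0x0F00, ...)
// returns true, because every bit of the mask is known to be zero in V.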
326
327static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
328 unsigned Depth, const SimplifyQuery &Q);
329
330static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
331 const SimplifyQuery &Q) {
332 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
333 APInt DemandedElts =
334 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
335 return ComputeNumSignBits(V, DemandedElts, Depth, Q);
336}
337
338unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
339 unsigned Depth, AssumptionCache *AC,
340 const Instruction *CxtI,
341 const DominatorTree *DT, bool UseInstrInfo) {
342 return ::ComputeNumSignBits(
343 V, Depth, SimplifyQuery(DL, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
344}
345
346unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
347 unsigned Depth, AssumptionCache *AC,
348 const Instruction *CxtI,
349 const DominatorTree *DT) {
350 unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
351 return V->getType()->getScalarSizeInBits() - SignBits + 1;
352}
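// Illustrative example (not in the upstream source): an i32 value with 20
// known sign bits has 32 - 20 + 1 = 13 significant bits, i.e. it fits in a
// signed 13-bit integer.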
353
354static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
355 bool NSW, bool NUW,
356 const APInt &DemandedElts,
357 KnownBits &KnownOut, KnownBits &Known2,
358 unsigned Depth, const SimplifyQuery &Q) {
359 computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);
360
361 // If one operand is unknown and we have no nowrap information,
362 // the result will be unknown independently of the second operand.
363 if (KnownOut.isUnknown() && !NSW && !NUW)
364 return;
365
366 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
367 KnownOut = KnownBits::computeForAddSub(Add, NSW, NUW, Known2, KnownOut);
368}
369
370static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
371 bool NUW, const APInt &DemandedElts,
372 KnownBits &Known, KnownBits &Known2,
373 unsigned Depth, const SimplifyQuery &Q) {
374 computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
375 computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
376
377 bool isKnownNegative = false;
378 bool isKnownNonNegative = false;
379 // If the multiplication is known not to overflow, compute the sign bit.
380 if (NSW) {
381 if (Op0 == Op1) {
382 // The product of a number with itself is non-negative.
383 isKnownNonNegative = true;
384 } else {
385 bool isKnownNonNegativeOp1 = Known.isNonNegative();
386 bool isKnownNonNegativeOp0 = Known2.isNonNegative();
387 bool isKnownNegativeOp1 = Known.isNegative();
388 bool isKnownNegativeOp0 = Known2.isNegative();
389 // The product of two numbers with the same sign is non-negative.
390 isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
391 (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
392 if (!isKnownNonNegative && NUW) {
393 // mul nuw nsw with a factor > 1 is non-negative.
394 KnownBits One = KnownBits::makeConstant(APInt(Known.getBitWidth(), 1));
395 isKnownNonNegative = KnownBits::sgt(Known, One).value_or(false) ||
396 KnownBits::sgt(Known2, One).value_or(false);
397 }
398
399 // The product of a negative number and a non-negative number is either
400 // negative or zero.
401 if (!isKnownNonNegative)
402 isKnownNegative =
403 (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
404 Known2.isNonZero()) ||
405 (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
406 }
407 }
408
409 bool SelfMultiply = Op0 == Op1;
410 if (SelfMultiply)
411 SelfMultiply &=
412 isGuaranteedNotToBeUndef(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
413 Known = KnownBits::mul(Known, Known2, SelfMultiply);
414
415 // Only make use of no-wrap flags if we failed to compute the sign bit
416 // directly. This matters if the multiplication always overflows, in
417 // which case we prefer to follow the result of the direct computation,
418 // though as the program is invoking undefined behaviour we can choose
419 // whatever we like here.
420 if (isKnownNonNegative && !Known.isNegative())
421 Known.makeNonNegative();
422 else if (isKnownNegative && !Known.isNonNegative())
423 Known.makeNegative();
424}
425
426void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
427 KnownBits &Known) {
428 unsigned BitWidth = Known.getBitWidth();
429 unsigned NumRanges = Ranges.getNumOperands() / 2;
430 assert(NumRanges >= 1);
431
432 Known.Zero.setAllBits();
433 Known.One.setAllBits();
434
435 for (unsigned i = 0; i < NumRanges; ++i) {
436 ConstantInt *Lower =
437 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
438 ConstantInt *Upper =
439 mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
440 ConstantRange Range(Lower->getValue(), Upper->getValue());
441
442 // The first CommonPrefixBits of all values in Range are equal.
443 unsigned CommonPrefixBits =
444 (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
445 APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
446 APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
447 Known.One &= UnsignedMax & Mask;
448 Known.Zero &= ~UnsignedMax & Mask;
449 }
450}
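// Illustrative example (not in the upstream source): for an i8 load annotated
// with !range {i8 4, i8 8}, the possible values are 4..7 (0b0000_01xx), so the
// loop above yields Known.Zero = 0xF8 and Known.One = 0x04.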
451
452static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
453 SmallVector<const Value *, 16> WorkSet(1, I);
454 SmallPtrSet<const Value *, 32> Visited;
455 SmallPtrSet<const Value *, 16> EphValues;
456
457 // The instruction defining an assumption's condition itself is always
458 // considered ephemeral to that assumption (even if it has other
459 // non-ephemeral users). See r246696's test case for an example.
460 if (is_contained(I->operands(), E))
461 return true;
462
463 while (!WorkSet.empty()) {
464 const Value *V = WorkSet.pop_back_val();
465 if (!Visited.insert(V).second)
466 continue;
467
468 // If all uses of this value are ephemeral, then so is this value.
469 if (llvm::all_of(V->users(), [&](const User *U) {
470 return EphValues.count(U);
471 })) {
472 if (V == E)
473 return true;
474
475 if (V == I || (isa<Instruction>(V) &&
476 !cast<Instruction>(V)->mayHaveSideEffects() &&
477 !cast<Instruction>(V)->isTerminator())) {
478 EphValues.insert(V);
479 if (const User *U = dyn_cast<User>(V))
480 append_range(WorkSet, U->operands());
481 }
482 }
483 }
484
485 return false;
486}
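// Illustrative example (not in the upstream source): in
//   %c = icmp ugt i32 %x, 7
//   call void @llvm.assume(i1 %c)
// %c is always ephemeral to the assume (even if it had other users), so
// isValidAssumeForContext refuses to use this assume when reasoning about %c
// itself.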
487
488// Is this an intrinsic that cannot be speculated but also cannot trap?
489bool llvm::isAssumeLikeIntrinsic(const Instruction *I) {
490 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
491 return CI->isAssumeLikeIntrinsic();
492
493 return false;
494}
495
496bool llvm::isValidAssumeForContext(const Instruction *Inv,
497 const Instruction *CxtI,
498 const DominatorTree *DT,
499 bool AllowEphemerals) {
500 // There are two restrictions on the use of an assume:
501 // 1. The assume must dominate the context (or the control flow must
502 // reach the assume whenever it reaches the context).
503 // 2. The context must not be in the assume's set of ephemeral values
504 // (otherwise we will use the assume to prove that the condition
505 // feeding the assume is trivially true, thus causing the removal of
506 // the assume).
507
508 if (Inv->getParent() == CxtI->getParent()) {
509 // If Inv and CtxI are in the same block, check if the assume (Inv) is first
510 // in the BB.
511 if (Inv->comesBefore(CxtI))
512 return true;
513
514 // Don't let an assume affect itself - this would cause the problems
515 // `isEphemeralValueOf` is trying to prevent, and it would also make
516 // the loop below go out of bounds.
517 if (!AllowEphemerals && Inv == CxtI)
518 return false;
519
520 // The context comes first, but they're both in the same block.
521 // Make sure there is nothing in between that might interrupt
522 // the control flow, not even CxtI itself.
523 // We limit the scan distance between the assume and its context instruction
524 // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
525 // it can be adjusted if needed (could be turned into a cl::opt).
526 auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
527 if (!isGuaranteedToTransferExecutionToSuccessor(Range, 15))
528 return false;
529
530 return AllowEphemerals || !isEphemeralValueOf(Inv, CxtI);
531 }
532
533 // Inv and CxtI are in different blocks.
534 if (DT) {
535 if (DT->dominates(Inv, CxtI))
536 return true;
537 } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor() ||
538 Inv->getParent()->isEntryBlock()) {
539 // We don't have a DT, but this trivially dominates.
540 return true;
541 }
542
543 return false;
544}
545
546// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
547// we still have enough information about `RHS` to conclude non-zero. For
548// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
549// so the extra compile time may not be worth it, but possibly a second API
550// should be created for use outside of loops.
551static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
552 // v u> y implies v != 0.
553 if (Pred == ICmpInst::ICMP_UGT)
554 return true;
555
556 // Special-case v != 0 to also handle v != null.
557 if (Pred == ICmpInst::ICMP_NE)
558 return match(RHS, m_Zero());
559
560 // All other predicates - rely on generic ConstantRange handling.
561 const APInt *C;
562 APInt Zero = APInt::getZero(RHS->getType()->getScalarSizeInBits());
563 if (match(RHS, m_APInt(C))) {
564 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
565 return !TrueValues.contains(Zero);
566 }
567
568 auto *VC = dyn_cast<ConstantDataVector>(RHS);
569 if (VC == nullptr)
570 return false;
571
572 for (unsigned ElemIdx = 0, NElem = VC->getNumElements(); ElemIdx < NElem;
573 ++ElemIdx) {
574 ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
575 Pred, VC->getElementAsAPInt(ElemIdx));
576 if (TrueValues.contains(Zero))
577 return false;
578 }
579 return true;
580}
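// Illustrative example (not in the upstream source): for "v s> 5" the exact
// true region is [6, SIGNED_MIN), which does not contain 0, so the compare
// proves v != 0; for "v s> -1" the region is [0, SIGNED_MIN), which contains
// 0, so nothing is implied.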
581
582static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
583 // Use of assumptions is context-sensitive. If we don't have a context, we
584 // cannot use them!
585 if (!Q.AC || !Q.CxtI)
586 return false;
587
588 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
589 if (!Elem.Assume)
590 continue;
591
592 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
593 assert(I->getFunction() == Q.CxtI->getFunction() &&
594 "Got assumption for the wrong function!");
595
596 if (Elem.Index != AssumptionCache::ExprResultIdx) {
597 if (!V->getType()->isPointerTy())
598 continue;
599 if (RetainedKnowledge RK = getKnowledgeFromBundle(
600 *I, I->bundle_op_info_begin()[Elem.Index])) {
601 if (RK.WasOn == V &&
602 (RK.AttrKind == Attribute::NonNull ||
603 (RK.AttrKind == Attribute::Dereferenceable &&
604 !NullPointerIsDefined(Q.CxtI->getFunction(),
605 V->getType()->getPointerAddressSpace()))) &&
606 isValidAssumeForContext(I, Q.CxtI, Q.DT))
607 return true;
608 }
609 continue;
610 }
611
612 // Warning: This loop can end up being somewhat performance sensitive.
613 // We're running this loop once for each value queried, resulting in a
614 // runtime of ~O(#assumes * #values).
615
616 Value *RHS;
617 CmpPredicate Pred;
618 auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
619 if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
620 continue;
621
622 if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
623 return true;
624 }
625
626 return false;
627}
628
629static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred,
630 Value *LHS, Value *RHS, KnownBits &Known,
631 const SimplifyQuery &Q) {
632 if (RHS->getType()->isPointerTy()) {
633 // Handle comparison of pointer to null explicitly, as it will not be
634 // covered by the m_APInt() logic below.
635 if (LHS == V && match(RHS, m_Zero())) {
636 switch (Pred) {
637 case ICmpInst::ICMP_EQ:
638 Known.setAllZero();
639 break;
640 case ICmpInst::ICMP_SGE:
641 case ICmpInst::ICMP_SGT:
642 Known.makeNonNegative();
643 break;
644 case ICmpInst::ICMP_SLT:
645 Known.makeNegative();
646 break;
647 default:
648 break;
649 }
650 }
651 return;
652 }
653
654 unsigned BitWidth = Known.getBitWidth();
655 auto m_V =
656 m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));
657
658 Value *Y;
659 const APInt *Mask, *C;
660 uint64_t ShAmt;
661 switch (Pred) {
662 case ICmpInst::ICMP_EQ:
663 // assume(V = C)
664 if (match(LHS, m_V) && match(RHS, m_APInt(C))) {
665 Known = Known.unionWith(KnownBits::makeConstant(*C));
666 // assume(V & Mask = C)
667 } else if (match(LHS, m_c_And(m_V, m_Value(Y))) &&
668 match(RHS, m_APInt(C))) {
669 // For one bits in Mask, we can propagate bits from C to V.
670 Known.One |= *C;
671 if (match(Y, m_APInt(Mask)))
672 Known.Zero |= ~*C & *Mask;
673 // assume(V | Mask = C)
674 } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) {
675 // For zero bits in Mask, we can propagate bits from C to V.
676 Known.Zero |= ~*C;
677 if (match(Y, m_APInt(Mask)))
678 Known.One |= *C & ~*Mask;
679 // assume(V ^ Mask = C)
680 } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) &&
681 match(RHS, m_APInt(C))) {
682 // Equivalent to assume(V == Mask ^ C)
683 Known = Known.unionWith(KnownBits::makeConstant(*C ^ *Mask));
684 // assume(V << ShAmt = C)
685 } else if (match(LHS, m_Shl(m_V, m_ConstantInt(ShAmt))) &&
686 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
687 // For those bits in C that are known, we can propagate them to known
688 // bits in V shifted to the right by ShAmt.
689 KnownBits RHSKnown = KnownBits::makeConstant(*C);
690 RHSKnown.Zero.lshrInPlace(ShAmt);
691 RHSKnown.One.lshrInPlace(ShAmt);
692 Known = Known.unionWith(RHSKnown);
693 // assume(V >> ShAmt = C)
694 } else if (match(LHS, m_Shr(m_V, m_ConstantInt(ShAmt))) &&
695 match(RHS, m_APInt(C)) && ShAmt < BitWidth) {
696 KnownBits RHSKnown = KnownBits::makeConstant(*C);
697 // For those bits in RHS that are known, we can propagate them to known
698 // bits in V shifted to the right by C.
699 Known.Zero |= RHSKnown.Zero << ShAmt;
700 Known.One |= RHSKnown.One << ShAmt;
701 }
702 break;
703 case ICmpInst::ICMP_NE: {
704 // assume (V & B != 0) where B is a power of 2
705 const APInt *BPow2;
706 if (match(LHS, m_And(m_V, m_Power2(BPow2))) && match(RHS, m_Zero()))
707 Known.One |= *BPow2;
708 break;
709 }
710 default:
711 if (match(RHS, m_APInt(C))) {
712 const APInt *Offset = nullptr;
713 if (match(LHS, m_CombineOr(m_V, m_AddLike(m_V, m_APInt(Offset))))) {
714 ConstantRange LHSRange = ConstantRange::makeAllowedICmpRegion(Pred, *C);
715 if (Offset)
716 LHSRange = LHSRange.sub(*Offset);
717 Known = Known.unionWith(LHSRange.toKnownBits());
718 }
719 if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE) {
720 // X & Y u> C -> X u> C && Y u> C
721 // X nuw- Y u> C -> X u> C
722 if (match(LHS, m_c_And(m_V, m_Value())) ||
723 match(LHS, m_NUWSub(m_V, m_Value())))
724 Known.One.setHighBits(
725 (*C + (Pred == ICmpInst::ICMP_UGT)).countLeadingOnes());
726 }
727 if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
728 // X | Y u< C -> X u< C && Y u< C
729 // X nuw+ Y u< C -> X u< C && Y u< C
730 if (match(LHS, m_c_Or(m_V, m_Value())) ||
731 match(LHS, m_c_NUWAdd(m_V, m_Value()))) {
732 Known.Zero.setHighBits(
733 (*C - (Pred == ICmpInst::ICMP_ULT)).countLeadingZeros());
734 }
735 }
736 }
737 break;
738 }
739}
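// Illustrative example (not in the upstream source): from
//   assume((v & 12) == 8)
// the ICMP_EQ case above propagates C = 8 into Known.One (bit 3 is one) and
// ~C & Mask = 4 into Known.Zero (bit 2 is zero); bits outside the mask stay
// unknown.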
740
741static void computeKnownBitsFromICmpCond(const Value *V, ICmpInst *Cmp,
742 KnownBits &Known,
743 const SimplifyQuery &SQ, bool Invert) {
744 ICmpInst::Predicate Pred =
745 Invert ? Cmp->getInversePredicate() : Cmp->getPredicate();
746 Value *LHS = Cmp->getOperand(0);
747 Value *RHS = Cmp->getOperand(1);
748
749 // Handle icmp pred (trunc V), C
750 if (match(LHS, m_Trunc(m_Specific(V)))) {
751 KnownBits DstKnown(LHS->getType()->getScalarSizeInBits());
752 computeKnownBitsFromCmp(LHS, Pred, LHS, RHS, DstKnown, SQ);
753 Known = Known.unionWith(DstKnown.anyext(Known.getBitWidth()));
754 return;
755 }
756
757 computeKnownBitsFromCmp(V, Pred, LHS, RHS, Known, SQ);
758}
759
760static void computeKnownBitsFromCond(const Value *V, Value *Cond,
761 KnownBits &Known, unsigned Depth,
762 const SimplifyQuery &SQ, bool Invert) {
763 Value *A, *B;
764 if (Depth < MaxAnalysisRecursionDepth &&
765 match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
766 KnownBits Known2(Known.getBitWidth());
767 KnownBits Known3(Known.getBitWidth());
768 computeKnownBitsFromCond(V, A, Known2, Depth + 1, SQ, Invert);
769 computeKnownBitsFromCond(V, B, Known3, Depth + 1, SQ, Invert);
770 if (Invert ? match(Cond, m_LogicalOr(m_Value(), m_Value()))
772 Known2 = Known2.unionWith(Known3);
773 else
774 Known2 = Known2.intersectWith(Known3);
775 Known = Known.unionWith(Known2);
776 }
777
778 if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
779 computeKnownBitsFromICmpCond(V, Cmp, Known, SQ, Invert);
780}
781
782void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
783 unsigned Depth, const SimplifyQuery &Q) {
784 // Handle injected condition.
785 if (Q.CC && Q.CC->AffectedValues.contains(V))
786 computeKnownBitsFromCond(V, Q.CC->Cond, Known, Depth, Q, Q.CC->Invert);
787
788 if (!Q.CxtI)
789 return;
790
791 if (Q.DC && Q.DT) {
792 // Handle dominating conditions.
793 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
794 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
795 if (Q.DT->dominates(Edge0, Q.CxtI->getParent()))
796 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
797 /*Invert*/ false);
798
799 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
800 if (Q.DT->dominates(Edge1, Q.CxtI->getParent()))
801 computeKnownBitsFromCond(V, BI->getCondition(), Known, Depth, Q,
802 /*Invert*/ true);
803 }
804
805 if (Known.hasConflict())
806 Known.resetAll();
807 }
808
809 if (!Q.AC)
810 return;
811
812 unsigned BitWidth = Known.getBitWidth();
813
814 // Note that the patterns below need to be kept in sync with the code
815 // in AssumptionCache::updateAffectedValues.
816
817 for (AssumptionCache::ResultElem &Elem : Q.AC->assumptionsFor(V)) {
818 if (!Elem.Assume)
819 continue;
820
821 AssumeInst *I = cast<AssumeInst>(Elem.Assume);
822 assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
823 "Got assumption for the wrong function!");
824
825 if (Elem.Index != AssumptionCache::ExprResultIdx) {
826 if (!V->getType()->isPointerTy())
827 continue;
828 if (RetainedKnowledge RK = getKnowledgeFromBundle(
829 *I, I->bundle_op_info_begin()[Elem.Index])) {
830 // Allow AllowEphemerals in isValidAssumeForContext, as the CxtI might
831 // be the producer of the pointer in the bundle. At the moment, align
832 // assumptions aren't optimized away.
833 if (RK.WasOn == V && RK.AttrKind == Attribute::Alignment &&
834 isPowerOf2_64(RK.ArgValue) &&
835 isValidAssumeForContext(I, Q.CxtI, Q.DT, /*AllowEphemerals*/ true))
836 Known.Zero.setLowBits(Log2_64(RK.ArgValue));
837 }
838 continue;
839 }
840
841 // Warning: This loop can end up being somewhat performance sensitive.
842 // We're running this loop once for each value queried, resulting in a
843 // runtime of ~O(#assumes * #values).
844
845 Value *Arg = I->getArgOperand(0);
846
847 if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
848 assert(BitWidth == 1 && "assume operand is not i1?");
849 (void)BitWidth;
850 Known.setAllOnes();
851 return;
852 }
853 if (match(Arg, m_Not(m_Specific(V))) &&
854 isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
855 assert(BitWidth == 1 && "assume operand is not i1?");
856 (void)BitWidth;
857 Known.setAllZero();
858 return;
859 }
860
861 // The remaining tests are all recursive, so bail out if we hit the limit.
862 if (Depth == MaxAnalysisRecursionDepth)
863 continue;
864
865 ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
866 if (!Cmp)
867 continue;
868
869 if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
870 continue;
871
872 computeKnownBitsFromICmpCond(V, Cmp, Known, Q, /*Invert=*/false);
873 }
874
875 // Conflicting assumption: Undefined behavior will occur on this execution
876 // path.
877 if (Known.hasConflict())
878 Known.resetAll();
879}
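// Illustrative example (not in the upstream source): an alignment bundle such
// as
//   call void @llvm.assume(i1 true) [ "align"(ptr %p, i64 16) ]
// lets the loop above set the low Log2_64(16) = 4 bits of %p to known zero.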
880
881/// Compute known bits from a shift operator, including those with a
882/// non-constant shift amount. Known is the output of this function. Known2 is a
883/// pre-allocated temporary with the same bit width as Known and on return
884/// contains the known bits of the shift value source. KF is an
885/// operator-specific function that, given the known-bits and a shift amount,
886/// computes the implied known-bits of the shift operator's result respectively
887/// for that shift amount. The results from calling KF are conservatively
888/// combined for all permitted shift amounts.
889static void computeKnownBitsFromShiftOperator(
890 const Operator *I, const APInt &DemandedElts, KnownBits &Known,
891 KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
892 function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
893 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
894 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
895 // To limit compile-time impact, only query isKnownNonZero() if we know at
896 // least something about the shift amount.
897 bool ShAmtNonZero =
898 Known.isNonZero() ||
899 (Known.getMaxValue().ult(Known.getBitWidth()) &&
900 isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth + 1));
901 Known = KF(Known2, Known, ShAmtNonZero);
902}
903
904static KnownBits
905getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
906 const KnownBits &KnownLHS, const KnownBits &KnownRHS,
907 unsigned Depth, const SimplifyQuery &Q) {
908 unsigned BitWidth = KnownLHS.getBitWidth();
909 KnownBits KnownOut(BitWidth);
910 bool IsAnd = false;
911 bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
912 Value *X = nullptr, *Y = nullptr;
913
914 switch (I->getOpcode()) {
915 case Instruction::And:
916 KnownOut = KnownLHS & KnownRHS;
917 IsAnd = true;
918 // and(x, -x) is a common idiom that will clear all but the lowest set
919 // bit. If we have a single known bit in x, we can clear all bits
920 // above it.
921 // TODO: instcombine often reassociates independent `and` which can hide
922 // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
923 if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
924 // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
925 if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
926 KnownOut = KnownLHS.blsi();
927 else
928 KnownOut = KnownRHS.blsi();
929 }
930 break;
931 case Instruction::Or:
932 KnownOut = KnownLHS | KnownRHS;
933 break;
934 case Instruction::Xor:
935 KnownOut = KnownLHS ^ KnownRHS;
936 // xor(x, x-1) is a common idiom that will clear all but the lowest set
937 // bit. If we have a single known bit in x, we can clear all bits
938 // above it.
939 // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
940 // -1 but for the purpose of demanded bits (xor(x, x-C) &
941 // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
942 // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
943 if (HasKnownOne &&
944 match(I, m_c_Xor(m_Value(X), m_Add(m_Deferred(X), m_AllOnes())))) {
945 const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
946 KnownOut = XBits.blsmsk();
947 }
948 break;
949 default:
950 llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
951 }
952
953 // and(x, add (x, -1)) is a common idiom that always clears the low bit;
954 // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
955 // here we handle the more general case of adding any odd number by
956 // matching the form and/xor/or(x, add(x, y)) where y is odd.
957 // TODO: This could be generalized to clearing any bit set in y where the
958 // following bit is known to be unset in y.
959 if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
963 KnownBits KnownY(BitWidth);
964 computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
965 if (KnownY.countMinTrailingOnes() > 0) {
966 if (IsAnd)
967 KnownOut.Zero.setBit(0);
968 else
969 KnownOut.One.setBit(0);
970 }
971 }
972 return KnownOut;
973}
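// Illustrative example (not in the upstream source): for x = 0b101000,
// xor(x, x - 1) = 0b001111, a mask up to and including the lowest set bit of
// x. blsmsk() models exactly this, so once the position of the lowest set bit
// of x is bounded, all bits of the result above it are known zero.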
974
975static KnownBits computeKnownBitsForHorizontalOperation(
976 const Operator *I, const APInt &DemandedElts, unsigned Depth,
977 const SimplifyQuery &Q,
978 const function_ref<KnownBits(const KnownBits &, const KnownBits &)>
979 KnownBitsFunc) {
980 APInt DemandedEltsLHS, DemandedEltsRHS;
981 getHorizDemandedEltsForFirstOperand(Q.DL.getTypeSizeInBits(I->getType()),
982 DemandedElts, DemandedEltsLHS,
983 DemandedEltsRHS);
984
985 const auto ComputeForSingleOpFunc =
986 [Depth, &Q, KnownBitsFunc](const Value *Op, APInt &DemandedEltsOp) {
987 return KnownBitsFunc(
988 computeKnownBits(Op, DemandedEltsOp, Depth + 1, Q),
989 computeKnownBits(Op, DemandedEltsOp << 1, Depth + 1, Q));
990 };
991
992 if (DemandedEltsRHS.isZero())
993 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS);
994 if (DemandedEltsLHS.isZero())
995 return ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS);
996
997 return ComputeForSingleOpFunc(I->getOperand(0), DemandedEltsLHS)
998 .intersectWith(ComputeForSingleOpFunc(I->getOperand(1), DemandedEltsRHS));
999}
1000
1001// Public so this can be used in `SimplifyDemandedUseBits`.
1002KnownBits llvm::analyzeKnownBitsFromAndXorOr(const Operator *I,
1003 const KnownBits &KnownLHS,
1004 const KnownBits &KnownRHS,
1005 unsigned Depth,
1006 const SimplifyQuery &SQ) {
1007 auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
1008 APInt DemandedElts =
1009 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
1010
1011 return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
1012 SQ);
1013}
1014
1015ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
1016 Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
1017 // Without vscale_range, we only know that vscale is non-zero.
1018 if (!Attr.isValid())
1019 return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));
1020
1021 unsigned AttrMin = Attr.getVScaleRangeMin();
1022 // Minimum is larger than vscale width, result is always poison.
1023 if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
1024 return ConstantRange::getEmpty(BitWidth);
1025
1026 APInt Min(BitWidth, AttrMin);
1027 std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
1028 if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
1029 return ConstantRange(Min, APInt::getZero(BitWidth));
1030
1031 return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
1032}
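// Illustrative example (not in the upstream source): for a function carrying
// vscale_range(2,4) and BitWidth = 64, this returns the ConstantRange [2, 5),
// i.e. vscale is known to be 2, 3, or 4.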
1033
1034static void adjustKnownBitsForSelectArm(KnownBits &Known, Value *Cond,
1035 Value *Arm, bool Invert, unsigned Depth,
1036 const SimplifyQuery &Q) {
1037 // If we have a constant arm, we are done.
1038 if (Known.isConstant())
1039 return;
1040
1041 // See what condition implies about the bits of the select arm.
1042 KnownBits CondRes(Known.getBitWidth());
1043 computeKnownBitsFromCond(Arm, Cond, CondRes, Depth + 1, Q, Invert);
1044 // If we don't get any information from the condition, no reason to
1045 // proceed.
1046 if (CondRes.isUnknown())
1047 return;
1048
1049 // We can have a conflict if the condition is dead. I.e. if we have
1050 // (x | 64) < 32 ? (x | 64) : y
1051 // we will have a conflict at bit 6 from the condition/the `or`.
1052 // In that case just return. It's not particularly important
1053 // what we do, as this select is going to be simplified soon.
1054 CondRes = CondRes.unionWith(Known);
1055 if (CondRes.hasConflict())
1056 return;
1057
1058 // Finally make sure the information we found is valid. This is relatively
1059 // expensive so it's left for the very end.
1060 if (!isGuaranteedNotToBeUndef(Arm, Q.AC, Q.CxtI, Q.DT, Depth + 1))
1061 return;
1062
1063 // Finally, we know we get information from the condition and its valid,
1064 // so return it.
1065 Known = CondRes;
1066}
1067
1068// Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
1069// Returns the input and lower/upper bounds.
1070static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
1071 const APInt *&CLow, const APInt *&CHigh) {
1072 assert(isa<Operator>(Select) &&
1073 cast<Operator>(Select)->getOpcode() == Instruction::Select &&
1074 "Input should be a Select!");
1075
1076 const Value *LHS = nullptr, *RHS = nullptr;
1077 SelectPatternFlavor SPF = matchSelectPattern(Select, LHS, RHS).Flavor;
1078 if (SPF != SPF_SMAX && SPF != SPF_SMIN)
1079 return false;
1080
1081 if (!match(RHS, m_APInt(CLow)))
1082 return false;
1083
1084 const Value *LHS2 = nullptr, *RHS2 = nullptr;
1085 SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
1086 if (getInverseMinMaxFlavor(SPF) != SPF2)
1087 return false;
1088
1089 if (!match(RHS2, m_APInt(CHigh)))
1090 return false;
1091
1092 if (SPF == SPF_SMIN)
1093 std::swap(CLow, CHigh);
1094
1095 In = LHS2;
1096 return CLow->sle(*CHigh);
1097}
1098
1100 const APInt *&CLow,
1101 const APInt *&CHigh) {
1102 assert((II->getIntrinsicID() == Intrinsic::smin ||
1103 II->getIntrinsicID() == Intrinsic::smax) &&
1104 "Must be smin/smax");
1105
1106 Intrinsic::ID InverseID = getInverseMinMaxIntrinsic(II->getIntrinsicID());
1107 auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1108 if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
1109 !match(II->getArgOperand(1), m_APInt(CLow)) ||
1110 !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
1111 return false;
1112
1113 if (II->getIntrinsicID() == Intrinsic::smin)
1114 std::swap(CLow, CHigh);
1115 return CLow->sle(*CHigh);
1116}
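// Illustrative example (not in the upstream source):
//   %t = call i32 @llvm.smin.i32(i32 %x, i32 100)
//   %r = call i32 @llvm.smax.i32(i32 %t, i32 -50)
// matches with CLow = -50 and CHigh = 100, i.e. %r is %x clamped to
// [-50, 100].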
1117
1118static void unionWithMinMaxIntrinsicClamp(const IntrinsicInst *II,
1119 KnownBits &Known) {
1120 const APInt *CLow, *CHigh;
1121 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
1122 Known = Known.unionWith(
1123 ConstantRange::getNonEmpty(*CLow, *CHigh + 1).toKnownBits());
1124}
1125
1126static void computeKnownBitsFromOperator(const Operator *I,
1127 const APInt &DemandedElts,
1128 KnownBits &Known, unsigned Depth,
1129 const SimplifyQuery &Q) {
1130 unsigned BitWidth = Known.getBitWidth();
1131
1132 KnownBits Known2(BitWidth);
1133 switch (I->getOpcode()) {
1134 default: break;
1135 case Instruction::Load:
1136 if (MDNode *MD =
1137 Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
1138 computeKnownBitsFromRangeMetadata(*MD, Known);
1139 break;
1140 case Instruction::And:
1141 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1142 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1143
1144 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1145 break;
1146 case Instruction::Or:
1147 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1148 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1149
1150 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1151 break;
1152 case Instruction::Xor:
1153 computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1154 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1155
1156 Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
1157 break;
1158 case Instruction::Mul: {
1159 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1160 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1161 computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, NUW,
1162 DemandedElts, Known, Known2, Depth, Q);
1163 break;
1164 }
1165 case Instruction::UDiv: {
1166 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1167 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1168 Known =
1169 KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1170 break;
1171 }
1172 case Instruction::SDiv: {
1173 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1174 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1175 Known =
1176 KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
1177 break;
1178 }
1179 case Instruction::Select: {
1180 auto ComputeForArm = [&](Value *Arm, bool Invert) {
1181 KnownBits Res(Known.getBitWidth());
1182 computeKnownBits(Arm, DemandedElts, Res, Depth + 1, Q);
1183 adjustKnownBitsForSelectArm(Res, I->getOperand(0), Arm, Invert, Depth, Q);
1184 return Res;
1185 };
1186 // Only known if known in both the LHS and RHS.
1187 Known =
1188 ComputeForArm(I->getOperand(1), /*Invert=*/false)
1189 .intersectWith(ComputeForArm(I->getOperand(2), /*Invert=*/true));
1190 break;
1191 }
1192 case Instruction::FPTrunc:
1193 case Instruction::FPExt:
1194 case Instruction::FPToUI:
1195 case Instruction::FPToSI:
1196 case Instruction::SIToFP:
1197 case Instruction::UIToFP:
1198 break; // Can't work with floating point.
1199 case Instruction::PtrToInt:
1200 case Instruction::IntToPtr:
1201 // Fall through and handle them the same as zext/trunc.
1202 [[fallthrough]];
1203 case Instruction::ZExt:
1204 case Instruction::Trunc: {
1205 Type *SrcTy = I->getOperand(0)->getType();
1206
1207 unsigned SrcBitWidth;
1208 // Note that we handle pointer operands here because of inttoptr/ptrtoint
1209 // which fall through here.
1210 Type *ScalarTy = SrcTy->getScalarType();
1211 SrcBitWidth = ScalarTy->isPointerTy() ?
1212 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1213 Q.DL.getTypeSizeInBits(ScalarTy);
1214
1215 assert(SrcBitWidth && "SrcBitWidth can't be zero");
1216 Known = Known.anyextOrTrunc(SrcBitWidth);
1217 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1218 if (auto *Inst = dyn_cast<PossiblyNonNegInst>(I);
1219 Inst && Inst->hasNonNeg() && !Known.isNegative())
1220 Known.makeNonNegative();
1221 Known = Known.zextOrTrunc(BitWidth);
1222 break;
1223 }
1224 case Instruction::BitCast: {
1225 Type *SrcTy = I->getOperand(0)->getType();
1226 if (SrcTy->isIntOrPtrTy() &&
1227 // TODO: For now, not handling conversions like:
1228 // (bitcast i64 %x to <2 x i32>)
1229 !I->getType()->isVectorTy()) {
1230 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1231 break;
1232 }
1233
1234 const Value *V;
1235 // Handle bitcast from floating point to integer.
1236 if (match(I, m_ElementWiseBitCast(m_Value(V))) &&
1237 V->getType()->isFPOrFPVectorTy()) {
1238 Type *FPType = V->getType()->getScalarType();
1239 KnownFPClass Result =
1240 computeKnownFPClass(V, DemandedElts, fcAllFlags, Depth + 1, Q);
1241 FPClassTest FPClasses = Result.KnownFPClasses;
1242
1243 // TODO: Treat it as zero/poison if the use of I is unreachable.
1244 if (FPClasses == fcNone)
1245 break;
1246
1247 if (Result.isKnownNever(fcNormal | fcSubnormal | fcNan)) {
1248 Known.Zero.setAllBits();
1249 Known.One.setAllBits();
1250
1251 if (FPClasses & fcInf)
1252 Known = Known.intersectWith(KnownBits::makeConstant(
1253 APFloat::getInf(FPType->getFltSemantics()).bitcastToAPInt()));
1254
1255 if (FPClasses & fcZero)
1256 Known = Known.intersectWith(KnownBits::makeConstant(
1257 APInt::getZero(FPType->getScalarSizeInBits())));
1258
1259 Known.Zero.clearSignBit();
1260 Known.One.clearSignBit();
1261 }
1262
1263 if (Result.SignBit) {
1264 if (*Result.SignBit)
1265 Known.makeNegative();
1266 else
1267 Known.makeNonNegative();
1268 }
1269
1270 break;
1271 }
1272
1273 // Handle cast from vector integer type to scalar or vector integer.
1274 auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
1275 if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1276 !I->getType()->isIntOrIntVectorTy() ||
1277 isa<ScalableVectorType>(I->getType()))
1278 break;
1279
1280 // Look through a cast from narrow vector elements to wider type.
1281 // Examples: v4i32 -> v2i64, v3i8 -> v24
1282 unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1283 if (BitWidth % SubBitWidth == 0) {
1284 // Known bits are automatically intersected across demanded elements of a
1285 // vector. So for example, if a bit is computed as known zero, it must be
1286 // zero across all demanded elements of the vector.
1287 //
1288 // For this bitcast, each demanded element of the output is sub-divided
1289 // across a set of smaller vector elements in the source vector. To get
1290 // the known bits for an entire element of the output, compute the known
1291 // bits for each sub-element sequentially. This is done by shifting the
1292 // one-set-bit demanded elements parameter across the sub-elements for
1293 // consecutive calls to computeKnownBits. We are using the demanded
1294 // elements parameter as a mask operator.
1295 //
1296 // The known bits of each sub-element are then inserted into place
1297 // (dependent on endian) to form the full result of known bits.
1298 unsigned NumElts = DemandedElts.getBitWidth();
1299 unsigned SubScale = BitWidth / SubBitWidth;
1300 APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
1301 for (unsigned i = 0; i != NumElts; ++i) {
1302 if (DemandedElts[i])
1303 SubDemandedElts.setBit(i * SubScale);
1304 }
1305
1306 KnownBits KnownSrc(SubBitWidth);
1307 for (unsigned i = 0; i != SubScale; ++i) {
1308 computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
1309 Depth + 1, Q);
1310 unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1311 Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
1312 }
1313 }
1314 break;
1315 }
1316 case Instruction::SExt: {
1317 // Compute the bits in the result that are not present in the input.
1318 unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1319
1320 Known = Known.trunc(SrcBitWidth);
1321 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1322 // If the sign bit of the input is known set or clear, then we know the
1323 // top bits of the result.
1324 Known = Known.sext(BitWidth);
1325 break;
1326 }
1327 case Instruction::Shl: {
1328 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1329 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1330 auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1331 bool ShAmtNonZero) {
1332 return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
1333 };
1334 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1335 KF);
1336 // Trailing zeros of a left-shifted constant never decrease.
1337 const APInt *C;
1338 if (match(I->getOperand(0), m_APInt(C)))
1339 Known.Zero.setLowBits(C->countr_zero());
1340 break;
1341 }
1342 case Instruction::LShr: {
1343 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1344 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1345 bool ShAmtNonZero) {
1346 return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1347 };
1348 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1349 KF);
1350 // Leading zeros of a right-shifted constant never decrease.
1351 const APInt *C;
1352 if (match(I->getOperand(0), m_APInt(C)))
1353 Known.Zero.setHighBits(C->countl_zero());
1354 break;
1355 }
1356 case Instruction::AShr: {
1357 bool Exact = Q.IIQ.isExact(cast<BinaryOperator>(I));
1358 auto KF = [Exact](const KnownBits &KnownVal, const KnownBits &KnownAmt,
1359 bool ShAmtNonZero) {
1360 return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero, Exact);
1361 };
1362 computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1363 KF);
1364 break;
1365 }
1366 case Instruction::Sub: {
1367 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1368 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1369 computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, NUW,
1370 DemandedElts, Known, Known2, Depth, Q);
1371 break;
1372 }
1373 case Instruction::Add: {
1374 bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1375 bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
1376 computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW, NUW,
1377 DemandedElts, Known, Known2, Depth, Q);
1378 break;
1379 }
1380 case Instruction::SRem:
1381 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1382 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1383 Known = KnownBits::srem(Known, Known2);
1384 break;
1385
1386 case Instruction::URem:
1387 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1388 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1389 Known = KnownBits::urem(Known, Known2);
1390 break;
1391 case Instruction::Alloca:
1392 Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
1393 break;
1394 case Instruction::GetElementPtr: {
1395 // Analyze all of the subscripts of this getelementptr instruction
1396 // to determine if we can prove known low zero bits.
1397 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1398 // Accumulate the constant indices in a separate variable
1399 // to minimize the number of calls to computeForAddSub.
1400 APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
1401
1402 gep_type_iterator GTI = gep_type_begin(I);
1403 for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1404 // TrailZ can only become smaller, short-circuit if we hit zero.
1405 if (Known.isUnknown())
1406 break;
1407
1408 Value *Index = I->getOperand(i);
1409
1410 // Handle case when index is zero.
1411 Constant *CIndex = dyn_cast<Constant>(Index);
1412 if (CIndex && CIndex->isZeroValue())
1413 continue;
1414
1415 if (StructType *STy = GTI.getStructTypeOrNull()) {
1416 // Handle struct member offset arithmetic.
1417
1418 assert(CIndex &&
1419 "Access to structure field must be known at compile time");
1420
1421 if (CIndex->getType()->isVectorTy())
1422 Index = CIndex->getSplatValue();
1423
1424 unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1425 const StructLayout *SL = Q.DL.getStructLayout(STy);
1426 uint64_t Offset = SL->getElementOffset(Idx);
1427 AccConstIndices += Offset;
1428 continue;
1429 }
1430
1431 // Handle array index arithmetic.
1432 Type *IndexedTy = GTI.getIndexedType();
1433 if (!IndexedTy->isSized()) {
1434 Known.resetAll();
1435 break;
1436 }
1437
1438 unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
1439 KnownBits IndexBits(IndexBitWidth);
1440 computeKnownBits(Index, IndexBits, Depth + 1, Q);
1441 TypeSize IndexTypeSize = GTI.getSequentialElementStride(Q.DL);
1442 uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinValue();
1443 KnownBits ScalingFactor(IndexBitWidth);
1444 // Multiply by current sizeof type.
1445 // &A[i] == A + i * sizeof(*A[i]).
1446 if (IndexTypeSize.isScalable()) {
1447 // For scalable types the only thing we know about sizeof is
1448 // that this is a multiple of the minimum size.
1449 ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
1450 } else if (IndexBits.isConstant()) {
1451 APInt IndexConst = IndexBits.getConstant();
1452 APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
1453 IndexConst *= ScalingFactor;
1454 AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
1455 continue;
1456 } else {
1457 ScalingFactor =
1458 KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
1459 }
1460 IndexBits = KnownBits::mul(IndexBits, ScalingFactor);
1461
1462 // If the offsets have a different width from the pointer, according
1463 // to the language reference we need to sign-extend or truncate them
1464 // to the width of the pointer.
1465 IndexBits = IndexBits.sextOrTrunc(BitWidth);
1466
1467 // Note that inbounds does *not* guarantee nsw for the addition, as only
1468 // the offset is signed, while the base address is unsigned.
1469 Known = KnownBits::add(Known, IndexBits);
1470 }
1471 if (!Known.isUnknown() && !AccConstIndices.isZero()) {
1472 KnownBits Index = KnownBits::makeConstant(AccConstIndices);
1473 Known = KnownBits::add(Known, Index);
1474 }
1475 break;
1476 }
1477 case Instruction::PHI: {
1478 const PHINode *P = cast<PHINode>(I);
1479 BinaryOperator *BO = nullptr;
1480 Value *R = nullptr, *L = nullptr;
1481 if (matchSimpleRecurrence(P, BO, R, L)) {
1482 // Handle the case of a simple two-predecessor recurrence PHI.
1483 // There's a lot more that could theoretically be done here, but
1484 // this is sufficient to catch some interesting cases.
1485 unsigned Opcode = BO->getOpcode();
1486
1487 switch (Opcode) {
1488 // If this is a shift recurrence, we know the bits being shifted in. We
1489 // can combine that with information about the start value of the
1490 // recurrence to conclude facts about the result. If this is a udiv
1491 // recurrence, we know that the result can never exceed either the
1492 // numerator or the start value, whichever is greater.
1493 case Instruction::LShr:
1494 case Instruction::AShr:
1495 case Instruction::Shl:
1496 case Instruction::UDiv:
1497 if (BO->getOperand(0) != I)
1498 break;
1499 [[fallthrough]];
1500
1501 // For a urem recurrence, the result can never exceed the start value. The
1502 // phi could either be the numerator or the denominator.
1503 case Instruction::URem: {
1504 // We have matched a recurrence of the form:
1505 // %iv = [R, %entry], [%iv.next, %backedge]
1506 // %iv.next = shift_op %iv, L
1507
1508 // Recurse with the phi context to avoid concern about whether facts
1509 // inferred hold at original context instruction. TODO: It may be
1510 // correct to use the original context. IF warranted, explore and
1511 // add sufficient tests to cover.
1512 SimplifyQuery RecQ = Q.getWithoutCondContext();
1513 RecQ.CxtI = P;
1514 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1515 switch (Opcode) {
1516 case Instruction::Shl:
1517 // A shl recurrence will only increase the trailing zeros
1518 Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1519 break;
1520 case Instruction::LShr:
1521 case Instruction::UDiv:
1522 case Instruction::URem:
1523 // lshr, udiv, and urem recurrences will preserve the leading zeros of
1524 // the start value.
1525 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1526 break;
1527 case Instruction::AShr:
1528 // An ashr recurrence will extend the initial sign bit
1529 Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1530 Known.One.setHighBits(Known2.countMinLeadingOnes());
1531 break;
1532 }
1533 break;
1534 }
1535
1536 // Check for operations that have the property that if
1537 // both their operands have low zero bits, the result
1538 // will have low zero bits.
1539 case Instruction::Add:
1540 case Instruction::Sub:
1541 case Instruction::And:
1542 case Instruction::Or:
1543 case Instruction::Mul: {
1544 // Change the context instruction to the "edge" that flows into the
1545 // phi. This is important because that is where the value is actually
1546 // "evaluated" even though it is used later somewhere else. (see also
1547 // D69571).
1548 SimplifyQuery RecQ = Q.getWithoutCondContext();
1549
1550 unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
1551 Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
1552 Instruction *LInst = P->getIncomingBlock(1 - OpNum)->getTerminator();
1553
1554 // Ok, we have a PHI of the form L op= R. Check for low
1555 // zero bits.
1556 RecQ.CxtI = RInst;
1557 computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1558
1559 // We need to take the minimum number of known bits
1560 KnownBits Known3(BitWidth);
1561 RecQ.CxtI = LInst;
1562 computeKnownBits(L, DemandedElts, Known3, Depth + 1, RecQ);
1563
1564 Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(),
1565 Known3.countMinTrailingZeros()));
1566
1567 auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
1568 if (!OverflowOp || !Q.IIQ.hasNoSignedWrap(OverflowOp))
1569 break;
1570
1571 switch (Opcode) {
1572 // If initial value of recurrence is nonnegative, and we are adding
1573 // a nonnegative number with nsw, the result can only be nonnegative
1574 // or poison value regardless of the number of times we execute the
1575 // add in phi recurrence. If initial value is negative and we are
1576 // adding a negative number with nsw, the result can only be
1577 // negative or poison value. Similar arguments apply to sub and mul.
1578 //
1579 // (add non-negative, non-negative) --> non-negative
1580 // (add negative, negative) --> negative
1581 case Instruction::Add: {
1582 if (Known2.isNonNegative() && Known3.isNonNegative())
1583 Known.makeNonNegative();
1584 else if (Known2.isNegative() && Known3.isNegative())
1585 Known.makeNegative();
1586 break;
1587 }
1588
1589 // (sub nsw non-negative, negative) --> non-negative
1590 // (sub nsw negative, non-negative) --> negative
1591 case Instruction::Sub: {
1592 if (BO->getOperand(0) != I)
1593 break;
1594 if (Known2.isNonNegative() && Known3.isNegative())
1595 Known.makeNonNegative();
1596 else if (Known2.isNegative() && Known3.isNonNegative())
1597 Known.makeNegative();
1598 break;
1599 }
1600
1601 // (mul nsw non-negative, non-negative) --> non-negative
1602 case Instruction::Mul:
1603 if (Known2.isNonNegative() && Known3.isNonNegative())
1604 Known.makeNonNegative();
1605 break;
1606
1607 default:
1608 break;
1609 }
1610 break;
1611 }
1612
1613 default:
1614 break;
1615 }
1616 }
1617
1618 // Unreachable blocks may have zero-operand PHI nodes.
1619 if (P->getNumIncomingValues() == 0)
1620 break;
1621
1622 // Otherwise take the unions of the known bit sets of the operands,
1623 // taking conservative care to avoid excessive recursion.
1624 if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
1625 // Skip if every incoming value references to ourself.
1626 if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
1627 break;
1628
1629 Known.Zero.setAllBits();
1630 Known.One.setAllBits();
1631 for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
1632 Value *IncValue = P->getIncomingValue(u);
1633 // Skip direct self references.
1634 if (IncValue == P) continue;
1635
1636 // If the Use is a select of this phi, use the knownbit of the other
1637 // operand to break the recursion.
1638 if (auto *SI = dyn_cast<SelectInst>(IncValue)) {
1639 if (SI->getTrueValue() == P || SI->getFalseValue() == P)
1640 IncValue = SI->getTrueValue() == P ? SI->getFalseValue()
1641 : SI->getTrueValue();
1642 }
1643
1644 // Change the context instruction to the "edge" that flows into the
1645 // phi. This is important because that is where the value is actually
1646 // "evaluated" even though it is used later somewhere else. (see also
1647 // D69571).
1648 SimplifyQuery RecQ = Q.getWithoutCondContext();
1649 RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
1650
1651 Known2 = KnownBits(BitWidth);
1652
1653 // Recurse, but cap the recursion to one level, because we don't
1654 // want to waste time spinning around in loops.
1655 // TODO: See if we can base recursion limiter on number of incoming phi
1656 // edges so we don't overly clamp analysis.
1657 computeKnownBits(IncValue, DemandedElts, Known2,
1658 MaxAnalysisRecursionDepth - 1, RecQ);
1659
1660 // See if we can further use a conditional branch into the phi
1661 // to help us determine the range of the value.
1662 if (!Known2.isConstant()) {
1663 CmpPredicate Pred;
1664 const APInt *RHSC;
1665 BasicBlock *TrueSucc, *FalseSucc;
1666 // TODO: Use RHS Value and compute range from its known bits.
1667 if (match(RecQ.CxtI,
1668 m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
1669 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
1670 // Check for cases of duplicate successors.
1671 if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
1672 // If we're using the false successor, invert the predicate.
1673 if (FalseSucc == P->getParent())
1674 Pred = CmpInst::getInversePredicate(Pred);
1675 // Get the knownbits implied by the incoming phi condition.
1676 auto CR = ConstantRange::makeExactICmpRegion(Pred, *RHSC);
1677 KnownBits KnownUnion = Known2.unionWith(CR.toKnownBits());
1678 // We can have conflicts here if we are analyzing dead code (it's
1679 // impossible for us to reach this BB based on the icmp).
1680 if (KnownUnion.hasConflict()) {
1681 // No reason to continue analyzing in a known dead region, so
1682 // just resetAll and break. This will cause us to also exit the
1683 // outer loop.
1684 Known.resetAll();
1685 break;
1686 }
1687 Known2 = KnownUnion;
1688 }
1689 }
1690 }
1691
1692 Known = Known.intersectWith(Known2);
1693 // If all bits have been ruled out, there's no need to check
1694 // more operands.
1695 if (Known.isUnknown())
1696 break;
1697 }
1698 }
1699 break;
1700 }
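 // Example of the incoming-edge refinement above: if the only edge into the
 // phi block is taken under "br (icmp ult i32 %v, 8), %phi.bb, %other", then
 // along that edge %v lies in [0, 8), so its bits above bit 2 are known zero.
 // That extra knowledge is combined (unionWith) with the incoming value's
 // known bits before the per-edge results are intersected.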
1701 case Instruction::Call:
1702 case Instruction::Invoke: {
1703 // If range metadata is attached to this call, set known bits from that,
1704 // and then intersect with known bits based on other properties of the
1705 // function.
1706 if (MDNode *MD =
1707 Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1708 computeKnownBitsFromRangeMetadata(*MD, Known);
1709
1710 const auto *CB = cast<CallBase>(I);
1711
1712 if (std::optional<ConstantRange> Range = CB->getRange())
1713 Known = Known.unionWith(Range->toKnownBits());
1714
1715 if (const Value *RV = CB->getReturnedArgOperand()) {
1716 if (RV->getType() == I->getType()) {
1717 computeKnownBits(RV, Known2, Depth + 1, Q);
1718 Known = Known.unionWith(Known2);
1719 // If the function doesn't return properly for all input values
1720 // (e.g. unreachable exits) then there might be conflicts between the
1721 // argument value and the range metadata. Simply discard the known bits
1722 // in case of conflicts.
1723 if (Known.hasConflict())
1724 Known.resetAll();
1725 }
1726 }
1727 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1728 switch (II->getIntrinsicID()) {
1729 default:
1730 break;
1731 case Intrinsic::abs: {
1732 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1733 bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
1734 Known = Known2.abs(IntMinIsPoison);
1735 break;
1736 }
1737 case Intrinsic::bitreverse:
1738 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1739 Known.Zero |= Known2.Zero.reverseBits();
1740 Known.One |= Known2.One.reverseBits();
1741 break;
1742 case Intrinsic::bswap:
1743 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1744 Known.Zero |= Known2.Zero.byteSwap();
1745 Known.One |= Known2.One.byteSwap();
1746 break;
1747 case Intrinsic::ctlz: {
1748 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1749 // If we have a known 1, its position is our upper bound.
1750 unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1751 // If this call is poison for 0 input, the result will be less than 2^n.
1752 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1753 PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1754 unsigned LowBits = llvm::bit_width(PossibleLZ);
1755 Known.Zero.setBitsFrom(LowBits);
1756 break;
1757 }
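 // Example: if the operand is known to have at most 5 leading zeros, the
 // ctlz result is at most 5, so it fits in bit_width(5) = 3 bits and every
 // result bit from bit 3 upwards is known zero.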
1758 case Intrinsic::cttz: {
1759 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1760 // If we have a known 1, its position is our upper bound.
1761 unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1762 // If this call is poison for 0 input, the result will be less than 2^n.
1763 if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1764 PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1765 unsigned LowBits = llvm::bit_width(PossibleTZ);
1766 Known.Zero.setBitsFrom(LowBits);
1767 break;
1768 }
1769 case Intrinsic::ctpop: {
1770 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1771 // We can bound the space the count needs. Also, bits known to be zero
1772 // can't contribute to the population.
1773 unsigned BitsPossiblySet = Known2.countMaxPopulation();
1774 unsigned LowBits = llvm::bit_width(BitsPossiblySet);
1775 Known.Zero.setBitsFrom(LowBits);
1776 // TODO: we could bound KnownOne using the lower bound on the number
1777 // of bits which might be set provided by popcnt KnownOne2.
1778 break;
1779 }
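 // Example: if at most 3 bits of the operand can be set, the population
 // count is at most 3, which fits in bit_width(3) = 2 bits, so every result
 // bit from bit 2 upwards is known zero.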
1780 case Intrinsic::fshr:
1781 case Intrinsic::fshl: {
1782 const APInt *SA;
1783 if (!match(I->getOperand(2), m_APInt(SA)))
1784 break;
1785
1786 // Normalize to funnel shift left.
1787 uint64_t ShiftAmt = SA->urem(BitWidth);
1788 if (II->getIntrinsicID() == Intrinsic::fshr)
1789 ShiftAmt = BitWidth - ShiftAmt;
1790
1791 KnownBits Known3(BitWidth);
1792 computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1793 computeKnownBits(I->getOperand(1), DemandedElts, Known3, Depth + 1, Q);
1794
1795 Known.Zero =
1796 Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1797 Known.One =
1798 Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1799 break;
1800 }
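 // Example: for an i8 fshl with a constant shift of 3, the result takes its
 // top 5 bits from Op0 << 3 and its low 3 bits from Op1 >> 5, so the known
 // bits of both operands are shifted into place and combined as above.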
1801 case Intrinsic::uadd_sat:
1802 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1803 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1804 Known = KnownBits::uadd_sat(Known, Known2);
1805 break;
1806 case Intrinsic::usub_sat:
1807 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1808 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1809 Known = KnownBits::usub_sat(Known, Known2);
1810 break;
1811 case Intrinsic::sadd_sat:
1812 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1813 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1814 Known = KnownBits::sadd_sat(Known, Known2);
1815 break;
1816 case Intrinsic::ssub_sat:
1817 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1818 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1819 Known = KnownBits::ssub_sat(Known, Known2);
1820 break;
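 // Example: if both operands of a uadd.sat on i8 have their top nibble known
 // zero (values <= 0x0F), the saturating sum is at most 0x1E, so the top
 // three bits of the result are zero.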
1821 // Vec reverse preserves bits from input vec.
1822 case Intrinsic::vector_reverse:
1823 computeKnownBits(I->getOperand(0), DemandedElts.reverseBits(), Known,
1824 Depth + 1, Q);
1825 break;
1826 // for min/max/and/or reduce, any bit common to each element in the
1827 // input vec is set in the output.
1828 case Intrinsic::vector_reduce_and:
1829 case Intrinsic::vector_reduce_or:
1830 case Intrinsic::vector_reduce_umax:
1831 case Intrinsic::vector_reduce_umin:
1832 case Intrinsic::vector_reduce_smax:
1833 case Intrinsic::vector_reduce_smin:
1834 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1835 break;
1836 case Intrinsic::vector_reduce_xor: {
1837 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1838 // The zeros common to all elements are zero in the output.
1839 // If the number of elements is odd, then the common ones remain. If the
1840 // number of elements is even, then the common ones become zeros.
1841 auto *VecTy = cast<VectorType>(I->getOperand(0)->getType());
1842 // Even, so the ones become zeros.
1843 bool EvenCnt = VecTy->getElementCount().isKnownEven();
1844 if (EvenCnt)
1845 Known.Zero |= Known.One;
1846 // The element count may be even, so we need to clear the common ones.
1847 if (VecTy->isScalableTy() || EvenCnt)
1848 Known.One.clearAllBits();
1849 break;
1850 }
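 // Example: xor-reducing a <4 x i8> where every element has bit 0 known set
 // gives bit 0 = 1^1^1^1 = 0, so with an even (or scalable) element count
 // the common ones must be cleared, while common zeros stay zero.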
1851 case Intrinsic::umin:
1852 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1853 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1854 Known = KnownBits::umin(Known, Known2);
1855 break;
1856 case Intrinsic::umax:
1857 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1858 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1859 Known = KnownBits::umax(Known, Known2);
1860 break;
1861 case Intrinsic::smin:
1862 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1863 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1864 Known = KnownBits::smin(Known, Known2);
1866 break;
1867 case Intrinsic::smax:
1868 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1869 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1870 Known = KnownBits::smax(Known, Known2);
1872 break;
1873 case Intrinsic::ptrmask: {
1874 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1875
1876 const Value *Mask = I->getOperand(1);
1877 Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
1878 computeKnownBits(Mask, DemandedElts, Known2, Depth + 1, Q);
1879 // TODO: 1-extend would be more precise.
1880 Known &= Known2.anyextOrTrunc(BitWidth);
1881 break;
1882 }
1883 case Intrinsic::x86_sse2_pmulh_w:
1884 case Intrinsic::x86_avx2_pmulh_w:
1885 case Intrinsic::x86_avx512_pmulh_w_512:
1886 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1887 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1888 Known = KnownBits::mulhs(Known, Known2);
1889 break;
1890 case Intrinsic::x86_sse2_pmulhu_w:
1891 case Intrinsic::x86_avx2_pmulhu_w:
1892 case Intrinsic::x86_avx512_pmulhu_w_512:
1893 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth + 1, Q);
1894 computeKnownBits(I->getOperand(1), DemandedElts, Known2, Depth + 1, Q);
1895 Known = KnownBits::mulhu(Known, Known2);
1896 break;
1897 case Intrinsic::x86_sse42_crc32_64_64:
1898 Known.Zero.setBitsFrom(32);
1899 break;
1900 case Intrinsic::x86_ssse3_phadd_d_128:
1901 case Intrinsic::x86_ssse3_phadd_w_128:
1902 case Intrinsic::x86_avx2_phadd_d:
1903 case Intrinsic::x86_avx2_phadd_w: {
1904 Known = computeKnownBitsForHorizontalOperation(
1905 I, DemandedElts, Depth, Q,
1906 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1907 return KnownBits::add(KnownLHS, KnownRHS);
1908 });
1909 break;
1910 }
1911 case Intrinsic::x86_ssse3_phadd_sw_128:
1912 case Intrinsic::x86_avx2_phadd_sw: {
1913 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1914 Q, KnownBits::sadd_sat);
1915 break;
1916 }
1917 case Intrinsic::x86_ssse3_phsub_d_128:
1918 case Intrinsic::x86_ssse3_phsub_w_128:
1919 case Intrinsic::x86_avx2_phsub_d:
1920 case Intrinsic::x86_avx2_phsub_w: {
1921 Known = computeKnownBitsForHorizontalOperation(
1922 I, DemandedElts, Depth, Q,
1923 [](const KnownBits &KnownLHS, const KnownBits &KnownRHS) {
1924 return KnownBits::sub(KnownLHS, KnownRHS);
1925 });
1926 break;
1927 }
1928 case Intrinsic::x86_ssse3_phsub_sw_128:
1929 case Intrinsic::x86_avx2_phsub_sw: {
1930 Known = computeKnownBitsForHorizontalOperation(I, DemandedElts, Depth,
1931 Q, KnownBits::ssub_sat);
1932 break;
1933 }
1934 case Intrinsic::riscv_vsetvli:
1935 case Intrinsic::riscv_vsetvlimax: {
1936 bool HasAVL = II->getIntrinsicID() == Intrinsic::riscv_vsetvli;
1937 const ConstantRange Range = getVScaleRange(II->getFunction(), BitWidth);
1938 uint64_t SEW = RISCVVType::decodeVSEW(
1939 cast<ConstantInt>(II->getArgOperand(HasAVL))->getZExtValue());
1940 RISCVII::VLMUL VLMUL = static_cast<RISCVII::VLMUL>(
1941 cast<ConstantInt>(II->getArgOperand(1 + HasAVL))->getZExtValue());
1942 uint64_t MaxVLEN =
1943 Range.getUnsignedMax().getZExtValue() * RISCV::RVVBitsPerBlock;
1944 uint64_t MaxVL = MaxVLEN / RISCVVType::getSEWLMULRatio(SEW, VLMUL);
1945
1946 // Result of vsetvli must be not larger than AVL.
1947 if (HasAVL)
1948 if (auto *CI = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1949 MaxVL = std::min(MaxVL, CI->getZExtValue());
1950
1951 unsigned KnownZeroFirstBit = Log2_32(MaxVL) + 1;
1952 if (BitWidth > KnownZeroFirstBit)
1953 Known.Zero.setBitsFrom(KnownZeroFirstBit);
1954 break;
1955 }
1956 case Intrinsic::vscale: {
1957 if (!II->getParent() || !II->getFunction())
1958 break;
1959
1960 Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
1961 break;
1962 }
1963 }
1964 }
1965 break;
1966 }
1967 case Instruction::ShuffleVector: {
1968 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
1969 // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1970 if (!Shuf) {
1971 Known.resetAll();
1972 return;
1973 }
1974 // For undef elements, we don't know anything about the common state of
1975 // the shuffle result.
1976 APInt DemandedLHS, DemandedRHS;
1977 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
1978 Known.resetAll();
1979 return;
1980 }
1981 Known.One.setAllBits();
1982 Known.Zero.setAllBits();
1983 if (!!DemandedLHS) {
1984 const Value *LHS = Shuf->getOperand(0);
1985 computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
1986 // If we don't know any bits, early out.
1987 if (Known.isUnknown())
1988 break;
1989 }
1990 if (!!DemandedRHS) {
1991 const Value *RHS = Shuf->getOperand(1);
1992 computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
1993 Known = Known.intersectWith(Known2);
1994 }
1995 break;
1996 }
1997 case Instruction::InsertElement: {
1998 if (isa<ScalableVectorType>(I->getType())) {
1999 Known.resetAll();
2000 return;
2001 }
2002 const Value *Vec = I->getOperand(0);
2003 const Value *Elt = I->getOperand(1);
2004 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
2005 unsigned NumElts = DemandedElts.getBitWidth();
2006 APInt DemandedVecElts = DemandedElts;
2007 bool NeedsElt = true;
2008 // If we know the index we are inserting to, clear it from the Vec check.
2009 if (CIdx && CIdx->getValue().ult(NumElts)) {
2010 DemandedVecElts.clearBit(CIdx->getZExtValue());
2011 NeedsElt = DemandedElts[CIdx->getZExtValue()];
2012 }
2013
2014 Known.One.setAllBits();
2015 Known.Zero.setAllBits();
2016 if (NeedsElt) {
2017 computeKnownBits(Elt, Known, Depth + 1, Q);
2018 // If we don't know any bits, early out.
2019 if (Known.isUnknown())
2020 break;
2021 }
2022
2023 if (!DemandedVecElts.isZero()) {
2024 computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
2025 Known = Known.intersectWith(Known2);
2026 }
2027 break;
2028 }
2029 case Instruction::ExtractElement: {
2030 // Look through extract element. If the index is non-constant or
2031 // out-of-range demand all elements, otherwise just the extracted element.
2032 const Value *Vec = I->getOperand(0);
2033 const Value *Idx = I->getOperand(1);
2034 auto *CIdx = dyn_cast<ConstantInt>(Idx);
2035 if (isa<ScalableVectorType>(Vec->getType())) {
2036 // FIXME: there's probably *something* we can do with scalable vectors
2037 Known.resetAll();
2038 break;
2039 }
2040 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2041 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
2042 if (CIdx && CIdx->getValue().ult(NumElts))
2043 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
2044 computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
2045 break;
2046 }
2047 case Instruction::ExtractValue:
2048 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
2049 const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
2050 if (EVI->getNumIndices() != 1) break;
2051 if (EVI->getIndices()[0] == 0) {
2052 switch (II->getIntrinsicID()) {
2053 default: break;
2054 case Intrinsic::uadd_with_overflow:
2055 case Intrinsic::sadd_with_overflow:
2056 computeKnownBitsAddSub(
2057 true, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
2058 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
2059 break;
2060 case Intrinsic::usub_with_overflow:
2061 case Intrinsic::ssub_with_overflow:
2062 computeKnownBitsAddSub(
2063 false, II->getArgOperand(0), II->getArgOperand(1), /*NSW=*/false,
2064 /* NUW=*/false, DemandedElts, Known, Known2, Depth, Q);
2065 break;
2066 case Intrinsic::umul_with_overflow:
2067 case Intrinsic::smul_with_overflow:
2068 computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
2069 false, DemandedElts, Known, Known2, Depth, Q);
2070 break;
2071 }
2072 }
2073 }
2074 break;
2075 case Instruction::Freeze:
2076 if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
2077 Depth + 1))
2078 computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
2079 break;
2080 }
2081}
2082
2083/// Determine which bits of V are known to be either zero or one and return
2084/// them.
2085KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
2086 unsigned Depth, const SimplifyQuery &Q) {
2087 KnownBits Known(getBitWidth(V->getType(), Q.DL));
2088 ::computeKnownBits(V, DemandedElts, Known, Depth, Q);
2089 return Known;
2090}
2091
2092/// Determine which bits of V are known to be either zero or one and return
2093/// them.
2095 const SimplifyQuery &Q) {
2096 KnownBits Known(getBitWidth(V->getType(), Q.DL));
2097 computeKnownBits(V, Known, Depth, Q);
2098 return Known;
2099}
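 // Typical use of the wrappers above (illustrative only): callers construct a
 // SimplifyQuery and ask for the known bits of a value, e.g.
 //   KnownBits Known = computeKnownBits(V, /*Depth=*/0, SQ);
 //   if (Known.isNonNegative()) { ... }
 // where SQ carries the DataLayout and optional assumption/dominator info.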
2100
2101/// Determine which bits of V are known to be either zero or one and return
2102/// them in the Known bit set.
2103///
2104/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
2105/// we cannot optimize based on the assumption that it is zero without changing
2106/// it to be an explicit zero. If we don't change it to zero, other code could
2107 /// optimize based on the contradictory assumption that it is non-zero.
2108/// Because instcombine aggressively folds operations with undef args anyway,
2109/// this won't lose us code quality.
2110///
2111/// This function is defined on values with integer type, values with pointer
2112/// type, and vectors of integers. In the case
2113 /// where V is a vector, the known zero and known one values are the
2114 /// same width as the vector element, and a bit is set only if it is true
2115/// for all of the demanded elements in the vector specified by DemandedElts.
2116void computeKnownBits(const Value *V, const APInt &DemandedElts,
2117 KnownBits &Known, unsigned Depth,
2118 const SimplifyQuery &Q) {
2119 if (!DemandedElts) {
2120 // No demanded elts, better to assume we don't know anything.
2121 Known.resetAll();
2122 return;
2123 }
2124
2125 assert(V && "No Value?");
2126 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2127
2128#ifndef NDEBUG
2129 Type *Ty = V->getType();
2130 unsigned BitWidth = Known.getBitWidth();
2131
2132 assert((Ty->isIntOrIntVectorTy(BitWidth) || Ty->isPtrOrPtrVectorTy()) &&
2133 "Not integer or pointer type!");
2134
2135 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
2136 assert(
2137 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
2138 "DemandedElt width should equal the fixed vector number of elements");
2139 } else {
2140 assert(DemandedElts == APInt(1, 1) &&
2141 "DemandedElt width should be 1 for scalars or scalable vectors");
2142 }
2143
2144 Type *ScalarTy = Ty->getScalarType();
2145 if (ScalarTy->isPointerTy()) {
2146 assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
2147 "V and Known should have same BitWidth");
2148 } else {
2149 assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
2150 "V and Known should have same BitWidth");
2151 }
2152#endif
2153
2154 const APInt *C;
2155 if (match(V, m_APInt(C))) {
2156 // We know all of the bits for a scalar constant or a splat vector constant!
2157 Known = KnownBits::makeConstant(*C);
2158 return;
2159 }
2160 // Null and aggregate-zero are all-zeros.
2161 if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
2162 Known.setAllZero();
2163 return;
2164 }
2165 // Handle a constant vector by taking the intersection of the known bits of
2166 // each element.
2167 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
2168 assert(!isa<ScalableVectorType>(V->getType()));
2169 // We know that CDV must be a vector of integers. Take the intersection of
2170 // each element.
2171 Known.Zero.setAllBits(); Known.One.setAllBits();
2172 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
2173 if (!DemandedElts[i])
2174 continue;
2175 APInt Elt = CDV->getElementAsAPInt(i);
2176 Known.Zero &= ~Elt;
2177 Known.One &= Elt;
2178 }
2179 if (Known.hasConflict())
2180 Known.resetAll();
2181 return;
2182 }
2183
2184 if (const auto *CV = dyn_cast<ConstantVector>(V)) {
2185 assert(!isa<ScalableVectorType>(V->getType()));
2186 // We know that CV must be a vector of integers. Take the intersection of
2187 // each element.
2188 Known.Zero.setAllBits(); Known.One.setAllBits();
2189 for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
2190 if (!DemandedElts[i])
2191 continue;
2192 Constant *Element = CV->getAggregateElement(i);
2193 if (isa<PoisonValue>(Element))
2194 continue;
2195 auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
2196 if (!ElementCI) {
2197 Known.resetAll();
2198 return;
2199 }
2200 const APInt &Elt = ElementCI->getValue();
2201 Known.Zero &= ~Elt;
2202 Known.One &= Elt;
2203 }
2204 if (Known.hasConflict())
2205 Known.resetAll();
2206 return;
2207 }
2208
2209 // Start out not knowing anything.
2210 Known.resetAll();
2211
2212 // We can't imply anything about undefs.
2213 if (isa<UndefValue>(V))
2214 return;
2215
2216 // There's no point in looking through other users of ConstantData for
2217 // assumptions. Confirm that we've handled them all.
2218 assert(!isa<ConstantData>(V) && "Unhandled constant data!");
2219
2220 if (const auto *A = dyn_cast<Argument>(V))
2221 if (std::optional<ConstantRange> Range = A->getRange())
2222 Known = Range->toKnownBits();
2223
2224 // All recursive calls that increase depth must come after this.
2225 if (Depth == MaxAnalysisRecursionDepth)
2226 return;
2227
2228 // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
2229 // the bits of its aliasee.
2230 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2231 if (!GA->isInterposable())
2232 computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
2233 return;
2234 }
2235
2236 if (const Operator *I = dyn_cast<Operator>(V))
2237 computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
2238 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2239 if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
2240 Known = CR->toKnownBits();
2241 }
2242
2243 // Aligned pointers have trailing zeros - refine Known.Zero set
2244 if (isa<PointerType>(V->getType())) {
2245 Align Alignment = V->getPointerAlignment(Q.DL);
2246 Known.Zero.setLowBits(Log2(Alignment));
2247 }
2248
2249 // computeKnownBitsFromContext strictly refines Known.
2250 // Therefore, we run them after computeKnownBitsFromOperator.
2251
2252 // Check whether we can determine known bits from context such as assumes.
2253 computeKnownBitsFromContext(V, Known, Depth, Q);
2254}
2255
2256/// Try to detect a recurrence that the value of the induction variable is
2257/// always a power of two (or zero).
2258static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2259 unsigned Depth, SimplifyQuery &Q) {
2260 BinaryOperator *BO = nullptr;
2261 Value *Start = nullptr, *Step = nullptr;
2262 if (!matchSimpleRecurrence(PN, BO, Start, Step))
2263 return false;
2264
2265 // Initial value must be a power of two.
2266 for (const Use &U : PN->operands()) {
2267 if (U.get() == Start) {
2268 // Initial value comes from a different BB, need to adjust context
2269 // instruction for analysis.
2270 Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2271 if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
2272 return false;
2273 }
2274 }
2275
2276 // Except for Mul, the induction variable must be on the left side of the
2277 // increment expression, otherwise its value can be arbitrary.
2278 if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
2279 return false;
2280
2281 Q.CxtI = BO->getParent()->getTerminator();
2282 switch (BO->getOpcode()) {
2283 case Instruction::Mul:
2284 // Power of two is closed under multiplication.
2285 return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
2286 Q.IIQ.hasNoSignedWrap(BO)) &&
2287 isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
2288 case Instruction::SDiv:
2289 // Start value must not be signmask for signed division, so simply being a
2290 // power of two is not sufficient, and it has to be a constant.
2291 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2292 return false;
2293 [[fallthrough]];
2294 case Instruction::UDiv:
2295 // Divisor must be a power of two.
2296 // If OrZero is false, we cannot guarantee the induction variable is non-zero
2297 // after division (same for Shr), unless it is an exact division.
2298 return (OrZero || Q.IIQ.isExact(BO)) &&
2299 isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
2300 case Instruction::Shl:
2301 return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
2302 case Instruction::AShr:
2303 if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2304 return false;
2305 [[fallthrough]];
2306 case Instruction::LShr:
2307 return OrZero || Q.IIQ.isExact(BO);
2308 default:
2309 return false;
2310 }
2311}
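 // Example of a recurrence accepted above:
 //   %p = phi i64 [ 8, %entry ], [ %p.next, %loop ]
 //   %p.next = shl nuw i64 %p, 1
 // starts at a power of two and doubles each iteration, so every value the
 // phi takes is a power of two (with nuw, the set bit cannot be shifted out
 // in a well-defined execution).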
2312
2313/// Return true if we can infer that \p V is known to be a power of 2 from
2314/// dominating condition \p Cond (e.g., ctpop(V) == 1).
2315static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
2316 const Value *Cond,
2317 bool CondIsTrue) {
2318 CmpPredicate Pred;
2319 const APInt *RHSC;
2320 if (!match(Cond, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Specific(V)),
2321 m_APInt(RHSC))))
2322 return false;
2323 if (!CondIsTrue)
2324 Pred = ICmpInst::getInversePredicate(Pred);
2325 // ctpop(V) u< 2
2326 if (OrZero && Pred == ICmpInst::ICMP_ULT && *RHSC == 2)
2327 return true;
2328 // ctpop(V) == 1
2329 return Pred == ICmpInst::ICMP_EQ && *RHSC == 1;
2330}
2331
2332/// Return true if the given value is known to have exactly one
2333/// bit set when defined. For vectors return true if every element is known to
2334/// be a power of two when defined. Supports values with integer or pointer
2335/// types and vectors of integers.
2336bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
2337 const SimplifyQuery &Q) {
2338 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2339
2340 if (isa<Constant>(V))
2341 return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
2342
2343 // i1 is by definition a power of 2 or zero.
2344 if (OrZero && V->getType()->getScalarSizeInBits() == 1)
2345 return true;
2346
2347 // Try to infer from assumptions.
2348 if (Q.AC && Q.CxtI) {
2349 for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
2350 if (!AssumeVH)
2351 continue;
2352 CallInst *I = cast<CallInst>(AssumeVH);
2353 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, I->getArgOperand(0),
2354 /*CondIsTrue=*/true) &&
2355 isValidAssumeForContext(I, Q.CxtI, Q.DT))
2356 return true;
2357 }
2358 }
2359
2360 // Handle dominating conditions.
2361 if (Q.DC && Q.CxtI && Q.DT) {
2362 for (BranchInst *BI : Q.DC->conditionsFor(V)) {
2363 Value *Cond = BI->getCondition();
2364
2365 BasicBlockEdge Edge0(BI->getParent(), BI->getSuccessor(0));
2366 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2367 /*CondIsTrue=*/true) &&
2368 Q.DT->dominates(Edge0, Q.CxtI->getParent()))
2369 return true;
2370
2371 BasicBlockEdge Edge1(BI->getParent(), BI->getSuccessor(1));
2372 if (isImpliedToBeAPowerOfTwoFromCond(V, OrZero, Cond,
2373 /*CondIsTrue=*/false) &&
2374 Q.DT->dominates(Edge1, Q.CxtI->getParent()))
2375 return true;
2376 }
2377 }
2378
2379 auto *I = dyn_cast<Instruction>(V);
2380 if (!I)
2381 return false;
2382
2383 if (Q.CxtI && match(V, m_VScale())) {
2384 const Function *F = Q.CxtI->getFunction();
2385 // The vscale_range indicates vscale is a power-of-two.
2386 return F->hasFnAttribute(Attribute::VScaleRange);
2387 }
2388
2389 // 1 << X is clearly a power of two if the one is not shifted off the end. If
2390 // it is shifted off the end then the result is undefined.
2391 if (match(I, m_Shl(m_One(), m_Value())))
2392 return true;
2393
2394 // (signmask) >>l X is clearly a power of two if the one is not shifted off
2395 // the bottom. If it is shifted off the bottom then the result is undefined.
2396 if (match(I, m_LShr(m_SignMask(), m_Value())))
2397 return true;
2398
2399 // The remaining tests are all recursive, so bail out if we hit the limit.
2400 if (Depth++ == MaxAnalysisRecursionDepth)
2401 return false;
2402
2403 switch (I->getOpcode()) {
2404 case Instruction::ZExt:
2405 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2406 case Instruction::Trunc:
2407 return OrZero && isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2408 case Instruction::Shl:
2409 if (OrZero || Q.IIQ.hasNoUnsignedWrap(I) || Q.IIQ.hasNoSignedWrap(I))
2410 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2411 return false;
2412 case Instruction::LShr:
2413 if (OrZero || Q.IIQ.isExact(cast<BinaryOperator>(I)))
2414 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2415 return false;
2416 case Instruction::UDiv:
2417 if (Q.IIQ.isExact(cast<BinaryOperator>(I)))
2418 return isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q);
2419 return false;
2420 case Instruction::Mul:
2421 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2422 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q) &&
2423 (OrZero || isKnownNonZero(I, Q, Depth));
2424 case Instruction::And:
2425 // A power of two and'd with anything is a power of two or zero.
2426 if (OrZero &&
2427 (isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ true, Depth, Q) ||
2428 isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ true, Depth, Q)))
2429 return true;
2430 // X & (-X) is always a power of two or zero.
2431 if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) ||
2432 match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0)))))
2433 return OrZero || isKnownNonZero(I->getOperand(0), Q, Depth);
2434 return false;
2435 case Instruction::Add: {
2436 // Adding a power-of-two or zero to the same power-of-two or zero yields
2437 // either the original power-of-two, a larger power-of-two or zero.
2438 const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
2439 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
2440 Q.IIQ.hasNoSignedWrap(VOBO)) {
2441 if (match(I->getOperand(0),
2442 m_c_And(m_Specific(I->getOperand(1)), m_Value())) &&
2443 isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q))
2444 return true;
2445 if (match(I->getOperand(1),
2446 m_c_And(m_Specific(I->getOperand(0)), m_Value())) &&
2447 isKnownToBeAPowerOfTwo(I->getOperand(0), OrZero, Depth, Q))
2448 return true;
2449
2450 unsigned BitWidth = V->getType()->getScalarSizeInBits();
2451 KnownBits LHSBits(BitWidth);
2452 computeKnownBits(I->getOperand(0), LHSBits, Depth, Q);
2453
2454 KnownBits RHSBits(BitWidth);
2455 computeKnownBits(I->getOperand(1), RHSBits, Depth, Q);
2456 // If i8 V is a power of two or zero:
2457 // ZeroBits: 1 1 1 0 1 1 1 1
2458 // ~ZeroBits: 0 0 0 1 0 0 0 0
2459 if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2460 // If OrZero isn't set, we cannot give back a zero result.
2461 // Make sure either the LHS or RHS has a bit set.
2462 if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2463 return true;
2464 }
2465
2466 // LShr(UINT_MAX, Y) + 1 is a power of two (if add is nuw) or zero.
2467 if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO))
2468 if (match(I, m_Add(m_LShr(m_AllOnes(), m_Value()), m_One())))
2469 return true;
2470 return false;
2471 }
2472 case Instruction::Select:
2473 return isKnownToBeAPowerOfTwo(I->getOperand(1), OrZero, Depth, Q) &&
2474 isKnownToBeAPowerOfTwo(I->getOperand(2), OrZero, Depth, Q);
2475 case Instruction::PHI: {
2476 // A PHI node is a power of two if all incoming values are powers of two, or
2477 // if it is an induction variable where in each step its value is a power of
2478 // two.
2479 auto *PN = cast<PHINode>(I);
2480 SimplifyQuery RecQ = Q.getWithoutCondContext();
2481
2482 // Check if it is an induction variable and always power of two.
2483 if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
2484 return true;
2485
2486 // Recursively check all incoming values. Limit recursion to 2 levels, so
2487 // that search complexity is limited to number of operands^2.
2488 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2489 return llvm::all_of(PN->operands(), [&](const Use &U) {
2490 // Value is power of 2 if it is coming from PHI node itself by induction.
2491 if (U.get() == PN)
2492 return true;
2493
2494 // Change the context instruction to the incoming block where it is
2495 // evaluated.
2496 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2497 return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
2498 });
2499 }
2500 case Instruction::Invoke:
2501 case Instruction::Call: {
2502 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
2503 switch (II->getIntrinsicID()) {
2504 case Intrinsic::umax:
2505 case Intrinsic::smax:
2506 case Intrinsic::umin:
2507 case Intrinsic::smin:
2508 return isKnownToBeAPowerOfTwo(II->getArgOperand(1), OrZero, Depth, Q) &&
2509 isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2510 // bswap/bitreverse just move around bits, but don't change any 1s/0s
2511 // thus don't change pow2/non-pow2 status.
2512 case Intrinsic::bitreverse:
2513 case Intrinsic::bswap:
2514 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2515 case Intrinsic::fshr:
2516 case Intrinsic::fshl:
2517 // If Op0 == Op1, this is a rotate. is_pow2(rotate(x, y)) == is_pow2(x)
2518 if (II->getArgOperand(0) == II->getArgOperand(1))
2519 return isKnownToBeAPowerOfTwo(II->getArgOperand(0), OrZero, Depth, Q);
2520 break;
2521 default:
2522 break;
2523 }
2524 }
2525 return false;
2526 }
2527 default:
2528 return false;
2529 }
2530}
2531
2532/// Test whether a GEP's result is known to be non-null.
2533///
2534/// Uses properties inherent in a GEP to try to determine whether it is known
2535/// to be non-null.
2536///
2537/// Currently this routine does not support vector GEPs.
2538static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2539 const SimplifyQuery &Q) {
2540 const Function *F = nullptr;
2541 if (const Instruction *I = dyn_cast<Instruction>(GEP))
2542 F = I->getFunction();
2543
2544 // If the GEP is nuw, or is inbounds with the null pointer invalid, then the
2545 // GEP may be null iff the base pointer is null and the offset is zero.
2546 if (!GEP->hasNoUnsignedWrap() &&
2547 !(GEP->isInBounds() &&
2548 !NullPointerIsDefined(F, GEP->getPointerAddressSpace())))
2549 return false;
2550
2551 // FIXME: Support vector-GEPs.
2552 assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2553
2554 // If the base pointer is non-null, we cannot walk to a null address with an
2555 // inbounds GEP in address space zero.
2556 if (isKnownNonZero(GEP->getPointerOperand(), Q, Depth))
2557 return true;
2558
2559 // Walk the GEP operands and see if any operand introduces a non-zero offset.
2560 // If so, then the GEP cannot produce a null pointer, as doing so would
2561 // inherently violate the inbounds contract within address space zero.
2562 for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2563 GTI != GTE; ++GTI) {
2564 // Struct types are easy -- they must always be indexed by a constant.
2565 if (StructType *STy = GTI.getStructTypeOrNull()) {
2566 ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2567 unsigned ElementIdx = OpC->getZExtValue();
2568 const StructLayout *SL = Q.DL.getStructLayout(STy);
2569 uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2570 if (ElementOffset > 0)
2571 return true;
2572 continue;
2573 }
2574
2575 // If we have a zero-sized type, the index doesn't matter. Keep looping.
2576 if (GTI.getSequentialElementStride(Q.DL).isZero())
2577 continue;
2578
2579 // Fast path the constant operand case both for efficiency and so we don't
2580 // increment Depth when just zipping down an all-constant GEP.
2581 if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2582 if (!OpC->isZero())
2583 return true;
2584 continue;
2585 }
2586
2587 // We post-increment Depth here because while isKnownNonZero increments it
2588 // as well, when we pop back up that increment won't persist. We don't want
2589 // to recurse 10k times just because we have 10k GEP operands. We don't
2590 // bail completely out because we want to handle constant GEPs regardless
2591 // of depth.
2592 if (Depth++ >= MaxAnalysisRecursionDepth)
2593 continue;
2594
2595 if (isKnownNonZero(GTI.getOperand(), Q, Depth))
2596 return true;
2597 }
2598
2599 return false;
2600}
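 // Example: in address space 0, "getelementptr inbounds %struct.S, ptr %p,
 // i64 0, i32 1" with a non-zero field offset cannot produce null, because
 // yielding null from an inbounds GEP with a non-zero offset would violate
 // the inbounds contract in an address space where null is not a valid
 // object address.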
2601
2602 static bool isKnownNonNullFromDominatingCondition(const Value *V,
2603 const Instruction *CtxI,
2604 const DominatorTree *DT) {
2605 assert(!isa<Constant>(V) && "Called for constant?");
2606
2607 if (!CtxI || !DT)
2608 return false;
2609
2610 unsigned NumUsesExplored = 0;
2611 for (const auto *U : V->users()) {
2612 // Avoid massive lists
2613 if (NumUsesExplored >= DomConditionsMaxUses)
2614 break;
2615 NumUsesExplored++;
2616
2617 // If the value is used as an argument to a call or invoke, then argument
2618 // attributes may provide an answer about null-ness.
2619 if (const auto *CB = dyn_cast<CallBase>(U))
2620 if (auto *CalledFunc = CB->getCalledFunction())
2621 for (const Argument &Arg : CalledFunc->args())
2622 if (CB->getArgOperand(Arg.getArgNo()) == V &&
2623 Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
2624 DT->dominates(CB, CtxI))
2625 return true;
2626
2627 // If the value is used as a load/store pointer, then it must be non-null.
2628 if (V == getLoadStorePointerOperand(U)) {
2629 const Instruction *I = cast<Instruction>(U);
2630 if (!NullPointerIsDefined(I->getFunction(),
2631 V->getType()->getPointerAddressSpace()) &&
2632 DT->dominates(I, CtxI))
2633 return true;
2634 }
2635
2636 if ((match(U, m_IDiv(m_Value(), m_Specific(V))) ||
2637 match(U, m_IRem(m_Value(), m_Specific(V)))) &&
2638 isValidAssumeForContext(cast<Instruction>(U), CtxI, DT))
2639 return true;
2640
2641 // Consider only compare instructions uniquely controlling a branch
2642 Value *RHS;
2643 CmpPredicate Pred;
2644 if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
2645 continue;
2646
2647 bool NonNullIfTrue;
2648 if (cmpExcludesZero(Pred, RHS))
2649 NonNullIfTrue = true;
2650 else if (cmpExcludesZero(CmpInst::getInversePredicate(Pred), RHS))
2651 NonNullIfTrue = false;
2652 else
2653 continue;
2654
2655 SmallVector<const User *, 4> WorkList;
2656 SmallPtrSet<const User *, 4> Visited;
2657 for (const auto *CmpU : U->users()) {
2658 assert(WorkList.empty() && "Should be!");
2659 if (Visited.insert(CmpU).second)
2660 WorkList.push_back(CmpU);
2661
2662 while (!WorkList.empty()) {
2663 auto *Curr = WorkList.pop_back_val();
2664
2665 // If a user is an AND, add all its users to the work list. We only
2666 // propagate the "pred != null" condition through AND because it is only
2667 // correct to assume that all conditions of the AND are met in the true branch.
2668 // TODO: Support similar logic of OR and EQ predicate?
2669 if (NonNullIfTrue)
2670 if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
2671 for (const auto *CurrU : Curr->users())
2672 if (Visited.insert(CurrU).second)
2673 WorkList.push_back(CurrU);
2674 continue;
2675 }
2676
2677 if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
2678 assert(BI->isConditional() && "uses a comparison!");
2679
2680 BasicBlock *NonNullSuccessor =
2681 BI->getSuccessor(NonNullIfTrue ? 0 : 1);
2682 BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2683 if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
2684 return true;
2685 } else if (NonNullIfTrue && isGuard(Curr) &&
2686 DT->dominates(cast<Instruction>(Curr), CtxI)) {
2687 return true;
2688 }
2689 }
2690 }
2691 }
2692
2693 return false;
2694}
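 // Example: if some user of %p is "br (icmp ne ptr %p, null), %nonnull.bb,
 // %null.bb" and the edge into %nonnull.bb dominates the context instruction,
 // the walk above concludes that %p is non-null at that context.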
2695
2696/// Does the 'Range' metadata (which must be a valid MD_range operand list)
2697/// ensure that the value it's attached to is never Value? 'RangeType' is
2698 /// the type of the value described by the range.
2699static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2700 const unsigned NumRanges = Ranges->getNumOperands() / 2;
2701 assert(NumRanges >= 1);
2702 for (unsigned i = 0; i < NumRanges; ++i) {
2703 ConstantInt *Lower =
2704 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2705 ConstantInt *Upper =
2706 mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2707 ConstantRange Range(Lower->getValue(), Upper->getValue());
2708 if (Range.contains(Value))
2709 return false;
2710 }
2711 return true;
2712}
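 // Example: !range metadata of {i32 1, i32 100} describes the half-open
 // interval [1, 100), which does not contain 0, so a value annotated with it
 // is known non-zero.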
2713
2714/// Try to detect a recurrence that monotonically increases/decreases from a
2715/// non-zero starting value. These are common as induction variables.
2716static bool isNonZeroRecurrence(const PHINode *PN) {
2717 BinaryOperator *BO = nullptr;
2718 Value *Start = nullptr, *Step = nullptr;
2719 const APInt *StartC, *StepC;
2720 if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2721 !match(Start, m_APInt(StartC)) || StartC->isZero())
2722 return false;
2723
2724 switch (BO->getOpcode()) {
2725 case Instruction::Add:
2726 // Starting from non-zero and stepping away from zero can never wrap back
2727 // to zero.
2728 return BO->hasNoUnsignedWrap() ||
2729 (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2730 StartC->isNegative() == StepC->isNegative());
2731 case Instruction::Mul:
2732 return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2733 match(Step, m_APInt(StepC)) && !StepC->isZero();
2734 case Instruction::Shl:
2735 return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2736 case Instruction::AShr:
2737 case Instruction::LShr:
2738 return BO->isExact();
2739 default:
2740 return false;
2741 }
2742}
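 // Example:
 //   %i = phi i32 [ 1, %entry ], [ %i.next, %loop ]
 //   %i.next = add nuw i32 %i, 2
 // starts non-zero and steps away from zero without unsigned wrap, so the
 // phi can never take the value zero.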
2743
2744static bool matchOpWithOpEqZero(Value *Op0, Value *Op1) {
2745 return match(Op0, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2746 m_Specific(Op1), m_Zero()))) ||
2747 match(Op1, m_ZExtOrSExt(m_SpecificICmp(ICmpInst::ICMP_EQ,
2748 m_Specific(Op0), m_Zero())));
2749}
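 // Why the (X op zext/sext(X == 0)) patterns matched above are non-zero: when
 // X is 0 the cast compare contributes 1 (or -1 for sext), and when X is
 // non-zero it contributes 0, so for add, sub, or, and xor the combined value
 // is never 0.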
2750
2751static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
2752 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2753 Value *Y, bool NSW, bool NUW) {
2754 // (X + (X != 0)) is non zero
2755 if (matchOpWithOpEqZero(X, Y))
2756 return true;
2757
2758 if (NUW)
2759 return isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2760 isKnownNonZero(X, DemandedElts, Q, Depth);
2761
2762 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2763 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2764
2765 // If X and Y are both non-negative (as signed values) then their sum is not
2766 // zero unless both X and Y are zero.
2767 if (XKnown.isNonNegative() && YKnown.isNonNegative())
2768 if (isKnownNonZero(Y, DemandedElts, Q, Depth) ||
2769 isKnownNonZero(X, DemandedElts, Q, Depth))
2770 return true;
2771
2772 // If X and Y are both negative (as signed values) then their sum is not
2773 // zero unless both X and Y equal INT_MIN.
2774 if (XKnown.isNegative() && YKnown.isNegative()) {
2775 APInt Mask = APInt::getSignedMaxValue(BitWidth);
2776 // The sign bit of X is set. If some other bit is set then X is not equal
2777 // to INT_MIN.
2778 if (XKnown.One.intersects(Mask))
2779 return true;
2780 // The sign bit of Y is set. If some other bit is set then Y is not equal
2781 // to INT_MIN.
2782 if (YKnown.One.intersects(Mask))
2783 return true;
2784 }
2785
2786 // The sum of a non-negative number and a power of two is not zero.
2787 if (XKnown.isNonNegative() &&
2788 isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2789 return true;
2790 if (YKnown.isNonNegative() &&
2791 isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2792 return true;
2793
2794 return KnownBits::add(XKnown, YKnown, NSW, NUW).isNonZero();
2795}
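 // Worked case for the power-of-two rule above: if X is known non-negative
 // and Y is a power of two, then X + Y can only be 0 (mod 2^BitWidth) when
 // X == 2^BitWidth - Y; that value always has its sign bit set, contradicting
 // X being non-negative, so the sum is non-zero.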
2796
2797static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
2798 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2799 Value *Y) {
2800 // (X - (X != 0)) is non zero
2801 // ((X != 0) - X) is non zero
2802 if (matchOpWithOpEqZero(X, Y))
2803 return true;
2804
2805 // TODO: Move this case into isKnownNonEqual().
2806 if (auto *C = dyn_cast<Constant>(X))
2807 if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Q, Depth))
2808 return true;
2809
2810 return ::isKnownNonEqual(X, Y, DemandedElts, Depth, Q);
2811}
2812
2813static bool isNonZeroMul(const APInt &DemandedElts, unsigned Depth,
2814 const SimplifyQuery &Q, unsigned BitWidth, Value *X,
2815 Value *Y, bool NSW, bool NUW) {
2816 // If X and Y are non-zero then so is X * Y as long as the multiplication
2817 // does not overflow.
2818 if (NSW || NUW)
2819 return isKnownNonZero(X, DemandedElts, Q, Depth) &&
2820 isKnownNonZero(Y, DemandedElts, Q, Depth);
2821
2822 // If either X or Y is odd, then if the other is non-zero the result can't
2823 // be zero.
2824 KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2825 if (XKnown.One[0])
2826 return isKnownNonZero(Y, DemandedElts, Q, Depth);
2827
2828 KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2829 if (YKnown.One[0])
2830 return XKnown.isNonZero() || isKnownNonZero(X, DemandedElts, Q, Depth);
2831
2832 // If there exists any subset of X (sX) and subset of Y (sY) s.t sX * sY is
2833 // non-zero, then X * Y is non-zero. We can find sX and sY by just taking
2834 // the lowest known One of X and Y. If they are non-zero, the result
2835 // must be non-zero. We can check if LSB(X) * LSB(Y) != 0 by doing
2836 // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
2837 return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
2838 BitWidth;
2839}
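 // Example for the final check above: on i8, if X has a known one no higher
 // than bit 2 (at most 2 trailing zeros) and Y has a known one no higher than
 // bit 3 (at most 3 trailing zeros), the lowest set bit of X * Y is at bit 5
 // or below, and 2 + 3 < 8 guarantees that bit is not lost to wrapping, so
 // the product is non-zero.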
2840
2841static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
2842 unsigned Depth, const SimplifyQuery &Q,
2843 const KnownBits &KnownVal) {
2844 auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2845 switch (I->getOpcode()) {
2846 case Instruction::Shl:
2847 return Lhs.shl(Rhs);
2848 case Instruction::LShr:
2849 return Lhs.lshr(Rhs);
2850 case Instruction::AShr:
2851 return Lhs.ashr(Rhs);
2852 default:
2853 llvm_unreachable("Unknown Shift Opcode");
2854 }
2855 };
2856
2857 auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
2858 switch (I->getOpcode()) {
2859 case Instruction::Shl:
2860 return Lhs.lshr(Rhs);
2861 case Instruction::LShr:
2862 case Instruction::AShr:
2863 return Lhs.shl(Rhs);
2864 default:
2865 llvm_unreachable("Unknown Shift Opcode");
2866 }
2867 };
2868
2869 if (KnownVal.isUnknown())
2870 return false;
2871
2872 KnownBits KnownCnt =
2873 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
2874 APInt MaxShift = KnownCnt.getMaxValue();
2875 unsigned NumBits = KnownVal.getBitWidth();
2876 if (MaxShift.uge(NumBits))
2877 return false;
2878
2879 if (!ShiftOp(KnownVal.One, MaxShift).isZero())
2880 return true;
2881
2882 // If all of the bits shifted out are known to be zero, and Val is known
2883 // non-zero then at least one non-zero bit must remain.
2884 if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
2885 .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
2886 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth))
2887 return true;
2888
2889 return false;
2890}
2891
2892 static bool isKnownNonZeroFromOperator(const Operator *I,
2893 const APInt &DemandedElts,
2894 unsigned Depth, const SimplifyQuery &Q) {
2895 unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
2896 switch (I->getOpcode()) {
2897 case Instruction::Alloca:
2898 // Alloca never returns null, malloc might.
2899 return I->getType()->getPointerAddressSpace() == 0;
2900 case Instruction::GetElementPtr:
2901 if (I->getType()->isPointerTy())
2902 return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
2903 break;
2904 case Instruction::BitCast: {
2905 // We need to be a bit careful here. We can only peek through the bitcast
2906 // if the scalar size of elements in the operand is smaller than, and a
2907 // divisor of, the size they are being cast to. Take three cases:
2908 //
2909 // 1) Unsafe:
2910 // bitcast <2 x i16> %NonZero to <4 x i8>
2911 //
2912 // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
2913 // <4 x i8> requires that all 4 i8 elements be non-zero which isn't
2914 // guaranteed (imagine just the sign bit set in the 2 i16 elements).
2915 //
2916 // 2) Unsafe:
2917 // bitcast <4 x i3> %NonZero to <3 x i4>
2918 //
2919 // Even though the scalar size of the src (`i3`) is smaller than the
2920 // scalar size of the dst `i4`, because `i4` is not a multiple of `i3`
2921 // it's possible for the `3 x i4` elements to be zero because there are
2922 // some elements in the destination that don't contain any full src
2923 // element.
2924 //
2925 // 3) Safe:
2926 // bitcast <4 x i8> %NonZero to <2 x i16>
2927 //
2928 // This is always safe as non-zero in the 4 i8 elements implies
2929 // non-zero in the combination of any two adjacent ones. Since i16 is a
2930 // multiple of i8, each i16 is guaranteed to contain 2 full i8 elements.
2931 // This all implies the 2 i16 elements are non-zero.
2932 Type *FromTy = I->getOperand(0)->getType();
2933 if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
2934 (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
2935 return isKnownNonZero(I->getOperand(0), Q, Depth);
2936 } break;
2937 case Instruction::IntToPtr:
2938 // Note that we have to take special care to avoid looking through
2939 // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2940 // as casts that can alter the value, e.g., AddrSpaceCasts.
2941 if (!isa<ScalableVectorType>(I->getType()) &&
2942 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2943 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2944 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2945 break;
2946 case Instruction::PtrToInt:
2947 // Similar to int2ptr above, we can look through ptr2int here if the cast
2948 // is a no-op or an extend and not a truncate.
2949 if (!isa<ScalableVectorType>(I->getType()) &&
2950 Q.DL.getTypeSizeInBits(I->getOperand(0)->getType()).getFixedValue() <=
2951 Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
2952 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2953 break;
2954 case Instruction::Trunc:
2955 // nuw/nsw trunc preserves zero/non-zero status of input.
2956 if (auto *TI = dyn_cast<TruncInst>(I))
2957 if (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap())
2958 return isKnownNonZero(TI->getOperand(0), DemandedElts, Q, Depth);
2959 break;
2960
2961 case Instruction::Sub:
2962 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
2963 I->getOperand(1));
2964 case Instruction::Xor:
2965 // (X ^ (X != 0)) is non zero
2966 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2967 return true;
2968 break;
2969 case Instruction::Or:
2970 // (X | (X != 0)) is non zero
2971 if (matchOpWithOpEqZero(I->getOperand(0), I->getOperand(1)))
2972 return true;
2973 // X | Y != 0 if X != 0 or Y != 0.
2974 return isKnownNonZero(I->getOperand(1), DemandedElts, Q, Depth) ||
2975 isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2976 case Instruction::SExt:
2977 case Instruction::ZExt:
2978 // ext X != 0 if X != 0.
2979 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2980
2981 case Instruction::Shl: {
2982 // shl nsw/nuw can't remove any non-zero bits.
2983 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
2984 if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
2985 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
2986
2987 // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
2988 // if the lowest bit is shifted off the end.
2989 KnownBits Known(BitWidth);
2990 computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
2991 if (Known.One[0])
2992 return true;
2993
2994 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
2995 }
2996 case Instruction::LShr:
2997 case Instruction::AShr: {
2998 // shr exact can only shift out zero bits.
2999 const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
3000 if (BO->isExact())
3001 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3002
3003 // shr X, Y != 0 if X is negative. Note that the value of the shift is not
3004 // defined if the sign bit is shifted off the end.
3005 KnownBits Known =
3006 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
3007 if (Known.isNegative())
3008 return true;
3009
3010 return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
3011 }
3012 case Instruction::UDiv:
3013 case Instruction::SDiv: {
3014 // X / Y
3015 // div exact can only produce a zero if the dividend is zero.
3016 if (cast<PossiblyExactOperator>(I)->isExact())
3017 return isKnownNonZero(I->getOperand(0), DemandedElts, Q, Depth);
3018
3019 KnownBits XKnown =
3020 computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
3021 // If X is fully unknown we won't be able to figure anything out, so don't
3022 // bother computing known bits for Y.
3023 if (XKnown.isUnknown())
3024 return false;
3025
3026 KnownBits YKnown =
3027 computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
3028 if (I->getOpcode() == Instruction::SDiv) {
3029 // For signed division need to compare abs value of the operands.
3030 XKnown = XKnown.abs(/*IntMinIsPoison*/ false);
3031 YKnown = YKnown.abs(/*IntMinIsPoison*/ false);
3032 }
3033 // If X u>= Y then div is non zero (0/0 is UB).
3034 std::optional<bool> XUgeY = KnownBits::uge(XKnown, YKnown);
3035 // If X is totally unknown or X u< Y, we won't be able to prove non-zero
3036 // with computeKnownBits, so just return early.
3037 return XUgeY && *XUgeY;
3038 }
3039 case Instruction::Add: {
3040 // X + Y.
3041
3042 // If the Add has the nuw flag, then if either X or Y is non-zero the result is
3043 // non-zero.
3044 auto *BO = cast<OverflowingBinaryOperator>(I);
3045 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
3046 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
3047 Q.IIQ.hasNoUnsignedWrap(BO));
3048 }
3049 case Instruction::Mul: {
3050 const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
3051 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
3052 I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO),
3053 Q.IIQ.hasNoUnsignedWrap(BO));
3054 }
3055 case Instruction::Select: {
3056 // (C ? X : Y) != 0 if X != 0 and Y != 0.
3057
3058 // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
3059 // then see if the select condition implies the arm is non-zero. For example
3060 // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
3061 // dominated by `X != 0`.
3062 auto SelectArmIsNonZero = [&](bool IsTrueArm) {
3063 Value *Op;
3064 Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
3065 // Op is trivially non-zero.
3066 if (isKnownNonZero(Op, DemandedElts, Q, Depth))
3067 return true;
3068
3069 // The condition of the select dominates the true/false arm. Check if the
3070 // condition implies that a given arm is non-zero.
3071 Value *X;
3072 CmpPredicate Pred;
3073 if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
3074 return false;
3075
3076 if (!IsTrueArm)
3077 Pred = ICmpInst::getInversePredicate(Pred);
3078
3079 return cmpExcludesZero(Pred, X);
3080 };
3081
3082 if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
3083 SelectArmIsNonZero(/* IsTrueArm */ false))
3084 return true;
3085 break;
3086 }
3087 case Instruction::PHI: {
3088 auto *PN = cast<PHINode>(I);
3089 if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
3090 return true;
3091
3092 // Check if all incoming values are non-zero using recursion.
3093 SimplifyQuery RecQ = Q.getWithoutCondContext();
3094 unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
3095 return llvm::all_of(PN->operands(), [&](const Use &U) {
3096 if (U.get() == PN)
3097 return true;
3098 RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
3099 // Check if the branch on the phi excludes zero.
3100 CmpPredicate Pred;
3101 Value *X;
3102 BasicBlock *TrueSucc, *FalseSucc;
3103 if (match(RecQ.CxtI,
3104 m_Br(m_c_ICmp(Pred, m_Specific(U.get()), m_Value(X)),
3105 m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
3106 // Check for cases of duplicate successors.
3107 if ((TrueSucc == PN->getParent()) != (FalseSucc == PN->getParent())) {
3108 // If we're using the false successor, invert the predicate.
3109 if (FalseSucc == PN->getParent())
3110 Pred = CmpInst::getInversePredicate(Pred);
3111 if (cmpExcludesZero(Pred, X))
3112 return true;
3113 }
3114 }
3115 // Finally recurse on the edge and check it directly.
3116 return isKnownNonZero(U.get(), DemandedElts, RecQ, NewDepth);
3117 });
3118 }
3119 case Instruction::InsertElement: {
3120 if (isa<ScalableVectorType>(I->getType()))
3121 break;
3122
3123 const Value *Vec = I->getOperand(0);
3124 const Value *Elt = I->getOperand(1);
3125 auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
3126
3127 unsigned NumElts = DemandedElts.getBitWidth();
3128 APInt DemandedVecElts = DemandedElts;
3129 bool SkipElt = false;
3130 // If we know the index we are inserting to, clear it from the Vec check.
3131 if (CIdx && CIdx->getValue().ult(NumElts)) {
3132 DemandedVecElts.clearBit(CIdx->getZExtValue());
3133 SkipElt = !DemandedElts[CIdx->getZExtValue()];
3134 }
3135
3136 // Result is non-zero if Elt is non-zero and the rest of the demanded elts in
3137 // Vec are non-zero.
3138 return (SkipElt || isKnownNonZero(Elt, Q, Depth)) &&
3139 (DemandedVecElts.isZero() ||
3140 isKnownNonZero(Vec, DemandedVecElts, Q, Depth));
3141 }
3142 case Instruction::ExtractElement:
3143 if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
3144 const Value *Vec = EEI->getVectorOperand();
3145 const Value *Idx = EEI->getIndexOperand();
3146 auto *CIdx = dyn_cast<ConstantInt>(Idx);
3147 if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
3148 unsigned NumElts = VecTy->getNumElements();
3149 APInt DemandedVecElts = APInt::getAllOnes(NumElts);
3150 if (CIdx && CIdx->getValue().ult(NumElts))
3151 DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
3152 return isKnownNonZero(Vec, DemandedVecElts, Q, Depth);
3153 }
3154 }
3155 break;
3156 case Instruction::ShuffleVector: {
3157 auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
3158 if (!Shuf)
3159 break;
3160 APInt DemandedLHS, DemandedRHS;
3161 // For undef elements, we don't know anything about the common state of
3162 // the shuffle result.
3163 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3164 break;
3165 // If demanded elements for both vecs are non-zero, the shuffle is non-zero.
3166 return (DemandedRHS.isZero() ||
3167 isKnownNonZero(Shuf->getOperand(1), DemandedRHS, Q, Depth)) &&
3168 (DemandedLHS.isZero() ||
3169 isKnownNonZero(Shuf->getOperand(0), DemandedLHS, Q, Depth));
3170 }
3171 case Instruction::Freeze:
3172 return isKnownNonZero(I->getOperand(0), Q, Depth) &&
3173 isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
3174 Depth);
3175 case Instruction::Load: {
3176 auto *LI = cast<LoadInst>(I);
3177 // A Load tagged with nonnull or dereferenceable with null pointer undefined
3178 // is never null.
3179 if (auto *PtrT = dyn_cast<PointerType>(I->getType())) {
3180 if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull) ||
3181 (Q.IIQ.getMetadata(LI, LLVMContext::MD_dereferenceable) &&
3182 !NullPointerIsDefined(LI->getFunction(), PtrT->getAddressSpace())))
3183 return true;
3184 } else if (MDNode *Ranges = Q.IIQ.getMetadata(LI, LLVMContext::MD_range)) {
3185 return rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth));
3186 }
3187
3188 // No need to fall through to computeKnownBits as range metadata is already
3189 // handled in isKnownNonZero.
3190 return false;
3191 }
3192 case Instruction::ExtractValue: {
3193 const WithOverflowInst *WO;
3194 if (match(I, m_ExtractValue<0>(m_WithOverflowInst(WO)))) {
3195 switch (WO->getBinaryOp()) {
3196 default:
3197 break;
3198 case Instruction::Add:
3199 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3200 WO->getArgOperand(0), WO->getArgOperand(1),
3201 /*NSW=*/false,
3202 /*NUW=*/false);
3203 case Instruction::Sub:
3204 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3205 WO->getArgOperand(0), WO->getArgOperand(1));
3206 case Instruction::Mul:
3207 return isNonZeroMul(DemandedElts, Depth, Q, BitWidth,
3208 WO->getArgOperand(0), WO->getArgOperand(1),
3209 /*NSW=*/false, /*NUW=*/false);
3210 break;
3211 }
3212 }
3213 break;
3214 }
3215 case Instruction::Call:
3216 case Instruction::Invoke: {
3217 const auto *Call = cast<CallBase>(I);
3218 if (I->getType()->isPointerTy()) {
3219 if (Call->isReturnNonNull())
3220 return true;
3221 if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
3222 return isKnownNonZero(RP, Q, Depth);
3223 } else {
3224 if (MDNode *Ranges = Q.IIQ.getMetadata(Call, LLVMContext::MD_range))
3225 if (rangeMetadataExcludesValue(Ranges, APInt::getZero(BitWidth))) return true;
3226 if (std::optional<ConstantRange> Range = Call->getRange()) {
3227 const APInt ZeroValue(Range->getBitWidth(), 0);
3228 if (!Range->contains(ZeroValue))
3229 return true;
3230 }
3231 if (const Value *RV = Call->getReturnedArgOperand())
3232 if (RV->getType() == I->getType() && isKnownNonZero(RV, Q, Depth))
3233 return true;
3234 }
3235
3236 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
3237 switch (II->getIntrinsicID()) {
3238 case Intrinsic::sshl_sat:
3239 case Intrinsic::ushl_sat:
3240 case Intrinsic::abs:
3241 case Intrinsic::bitreverse:
3242 case Intrinsic::bswap:
3243 case Intrinsic::ctpop:
3244 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3245 // NB: We don't handle usub_sat here because, in any case where we could
3246 // prove it non-zero, InstCombine will already have folded it to `sub nuw`.
3247 case Intrinsic::ssub_sat:
3248 return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
3249 II->getArgOperand(0), II->getArgOperand(1));
3250 case Intrinsic::sadd_sat:
3251 return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
3252 II->getArgOperand(0), II->getArgOperand(1),
3253 /*NSW=*/true, /* NUW=*/false);
3254 // Vec reverse preserves zero/non-zero status from input vec.
3255 case Intrinsic::vector_reverse:
3256 return isKnownNonZero(II->getArgOperand(0), DemandedElts.reverseBits(),
3257 Q, Depth);
3258 // or/umax/umin/smax/smin of all non-zero elements is always non-zero.
3259 case Intrinsic::vector_reduce_or:
3260 case Intrinsic::vector_reduce_umax:
3261 case Intrinsic::vector_reduce_umin:
3262 case Intrinsic::vector_reduce_smax:
3263 case Intrinsic::vector_reduce_smin:
3264 return isKnownNonZero(II->getArgOperand(0), Q, Depth);
3265 case Intrinsic::umax:
3266 case Intrinsic::uadd_sat:
3267 // umax(X, (X != 0)) is non zero
3268 // X +usat (X != 0) is non zero
3269 if (matchOpWithOpEqZero(II->getArgOperand(0), II->getArgOperand(1)))
3270 return true;
3271
3272 return isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth) ||
3273 isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3274 case Intrinsic::smax: {
3275 // If either arg is strictly positive the result is non-zero. Otherwise
3276 // the result is non-zero if both ops are non-zero.
3277 auto IsNonZero = [&](Value *Op, std::optional<bool> &OpNonZero,
3278 const KnownBits &OpKnown) {
3279 if (!OpNonZero.has_value())
3280 OpNonZero = OpKnown.isNonZero() ||
3281 isKnownNonZero(Op, DemandedElts, Q, Depth);
3282 return *OpNonZero;
3283 };
3284 // Avoid re-computing isKnownNonZero.
3285 std::optional<bool> Op0NonZero, Op1NonZero;
3286 KnownBits Op1Known =
3287 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3288 if (Op1Known.isNonNegative() &&
3289 IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known))
3290 return true;
3291 KnownBits Op0Known =
3292 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3293 if (Op0Known.isNonNegative() &&
3294 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known))
3295 return true;
3296 return IsNonZero(II->getArgOperand(1), Op1NonZero, Op1Known) &&
3297 IsNonZero(II->getArgOperand(0), Op0NonZero, Op0Known);
3298 }
3299 case Intrinsic::smin: {
3300 // If either arg is negative the result is non-zero. Otherwise
3301 // the result is non-zero if both ops are non-zero.
3302 KnownBits Op1Known =
3303 computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
3304 if (Op1Known.isNegative())
3305 return true;
3306 KnownBits Op0Known =
3307 computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
3308 if (Op0Known.isNegative())
3309 return true;
3310
3311 if (Op1Known.isNonZero() && Op0Known.isNonZero())
3312 return true;
3313 }
3314 [[fallthrough]];
3315 case Intrinsic::umin:
3316 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth) &&
3317 isKnownNonZero(II->getArgOperand(1), DemandedElts, Q, Depth);
3318 case Intrinsic::cttz:
3319 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3320 .Zero[0];
3321 case Intrinsic::ctlz:
3322 return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
3323 .isNonNegative();
3324 case Intrinsic::fshr:
3325 case Intrinsic::fshl:
3326 // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
3327 if (II->getArgOperand(0) == II->getArgOperand(1))
3328 return isKnownNonZero(II->getArgOperand(0), DemandedElts, Q, Depth);
3329 break;
3330 case Intrinsic::vscale:
3331 return true;
3332 case Intrinsic::experimental_get_vector_length:
3333 return isKnownNonZero(I->getOperand(0), Q, Depth);
3334 default:
3335 break;
3336 }
3337 break;
3338 }
3339
3340 return false;
3341 }
3342 }
3343
3344 KnownBits Known(BitWidth);
3345 computeKnownBits(I, DemandedElts, Known, Depth, Q);
3346 return Known.One != 0;
3347}
3348
3349/// Return true if the given value is known to be non-zero when defined. For
3350/// vectors, return true if every demanded element is known to be non-zero when
3351/// defined. For pointers, if the context instruction and dominator tree are
3352/// specified, perform context-sensitive analysis and return true if the
3353/// pointer couldn't possibly be null at the specified instruction.
3354/// Supports values with integer or pointer type and vectors of integers.
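/// Informal examples (illustrative, not exhaustive): `or i32 %x, 1` is known
/// non-zero because its low bit is always set, and a pointer argument carrying
/// the `nonnull` attribute is known non-zero via the argument checks below.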
3355bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
3356 const SimplifyQuery &Q, unsigned Depth) {
3357 Type *Ty = V->getType();
3358
3359#ifndef NDEBUG
3360 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3361
3362 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3363 assert(
3364 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3365 "DemandedElt width should equal the fixed vector number of elements");
3366 } else {
3367 assert(DemandedElts == APInt(1, 1) &&
3368 "DemandedElt width should be 1 for scalars");
3369 }
3370#endif
3371
3372 if (auto *C = dyn_cast<Constant>(V)) {
3373 if (C->isNullValue())
3374 return false;
3375 if (isa<ConstantInt>(C))
3376 // Must be non-zero due to null test above.
3377 return true;
3378
3379 // For constant vectors, check that all elements are poison or known
3380 // non-zero to determine that the whole vector is known non-zero.
3381 if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
3382 for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
3383 if (!DemandedElts[i])
3384 continue;
3385 Constant *Elt = C->getAggregateElement(i);
3386 if (!Elt || Elt->isNullValue())
3387 return false;
3388 if (!isa<PoisonValue>(Elt) && !isa<ConstantInt>(Elt))
3389 return false;
3390 }
3391 return true;
3392 }
3393
3394 // A constant ptrauth can be null iff its base pointer can be.
3395 if (auto *CPA = dyn_cast<ConstantPtrAuth>(V))
3396 return isKnownNonZero(CPA->getPointer(), DemandedElts, Q, Depth);
3397
3398 // A global variable in address space 0 is non null unless extern weak
3399 // or an absolute symbol reference. Other address spaces may have null as a
3400 // valid address for a global, so we can't assume anything.
3401 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
3402 if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
3403 GV->getType()->getAddressSpace() == 0)
3404 return true;
3405 }
3406
3407 // For constant expressions, fall through to the Operator code below.
3408 if (!isa<ConstantExpr>(V))
3409 return false;
3410 }
3411
3412 if (const auto *A = dyn_cast<Argument>(V))
3413 if (std::optional<ConstantRange> Range = A->getRange()) {
3414 const APInt ZeroValue(Range->getBitWidth(), 0);
3415 if (!Range->contains(ZeroValue))
3416 return true;
3417 }
3418
3419 if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
3420 return true;
3421
3422 // Some of the tests below are recursive, so bail out if we hit the limit.
3423 if (Depth++ >= MaxAnalysisRecursionDepth)
3424 return false;
3425
3426 // Check for pointer simplifications.
3427
3428 if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
3429 // A byval or inalloca argument is never null unless null is a valid pointer
3430 // in its address space. A nonnull argument is assumed never 0.
3431 if (const Argument *A = dyn_cast<Argument>(V)) {
3432 if (((A->hasPassPointeeByValueCopyAttr() &&
3433 !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
3434 A->hasNonNullAttr()))
3435 return true;
3436 }
3437 }
3438
3439 if (const auto *I = dyn_cast<Operator>(V))
3440 if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
3441 return true;
3442
3443 if (!isa<Constant>(V) &&
3444 isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
3445 return true;
3446
3447 return false;
3448}
3449
3450bool llvm::isKnownNonZero(const Value *V, const SimplifyQuery &Q,
3451 unsigned Depth) {
3452 auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
3453 APInt DemandedElts =
3454 FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
3455 return ::isKnownNonZero(V, DemandedElts, Q, Depth);
3456}
3457
3458/// If the pair of operators are the same invertible function, return the
3459/// operands of the function corresponding to each input. Otherwise,
3460/// return std::nullopt. An invertible function is one that is 1-to-1 and maps
3461/// every input value to exactly one output value. This is equivalent to
3462/// saying that Op1 and Op2 are equal exactly when the specified pair of
3463/// operands are equal (except that Op1 and Op2 may be poison more often).
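/// For example (informal): if Op1 is `add %x, %c` and Op2 is `add %y, %c`, the
/// two sums are equal exactly when %x == %y, so the pair (%x, %y) is returned.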
3464static std::optional<std::pair<Value*, Value*>>
3465getInvertibleOperands(const Operator *Op1,
3466 const Operator *Op2) {
3467 if (Op1->getOpcode() != Op2->getOpcode())
3468 return std::nullopt;
3469
3470 auto getOperands = [&](unsigned OpNum) -> auto {
3471 return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
3472 };
3473
3474 switch (Op1->getOpcode()) {
3475 default:
3476 break;
3477 case Instruction::Or:
3478 if (!cast<PossiblyDisjointInst>(Op1)->isDisjoint() ||
3479 !cast<PossiblyDisjointInst>(Op2)->isDisjoint())
3480 break;
3481 [[fallthrough]];
3482 case Instruction::Xor:
3483 case Instruction::Add: {
3484 Value *Other;
3485 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(0)), m_Value(Other))))
3486 return std::make_pair(Op1->getOperand(1), Other);
3487 if (match(Op2, m_c_BinOp(m_Specific(Op1->getOperand(1)), m_Value(Other))))
3488 return std::make_pair(Op1->getOperand(0), Other);
3489 break;
3490 }
3491 case Instruction::Sub:
3492 if (Op1->getOperand(0) == Op2->getOperand(0))
3493 return getOperands(1);
3494 if (Op1->getOperand(1) == Op2->getOperand(1))
3495 return getOperands(0);
3496 break;
3497 case Instruction::Mul: {
3498 // A mul is invertible when it cannot wrap: if A * C == B * C with matching
3499 // nuw/nsw flags and C != 0, then A == B. The nsw case is non-obvious, but proven by
3500 // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
3501 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3502 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3503 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3504 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3505 break;
3506
3507 // Assume operand order has been canonicalized
3508 if (Op1->getOperand(1) == Op2->getOperand(1) &&
3509 isa<ConstantInt>(Op1->getOperand(1)) &&
3510 !cast<ConstantInt>(Op1->getOperand(1))->isZero())
3511 return getOperands(0);
3512 break;
3513 }
3514 case Instruction::Shl: {
3515 // Same as multiplies, except that we don't need to check for a non-zero
3516 // multiplier: a shift always multiplies by a non-zero power of two.
3517 auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
3518 auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
3519 if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
3520 (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
3521 break;
3522
3523 if (Op1->getOperand(1) == Op2->getOperand(1))
3524 return getOperands(0);
3525 break;
3526 }
3527 case Instruction::AShr:
3528 case Instruction::LShr: {
3529 auto *PEO1 = cast<PossiblyExactOperator>(Op1);
3530 auto *PEO2 = cast<PossiblyExactOperator>(Op2);
3531 if (!PEO1->isExact() || !PEO2->isExact())
3532 break;
3533
3534 if (Op1->getOperand(1) == Op2->getOperand(1))
3535 return getOperands(0);
3536 break;
3537 }
3538 case Instruction::SExt:
3539 case Instruction::ZExt:
3540 if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
3541 return getOperands(0);
3542 break;
3543 case Instruction::PHI: {
3544 const PHINode *PN1 = cast<PHINode>(Op1);
3545 const PHINode *PN2 = cast<PHINode>(Op2);
3546
3547 // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
3548 // are a single invertible function of the start values? Note that repeated
3549 // application of an invertible function is also invertible
3550 BinaryOperator *BO1 = nullptr;
3551 Value *Start1 = nullptr, *Step1 = nullptr;
3552 BinaryOperator *BO2 = nullptr;
3553 Value *Start2 = nullptr, *Step2 = nullptr;
3554 if (PN1->getParent() != PN2->getParent() ||
3555 !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
3556 !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
3557 break;
3558
3559 auto Values = getInvertibleOperands(cast<Operator>(BO1),
3560 cast<Operator>(BO2));
3561 if (!Values)
3562 break;
3563
3564 // We have to be careful of mutually defined recurrences here. Ex:
3565 // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
3566 // * X_i = Y_i = X_(i-1) OP Y_(i-1)
3567 // The invertibility of these is complicated, and not worth reasoning
3568 // about (yet?).
3569 if (Values->first != PN1 || Values->second != PN2)
3570 break;
3571
3572 return std::make_pair(Start1, Start2);
3573 }
3574 }
3575 return std::nullopt;
3576}
3577
3578/// Return true if V1 == (binop V2, X), where X is known non-zero.
3579/// Only handle a small subset of binops where (binop V2, X) with non-zero X
3580/// implies V2 != V1.
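/// For example (informal): if %x is known non-zero, then `%v1 = add %v2, %x`
/// implies %v1 != %v2, because adding a non-zero value modulo 2^N always
/// changes the value.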
3581static bool isModifyingBinopOfNonZero(const Value *V1, const Value *V2,
3582 const APInt &DemandedElts, unsigned Depth,
3583 const SimplifyQuery &Q) {
3584 const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
3585 if (!BO)
3586 return false;
3587 switch (BO->getOpcode()) {
3588 default:
3589 break;
3590 case Instruction::Or:
3591 if (!cast<PossiblyDisjointInst>(V1)->isDisjoint())
3592 break;
3593 [[fallthrough]];
3594 case Instruction::Xor:
3595 case Instruction::Add:
3596 Value *Op = nullptr;
3597 if (V2 == BO->getOperand(0))
3598 Op = BO->getOperand(1);
3599 else if (V2 == BO->getOperand(1))
3600 Op = BO->getOperand(0);
3601 else
3602 return false;
3603 return isKnownNonZero(Op, DemandedElts, Q, Depth + 1);
3604 }
3605 return false;
3606}
3607
3608/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
3609/// the multiplication is nuw or nsw.
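/// For example (informal): if %x is known non-zero, then `%y = mul nsw i32 %x, 3`
/// implies %y != %x.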
3610static bool isNonEqualMul(const Value *V1, const Value *V2,
3611 const APInt &DemandedElts, unsigned Depth,
3612 const SimplifyQuery &Q) {
3613 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3614 const APInt *C;
3615 return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
3616 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3617 !C->isZero() && !C->isOne() &&
3618 isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3619 }
3620 return false;
3621}
3622
3623/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
3624/// the shift is nuw or nsw.
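/// For example (informal): if %x is known non-zero, then `%y = shl nuw i32 %x, 1`
/// implies %y != %x.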
3625static bool isNonEqualShl(const Value *V1, const Value *V2,
3626 const APInt &DemandedElts, unsigned Depth,
3627 const SimplifyQuery &Q) {
3628 if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
3629 const APInt *C;
3630 return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
3631 (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
3632 !C->isZero() && isKnownNonZero(V1, DemandedElts, Q, Depth + 1);
3633 }
3634 return false;
3635}
3636
3637static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
3638 const APInt &DemandedElts, unsigned Depth,
3639 const SimplifyQuery &Q) {
3640 // Check two PHIs are in same block.
3641 if (PN1->getParent() != PN2->getParent())
3642 return false;
3643
3644 SmallPtrSet<const BasicBlock *, 8> VisitedBBs;
3645 bool UsedFullRecursion = false;
3646 for (const BasicBlock *IncomBB : PN1->blocks()) {
3647 if (!VisitedBBs.insert(IncomBB).second)
3648 continue; // Don't reprocess blocks that we have dealt with already.
3649 const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
3650 const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
3651 const APInt *C1, *C2;
3652 if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
3653 continue;
3654
3655 // Only one pair of phi operands is allowed for full recursion.
3656 if (UsedFullRecursion)
3657 return false;
3658
3659 SimplifyQuery RecQ = Q.getWithoutCondContext();
3660 RecQ.CxtI = IncomBB->getTerminator();
3661 if (!isKnownNonEqual(IV1, IV2, DemandedElts, Depth + 1, RecQ))
3662 return false;
3663 UsedFullRecursion = true;
3664 }
3665 return true;
3666}
3667
3668static bool isNonEqualSelect(const Value *V1, const Value *V2,
3669 const APInt &DemandedElts, unsigned Depth,
3670 const SimplifyQuery &Q) {
3671 const SelectInst *SI1 = dyn_cast<SelectInst>(V1);
3672 if (!SI1)
3673 return false;
3674
3675 if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2)) {
3676 const Value *Cond1 = SI1->getCondition();
3677 const Value *Cond2 = SI2->getCondition();
3678 if (Cond1 == Cond2)
3679 return isKnownNonEqual(SI1->getTrueValue(), SI2->getTrueValue(),
3680 DemandedElts, Depth + 1, Q) &&
3681 isKnownNonEqual(SI1->getFalseValue(), SI2->getFalseValue(),
3682 DemandedElts, Depth + 1, Q);
3683 }
3684 return isKnownNonEqual(SI1->getTrueValue(), V2, DemandedElts, Depth + 1, Q) &&
3685 isKnownNonEqual(SI1->getFalseValue(), V2, DemandedElts, Depth + 1, Q);
3686}
3687
3688// Check to see if A is both a GEP and the incoming value for a PHI in a loop
3689// (a recursive GEP), and B is either a plain pointer or another GEP. If the PHI
3690// has 2 incoming values, one of them being the recursive GEP A and the other a
3691// pointer with the same base as B at the same or a higher offset, then A cannot
3692// equal B while the recursive GEP's per-iteration offset is greater than 0.
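// Illustrative IR shape this matches (names invented for the example):
//   loop:
//     %p = phi ptr [ %start, %entry ], [ %p.next, %loop ]
//     %p.next = getelementptr inbounds i8, ptr %p, i64 4
// Here %p.next is known non-equal to %b whenever %start sits at the same or a
// higher offset than %b from their common base, because every iteration
// strictly increases the offset.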
3693static bool isNonEqualPointersWithRecursiveGEP(const Value *A, const Value *B,
3694 const SimplifyQuery &Q) {
3695 if (!A->getType()->isPointerTy() || !B->getType()->isPointerTy())
3696 return false;
3697
3698 auto *GEPA = dyn_cast<GEPOperator>(A);
3699 if (!GEPA || GEPA->getNumIndices() != 1 || !isa<Constant>(GEPA->idx_begin()))
3700 return false;
3701
3702 // Handle 2 incoming PHI values with one being a recursive GEP.
3703 auto *PN = dyn_cast<PHINode>(GEPA->getPointerOperand());
3704 if (!PN || PN->getNumIncomingValues() != 2)
3705 return false;
3706
3707 // Search for the recursive GEP as an incoming operand, and record that as
3708 // Step.
3709 Value *Start = nullptr;
3710 Value *Step = const_cast<Value *>(A);
3711 if (PN->getIncomingValue(0) == Step)
3712 Start = PN->getIncomingValue(1);
3713 else if (PN->getIncomingValue(1) == Step)
3714 Start = PN->getIncomingValue(0);
3715 else
3716 return false;
3717
3718 // The other incoming value's base should match B's base. The pointers are
3719 // known non-equal if either
3720 // StartOffset >= OffsetB && StepOffset > 0, or
3721 // StartOffset <= OffsetB && StepOffset < 0.
3722 // We use stripAndAccumulateInBoundsConstantOffsets to restrict the
3723 // optimisation to inbounds GEPs only.
3724 unsigned IndexWidth = Q.DL.getIndexTypeSizeInBits(Start->getType());
3725 APInt StartOffset(IndexWidth, 0);
3726 Start = Start->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StartOffset);
3727 APInt StepOffset(IndexWidth, 0);
3728 Step = Step->stripAndAccumulateInBoundsConstantOffsets(Q.DL, StepOffset);
3729
3730 // Check if Base Pointer of Step matches the PHI.
3731 if (Step != PN)
3732 return false;
3733 APInt OffsetB(IndexWidth, 0);
3734 B = B->stripAndAccumulateInBoundsConstantOffsets(Q.DL, OffsetB);
3735 return Start == B &&
3736 ((StartOffset.sge(OffsetB) && StepOffset.isStrictlyPositive()) ||
3737 (StartOffset.sle(OffsetB) && StepOffset.isNegative()));
3738}
3739
3740/// Return true if it is known that V1 != V2.
3741static bool isKnownNonEqual(const Value *V1, const Value *V2,
3742 const APInt &DemandedElts, unsigned Depth,
3743 const SimplifyQuery &Q) {
3744 if (V1 == V2)
3745 return false;
3746 if (V1->getType() != V2->getType())
3747 // We can't look through casts yet.
3748 return false;
3749
3750 if (Depth >= MaxAnalysisRecursionDepth)
3751 return false;
3752
3753 // See if we can recurse through (exactly one of) our operands. This
3754 // requires our operation be 1-to-1 and map every input value to exactly
3755 // one output value. Such an operation is invertible.
3756 auto *O1 = dyn_cast<Operator>(V1);
3757 auto *O2 = dyn_cast<Operator>(V2);
3758 if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
3759 if (auto Values = getInvertibleOperands(O1, O2))
3760 return isKnownNonEqual(Values->first, Values->second, DemandedElts,
3761 Depth + 1, Q);
3762
3763 if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
3764 const PHINode *PN2 = cast<PHINode>(V2);
3765 // FIXME: This is missing a generalization to handle the case where one is
3766 // a PHI and another one isn't.
3767 if (isNonEqualPHIs(PN1, PN2, DemandedElts, Depth, Q))
3768 return true;
3769 };
3770 }
3771
3772 if (isModifyingBinopOfNonZero(V1, V2, DemandedElts, Depth, Q) ||
3773 isModifyingBinopOfNonZero(V2, V1, DemandedElts, Depth, Q))
3774 return true;
3775
3776 if (isNonEqualMul(V1, V2, DemandedElts, Depth, Q) ||
3777 isNonEqualMul(V2, V1, DemandedElts, Depth, Q))
3778 return true;
3779
3780 if (isNonEqualShl(V1, V2, DemandedElts, Depth, Q) ||
3781 isNonEqualShl(V2, V1, DemandedElts, Depth, Q))
3782 return true;
3783
3784 if (V1->getType()->isIntOrIntVectorTy()) {
3785 // Are any known bits in V1 contradictory to known bits in V2? If V1
3786 // has a known zero where V2 has a known one, they must not be equal.
3787 KnownBits Known1 = computeKnownBits(V1, DemandedElts, Depth, Q);
3788 if (!Known1.isUnknown()) {
3789 KnownBits Known2 = computeKnownBits(V2, DemandedElts, Depth, Q);
3790 if (Known1.Zero.intersects(Known2.One) ||
3791 Known2.Zero.intersects(Known1.One))
3792 return true;
3793 }
3794 }
3795
3796 if (isNonEqualSelect(V1, V2, DemandedElts, Depth, Q) ||
3797 isNonEqualSelect(V2, V1, DemandedElts, Depth, Q))
3798 return true;
3799
3800 if (isNonEqualPointersWithRecursiveGEP(V1, V2, Q) ||
3801 isNonEqualPointersWithRecursiveGEP(V2, V1, Q))
3802 return true;
3803
3804 Value *A, *B;
3805 // PtrToInts are NonEqual if their Ptrs are NonEqual.
3806 // Check PtrToInt type matches the pointer size.
3807 if (match(V1, m_PtrToIntSameSize(Q.DL, m_Value(A))) &&
3808 match(V2, m_PtrToIntSameSize(Q.DL, m_Value(B))))
3809 return isKnownNonEqual(A, B, DemandedElts, Depth + 1, Q);
3810
3811 return false;
3812}
3813
3814/// For vector constants, loop over the elements and find the constant with the
3815/// minimum number of sign bits. Return 0 if the value is not a vector constant
3816/// or if any element was not analyzed; otherwise, return the count for the
3817/// element with the minimum number of sign bits.
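/// For example (informal): for the constant <2 x i8> <i8 -1, i8 3>, the
/// elements have 8 and 6 sign bits respectively, so 6 is returned.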
3818static unsigned computeNumSignBitsVectorConstant(const Value *V,
3819 const APInt &DemandedElts,
3820 unsigned TyBits) {
3821 const auto *CV = dyn_cast<Constant>(V);
3822 if (!CV || !isa<FixedVectorType>(CV->getType()))
3823 return 0;
3824
3825 unsigned MinSignBits = TyBits;
3826 unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3827 for (unsigned i = 0; i != NumElts; ++i) {
3828 if (!DemandedElts[i])
3829 continue;
3830 // If we find a non-ConstantInt, bail out.
3831 auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3832 if (!Elt)
3833 return 0;
3834
3835 MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3836 }
3837
3838 return MinSignBits;
3839}
3840
3841static unsigned ComputeNumSignBitsImpl(const Value *V,
3842 const APInt &DemandedElts,
3843 unsigned Depth, const SimplifyQuery &Q);
3844
3845static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3846 unsigned Depth, const SimplifyQuery &Q) {
3847 unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3848 assert(Result > 0 && "At least one sign bit needs to be present!");
3849 return Result;
3850}
3851
3852/// Return the number of times the sign bit of the register is replicated into
3853/// the other bits. We know that at least 1 bit is always equal to the sign bit
3854/// (itself), but other cases can give us information. For example, immediately
3855/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3856/// other, so we return 3. For vectors, return the number of sign bits for the
3857/// vector element with the minimum number of known sign bits of the demanded
3858/// elements in the vector specified by DemandedElts.
3859static unsigned ComputeNumSignBitsImpl(const Value *V,
3860 const APInt &DemandedElts,
3861 unsigned Depth, const SimplifyQuery &Q) {
3862 Type *Ty = V->getType();
3863#ifndef NDEBUG
3864 assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3865
3866 if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3867 assert(
3868 FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3869 "DemandedElt width should equal the fixed vector number of elements");
3870 } else {
3871 assert(DemandedElts == APInt(1, 1) &&
3872 "DemandedElt width should be 1 for scalars");
3873 }
3874#endif
3875
3876 // We return the minimum number of sign bits that are guaranteed to be present
3877 // in V, so for undef we have to conservatively return 1. We don't have the
3878 // same behavior for poison though -- that's a FIXME today.
3879
3880 Type *ScalarTy = Ty->getScalarType();
3881 unsigned TyBits = ScalarTy->isPointerTy() ?
3882 Q.DL.getPointerTypeSizeInBits(ScalarTy) :
3883 Q.DL.getTypeSizeInBits(ScalarTy);
3884
3885 unsigned Tmp, Tmp2;
3886 unsigned FirstAnswer = 1;
3887
3888 // Note that ConstantInt is handled by the general computeKnownBits case
3889 // below.
3890
3891 if (Depth == MaxAnalysisRecursionDepth)
3892 return 1;
3893
3894 if (auto *U = dyn_cast<Operator>(V)) {
3895 switch (Operator::getOpcode(V)) {
3896 default: break;
3897 case Instruction::SExt:
3898 Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
3899 return ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q) +
3900 Tmp;
3901
3902 case Instruction::SDiv: {
3903 const APInt *Denominator;
3904 // sdiv X, C -> adds log(C) sign bits.
3905 if (match(U->getOperand(1), m_APInt(Denominator))) {
3906
3907 // Ignore non-positive denominator.
3908 if (!Denominator->isStrictlyPositive())
3909 break;
3910
3911 // Calculate the incoming numerator bits.
3912 unsigned NumBits =
3913 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3914
3915 // Add floor(log(C)) bits to the numerator bits.
3916 return std::min(TyBits, NumBits + Denominator->logBase2());
3917 }
3918 break;
3919 }
3920
3921 case Instruction::SRem: {
3922 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3923
3924 const APInt *Denominator;
3925 // srem X, C -> we know that the result is within [-C+1,C) when C is a
3926 // positive constant. This lets us put a lower bound on the number of sign
3927 // bits.
3928 if (match(U->getOperand(1), m_APInt(Denominator))) {
3929
3930 // Ignore non-positive denominator.
3931 if (Denominator->isStrictlyPositive()) {
3932 // Calculate the leading sign bit constraints by examining the
3933 // denominator. Given that the denominator is positive, there are two
3934 // cases:
3935 //
3936 // 1. The numerator is positive. The result range is [0,C) and
3937 // [0,C) u< (1 << ceilLogBase2(C)).
3938 //
3939 // 2. The numerator is negative. Then the result range is (-C,0] and
3940 // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3941 //
3942 // Thus a lower bound on the number of sign bits is `TyBits -
3943 // ceilLogBase2(C)`.
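// Worked example (illustrative): for `srem i32 %X, 100`,
// ceilLogBase2(100) == 7, so every possible result lies in (-100, 100)
// and is known to have at least 32 - 7 == 25 sign bits.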
3944
3945 unsigned ResBits = TyBits - Denominator->ceilLogBase2();
3946 Tmp = std::max(Tmp, ResBits);
3947 }
3948 }
3949 return Tmp;
3950 }
3951
3952 case Instruction::AShr: {
3953 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3954 // ashr X, C -> adds C sign bits. Vectors too.
3955 const APInt *ShAmt;
3956 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3957 if (ShAmt->uge(TyBits))
3958 break; // Bad shift.
3959 unsigned ShAmtLimited = ShAmt->getZExtValue();
3960 Tmp += ShAmtLimited;
3961 if (Tmp > TyBits) Tmp = TyBits;
3962 }
3963 return Tmp;
3964 }
3965 case Instruction::Shl: {
3966 const APInt *ShAmt;
3967 Value *X = nullptr;
3968 if (match(U->getOperand(1), m_APInt(ShAmt))) {
3969 // shl destroys sign bits.
3970 if (ShAmt->uge(TyBits))
3971 break; // Bad shift.
3972 // We can look through a zext (more or less treating it as a sext) if
3973 // all extended bits are shifted out.
3974 if (match(U->getOperand(0), m_ZExt(m_Value(X))) &&
3975 ShAmt->uge(TyBits - X->getType()->getScalarSizeInBits())) {
3976 Tmp = ComputeNumSignBits(X, DemandedElts, Depth + 1, Q);
3977 Tmp += TyBits - X->getType()->getScalarSizeInBits();
3978 } else
3979 Tmp =
3980 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3981 if (ShAmt->uge(Tmp))
3982 break; // Shifted all sign bits out.
3983 Tmp2 = ShAmt->getZExtValue();
3984 return Tmp - Tmp2;
3985 }
3986 break;
3987 }
3988 case Instruction::And:
3989 case Instruction::Or:
3990 case Instruction::Xor: // NOT is handled here.
3991 // Logical binary ops preserve at least the smaller operand's sign bit count.
3992 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
3993 if (Tmp != 1) {
3994 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
3995 FirstAnswer = std::min(Tmp, Tmp2);
3996 // We computed what we know about the sign bits as our first
3997 // answer. Now proceed to the generic code that uses
3998 // computeKnownBits, and pick whichever answer is better.
3999 }
4000 break;
4001
4002 case Instruction::Select: {
4003 // If we have a clamp pattern, we know that the number of sign bits will
4004 // be the minimum of the clamp min/max range.
4005 const Value *X;
4006 const APInt *CLow, *CHigh;
4007 if (isSignedMinMaxClamp(U, X, CLow, CHigh))
4008 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
4009
4010 Tmp = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4011 if (Tmp == 1)
4012 break;
4013 Tmp2 = ComputeNumSignBits(U->getOperand(2), DemandedElts, Depth + 1, Q);
4014 return std::min(Tmp, Tmp2);
4015 }
4016
4017 case Instruction::Add:
4018 // Add can have at most one carry bit. Thus we know that the output
4019 // is, at worst, one more bit than the inputs.
4020 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4021 if (Tmp == 1) break;
4022
4023 // Special case decrementing a value (ADD X, -1):
4024 if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
4025 if (CRHS->isAllOnesValue()) {
4026 KnownBits Known(TyBits);
4027 computeKnownBits(U->getOperand(0), DemandedElts, Known, Depth + 1, Q);
4028
4029 // If the input is known to be 0 or 1, the output is 0/-1, which is
4030 // all sign bits set.
4031 if ((Known.Zero | 1).isAllOnes())
4032 return TyBits;
4033
4034 // If we are subtracting one from a positive number, there is no carry
4035 // out of the result.
4036 if (Known.isNonNegative())
4037 return Tmp;
4038 }
4039
4040 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4041 if (Tmp2 == 1)
4042 break;
4043 return std::min(Tmp, Tmp2) - 1;
4044
4045 case Instruction::Sub:
4046 Tmp2 = ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4047 if (Tmp2 == 1)
4048 break;
4049
4050 // Handle NEG.
4051 if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
4052 if (CLHS->isNullValue()) {
4053 KnownBits Known(TyBits);
4054 computeKnownBits(U->getOperand(1), DemandedElts, Known, Depth + 1, Q);
4055 // If the input is known to be 0 or 1, the output is 0/-1, which is
4056 // all sign bits set.
4057 if ((Known.Zero | 1).isAllOnes())
4058 return TyBits;
4059
4060 // If the input is known to be positive (the sign bit is known clear),
4061 // the output of the NEG has the same number of sign bits as the
4062 // input.
4063 if (Known.isNonNegative())
4064 return Tmp2;
4065
4066 // Otherwise, we treat this like a SUB.
4067 }
4068
4069 // Sub can have at most one carry bit. Thus we know that the output
4070 // is, at worst, one more bit than the inputs.
4071 Tmp = ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4072 if (Tmp == 1)
4073 break;
4074 return std::min(Tmp, Tmp2) - 1;
4075
4076 case Instruction::Mul: {
4077 // The output of the Mul can be at most twice the valid bits in the
4078 // inputs.
4079 unsigned SignBitsOp0 =
4080 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4081 if (SignBitsOp0 == 1)
4082 break;
4083 unsigned SignBitsOp1 =
4084 ComputeNumSignBits(U->getOperand(1), DemandedElts, Depth + 1, Q);
4085 if (SignBitsOp1 == 1)
4086 break;
4087 unsigned OutValidBits =
4088 (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
4089 return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
4090 }
4091
4092 case Instruction::PHI: {
4093 const PHINode *PN = cast<PHINode>(U);
4094 unsigned NumIncomingValues = PN->getNumIncomingValues();
4095 // Don't analyze large in-degree PHIs.
4096 if (NumIncomingValues > 4) break;
4097 // Unreachable blocks may have zero-operand PHI nodes.
4098 if (NumIncomingValues == 0) break;
4099
4100 // Take the minimum of all incoming values. This can't infinitely loop
4101 // because of our depth threshold.
4102 SimplifyQuery RecQ = Q.getWithoutCondContext();
4103 Tmp = TyBits;
4104 for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
4105 if (Tmp == 1) return Tmp;
4106 RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
4107 Tmp = std::min(Tmp, ComputeNumSignBits(PN->getIncomingValue(i),
4108 DemandedElts, Depth + 1, RecQ));
4109 }
4110 return Tmp;
4111 }
4112
4113 case Instruction::Trunc: {
4114 // If the input contained enough sign bits that some remain after the
4115 // truncation, then we can make use of that. Otherwise we don't know
4116 // anything.
4117 Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4118 unsigned OperandTyBits = U->getOperand(0)->getType()->getScalarSizeInBits();
4119 if (Tmp > (OperandTyBits - TyBits))
4120 return Tmp - (OperandTyBits - TyBits);
4121
4122 return 1;
4123 }
4124
4125 case Instruction::ExtractElement:
4126 // Look through extract element. At the moment we keep this simple and
4127 // skip tracking the specific element. But at least we might find
4128 // information valid for all elements of the vector (for example if vector
4129 // is sign extended, shifted, etc).
4130 return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
4131
4132 case Instruction::ShuffleVector: {
4133 // Collect the minimum number of sign bits that are shared by every vector
4134 // element referenced by the shuffle.
4135 auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
4136 if (!Shuf) {
4137 // FIXME: Add support for shufflevector constant expressions.
4138 return 1;
4139 }
4140 APInt DemandedLHS, DemandedRHS;
4141 // For undef elements, we don't know anything about the common state of
4142 // the shuffle result.
4143 if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
4144 return 1;
4145 Tmp = std::numeric_limits<unsigned>::max();
4146 if (!!DemandedLHS) {
4147 const Value *LHS = Shuf->getOperand(0);
4148 Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
4149 }
4150 // If we don't know anything, early out and try computeKnownBits
4151 // fall-back.
4152 if (Tmp == 1)
4153 break;
4154 if (!!DemandedRHS) {
4155 const Value *RHS = Shuf->getOperand(1);
4156 Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
4157 Tmp = std::min(Tmp, Tmp2);
4158 }
4159 // If we don't know anything, early out and try computeKnownBits
4160 // fall-back.
4161 if (Tmp == 1)
4162 break;
4163 assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
4164 return Tmp;
4165 }
4166 case Instruction::Call: {
4167 if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
4168 switch (II->getIntrinsicID()) {
4169 default:
4170 break;
4171 case Intrinsic::abs:
4172 Tmp =
4173 ComputeNumSignBits(U->getOperand(0), DemandedElts, Depth + 1, Q);
4174 if (Tmp == 1)
4175 break;
4176
4177 // Absolute value reduces number of sign bits by at most 1.
4178 return Tmp - 1;
4179 case Intrinsic::smin:
4180 case Intrinsic::smax: {
4181 const APInt *CLow, *CHigh;
4182 if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
4183 return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
4184 }
4185 }
4186 }
4187 }
4188 }
4189 }
4190
4191 // Finally, if we can prove that the top bits of the result are 0's or 1's,
4192 // use this information.
4193
4194 // If we can examine all elements of a vector constant successfully, we're
4195 // done (we can't do any better than that). If not, keep trying.
4196 if (unsigned VecSignBits =
4197 computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
4198 return VecSignBits;
4199
4200 KnownBits Known(TyBits);
4201 computeKnownBits(V, DemandedElts, Known, Depth, Q);
4202
4203 // If we know that the sign bit is either zero or one, determine the number of
4204 // identical bits in the top of the input value.
4205 return std::max(FirstAnswer, Known.countMinSignBits());
4206}
4207
4208Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
4209 const TargetLibraryInfo *TLI) {
4210 const Function *F = CB.getCalledFunction();
4211 if (!F)
4212 return Intrinsic::not_intrinsic;
4213
4214 if (F->isIntrinsic())
4215 return F->getIntrinsicID();
4216
4217 // We are going to infer semantics of a library function based on mapping it
4218 // to an LLVM intrinsic. Check that the library function is available from
4219 // this callbase and in this environment.
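// For example (illustrative): a read-only call to the C library function
// `sinf` maps to the `llvm.sin` intrinsic via the LibFunc_sinf case below,
// provided TLI reports the function as available.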
4220 LibFunc Func;
4221 if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
4222 !CB.onlyReadsMemory())
4223 return Intrinsic::not_intrinsic;
4224
4225 switch (Func) {
4226 default:
4227 break;
4228 case LibFunc_sin:
4229 case LibFunc_sinf:
4230 case LibFunc_sinl:
4231 return Intrinsic::sin;
4232 case LibFunc_cos:
4233 case LibFunc_cosf:
4234 case LibFunc_cosl:
4235 return Intrinsic::cos;
4236 case LibFunc_tan:
4237 case LibFunc_tanf:
4238 case LibFunc_tanl:
4239 return Intrinsic::tan;
4240 case LibFunc_asin:
4241 case LibFunc_asinf:
4242 case LibFunc_asinl:
4243 return Intrinsic::asin;
4244 case LibFunc_acos:
4245 case LibFunc_acosf:
4246 case LibFunc_acosl:
4247 return Intrinsic::acos;
4248 case LibFunc_atan:
4249 case LibFunc_atanf:
4250 case LibFunc_atanl:
4251 return Intrinsic::atan;
4252 case LibFunc_atan2:
4253 case LibFunc_atan2f:
4254 case LibFunc_atan2l:
4255 return Intrinsic::atan2;
4256 case LibFunc_sinh:
4257 case LibFunc_sinhf:
4258 case LibFunc_sinhl:
4259 return Intrinsic::sinh;
4260 case LibFunc_cosh:
4261 case LibFunc_coshf:
4262 case LibFunc_coshl:
4263 return Intrinsic::cosh;
4264 case LibFunc_tanh:
4265 case LibFunc_tanhf:
4266 case LibFunc_tanhl:
4267 return Intrinsic::tanh;
4268 case LibFunc_exp:
4269 case LibFunc_expf:
4270 case LibFunc_expl:
4271 return Intrinsic::exp;
4272 case LibFunc_exp2:
4273 case LibFunc_exp2f:
4274 case LibFunc_exp2l:
4275 return Intrinsic::exp2;
4276 case LibFunc_exp10:
4277 case LibFunc_exp10f:
4278 case LibFunc_exp10l:
4279 return Intrinsic::exp10;
4280 case LibFunc_log:
4281 case LibFunc_logf:
4282 case LibFunc_logl:
4283 return Intrinsic::log;
4284 case LibFunc_log10:
4285 case LibFunc_log10f:
4286 case LibFunc_log10l:
4287 return Intrinsic::log10;
4288 case LibFunc_log2:
4289 case LibFunc_log2f:
4290 case LibFunc_log2l:
4291 return Intrinsic::log2;
4292 case LibFunc_fabs:
4293 case LibFunc_fabsf:
4294 case LibFunc_fabsl:
4295 return Intrinsic::fabs;
4296 case LibFunc_fmin:
4297 case LibFunc_fminf:
4298 case LibFunc_fminl:
4299 return Intrinsic::minnum;
4300 case LibFunc_fmax:
4301 case LibFunc_fmaxf:
4302 case LibFunc_fmaxl:
4303 return Intrinsic::maxnum;
4304 case LibFunc_copysign:
4305 case LibFunc_copysignf:
4306 case LibFunc_copysignl:
4307 return Intrinsic::copysign;
4308 case LibFunc_floor:
4309 case LibFunc_floorf:
4310 case LibFunc_floorl:
4311 return Intrinsic::floor;
4312 case LibFunc_ceil:
4313 case LibFunc_ceilf:
4314 case LibFunc_ceill:
4315 return Intrinsic::ceil;
4316 case LibFunc_trunc:
4317 case LibFunc_truncf:
4318 case LibFunc_truncl:
4319 return Intrinsic::trunc;
4320 case LibFunc_rint:
4321 case LibFunc_rintf:
4322 case LibFunc_rintl:
4323 return Intrinsic::rint;
4324 case LibFunc_nearbyint:
4325 case LibFunc_nearbyintf:
4326 case LibFunc_nearbyintl:
4327 return Intrinsic::nearbyint;
4328 case LibFunc_round:
4329 case LibFunc_roundf:
4330 case LibFunc_roundl:
4331 return Intrinsic::round;
4332 case LibFunc_roundeven:
4333 case LibFunc_roundevenf:
4334 case LibFunc_roundevenl:
4335 return Intrinsic::roundeven;
4336 case LibFunc_pow:
4337 case LibFunc_powf:
4338 case LibFunc_powl:
4339 return Intrinsic::pow;
4340 case LibFunc_sqrt:
4341 case LibFunc_sqrtf:
4342 case LibFunc_sqrtl:
4343 return Intrinsic::sqrt;
4344 }
4345
4346 return Intrinsic::not_intrinsic;
4347}
4348
4349/// Return true if it's possible to assume IEEE treatment of input denormals in
4350/// \p F for values of type \p Ty.
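/// For example (illustrative): a function with the attribute
/// "denormal-fp-math"="ieee,ieee" satisfies this, while one with
/// "denormal-fp-math"="preserve-sign,preserve-sign" does not.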
4351static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
4352 Ty = Ty->getScalarType();
4353 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
4354}
4355
4356static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4357 Ty = Ty->getScalarType();
4358 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4359 return Mode.Input == DenormalMode::IEEE ||
4360 Mode.Input == DenormalMode::PositiveZero;
4361}
4362
4363static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
4364 Ty = Ty->getScalarType();
4365 DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
4366 return Mode.Output == DenormalMode::IEEE ||
4367 Mode.Output == DenormalMode::PositiveZero;
4368}
4369
4370bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const {
4371 return isKnownNeverZero() &&
4372 (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty));
4373}
4374
4375bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F,
4376 Type *Ty) const {
4377 return isKnownNeverNegZero() &&
4378 (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty));
4379}
4380
4381bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F,
4382 Type *Ty) const {
4383 if (!isKnownNeverPosZero())
4384 return false;
4385
4386 // If we know there are no denormals, nothing can be flushed to zero.
4387 if (isKnownNeverSubnormal())
4388 return true;
4389
4390 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4391 switch (Mode.Input) {
4392 case DenormalMode::IEEE:
4393 return true;
4394 case DenormalMode::PreserveSign:
4395 // Negative subnormal won't flush to +0
4396 return isKnownNeverPosSubnormal();
4397 case DenormalMode::PositiveZero:
4398 default:
4399 // Both positive and negative subnormal could flush to +0
4400 return false;
4401 }
4402
4403 llvm_unreachable("covered switch over denormal mode");
4404}
4405
4406void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F,
4407 Type *Ty) {
4408 KnownFPClasses = Src.KnownFPClasses;
4409 // If the source may already be both +0 and -0, a flushed denormal input
4410 // cannot add any new zero class, so there is nothing to check.
4411 if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero())
4412 return;
4413
4414 // If we know the input can't be a denormal, it can't be flushed to 0.
4415 if (Src.isKnownNeverSubnormal())
4416 return;
4417
4418 DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
4419
4420 if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE())
4421 KnownFPClasses |= fcPosZero;
4422
4423 if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) {
4424 if (Mode != DenormalMode::getPositiveZero())
4425 KnownFPClasses |= fcNegZero;
4426
4427 if (Mode.Input == DenormalMode::PositiveZero ||
4428 Mode.Output == DenormalMode::PositiveZero ||
4429 Mode.Input == DenormalMode::Dynamic ||
4430 Mode.Output == DenormalMode::Dynamic)
4431 KnownFPClasses |= fcPosZero;
4432 }
4433}
4434
4435void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
4436 const Function &F, Type *Ty) {
4437 propagateDenormal(Src, F, Ty);
4438 propagateNaN(Src, /*PreserveSign=*/true);
4439}
4440
4441/// Given an exploded icmp instruction, return true if the comparison only
4442/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned to
4443/// true if the comparison is true exactly when the input value is negative.
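/// For example (informal): `icmp slt i32 %x, 0` only checks the sign bit and
/// sets TrueIfSigned to true; `icmp sgt i32 %x, -1` also only checks the sign
/// bit but sets TrueIfSigned to false.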
4444static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
4445 bool &TrueIfSigned) {
4446 switch (Pred) {
4447 case ICmpInst::ICMP_SLT: // True if LHS s< 0
4448 TrueIfSigned = true;
4449 return RHS.isZero();
4450 case ICmpInst::ICMP_SLE: // True if LHS s<= -1
4451 TrueIfSigned = true;
4452 return RHS.isAllOnes();
4453 case ICmpInst::ICMP_SGT: // True if LHS s> -1
4454 TrueIfSigned = false;
4455 return RHS.isAllOnes();
4456 case ICmpInst::ICMP_SGE: // True if LHS s>= 0
4457 TrueIfSigned = false;
4458 return RHS.isZero();
4459 case ICmpInst::ICMP_UGT:
4460 // True if LHS u> RHS and RHS == sign-bit-mask - 1
4461 TrueIfSigned = true;
4462 return RHS.isMaxSignedValue();
4463 case ICmpInst::ICMP_UGE:
4464 // True if LHS u>= RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4465 TrueIfSigned = true;
4466 return RHS.isMinSignedValue();
4467 case ICmpInst::ICMP_ULT:
4468 // True if LHS u< RHS and RHS == sign-bit-mask (2^7, 2^15, 2^31, etc)
4469 TrueIfSigned = false;
4470 return RHS.isMinSignedValue();
4471 case ICmpInst::ICMP_ULE:
4472 // True if LHS u<= RHS and RHS == sign-bit-mask - 1
4473 TrueIfSigned = false;
4474 return RHS.isMaxSignedValue();
4475 default:
4476 return false;
4477 }
4478}
4479
4480/// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
4481/// same result as an fcmp with the given operands.