LLVM  16.0.0git
ValueTracking.cpp
Go to the documentation of this file.
1 //===- ValueTracking.cpp - Walk computations to compute properties --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains routines that help analyze properties that chains of
10 // computations have.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringRef.h"
33 #include "llvm/Analysis/Loads.h"
34 #include "llvm/Analysis/LoopInfo.h"
37 #include "llvm/IR/Argument.h"
38 #include "llvm/IR/Attributes.h"
39 #include "llvm/IR/BasicBlock.h"
40 #include "llvm/IR/Constant.h"
41 #include "llvm/IR/ConstantRange.h"
42 #include "llvm/IR/Constants.h"
43 #include "llvm/IR/DerivedTypes.h"
44 #include "llvm/IR/DiagnosticInfo.h"
45 #include "llvm/IR/Dominators.h"
46 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GlobalAlias.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/GlobalVariable.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/IntrinsicsAArch64.h"
57 #include "llvm/IR/IntrinsicsRISCV.h"
58 #include "llvm/IR/IntrinsicsX86.h"
59 #include "llvm/IR/LLVMContext.h"
60 #include "llvm/IR/Metadata.h"
61 #include "llvm/IR/Module.h"
62 #include "llvm/IR/Operator.h"
63 #include "llvm/IR/PatternMatch.h"
64 #include "llvm/IR/Type.h"
65 #include "llvm/IR/User.h"
66 #include "llvm/IR/Value.h"
67 #include "llvm/Support/Casting.h"
69 #include "llvm/Support/Compiler.h"
71 #include "llvm/Support/KnownBits.h"
73 #include <algorithm>
74 #include <cassert>
75 #include <cstdint>
76 #include <utility>
77 
78 using namespace llvm;
79 using namespace llvm::PatternMatch;
80 
81 // Controls the number of uses of the value searched for possible
82 // dominating comparisons.
83 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
84  cl::Hidden, cl::init(20));
85 
86 // According to the LangRef, branching on a poison condition is absolutely
87 // immediate full UB. However, historically we haven't implemented that
88 // consistently as we had an important transformation (non-trivial unswitch)
89 // which introduced instances of branch on poison/undef to otherwise well
90 // defined programs. This issue has since been fixed, but the flag is
91 // temporarily retained to easily diagnose potential regressions.
92 static cl::opt<bool> BranchOnPoisonAsUB("branch-on-poison-as-ub",
93  cl::Hidden, cl::init(true));
94 
95 
96 /// Returns the bitwidth of the given scalar or pointer type. For vector types,
97 /// returns the element type's bitwidth.
98 static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
99  if (unsigned BitWidth = Ty->getScalarSizeInBits())
100  return BitWidth;
101 
102  return DL.getPointerTypeSizeInBits(Ty);
103 }
104 
105 namespace {
106 
107 // Simplifying using an assume can only be done in a particular control-flow
108 // context (the context instruction provides that context). If an assume and
109 // the context instruction are not in the same block then the DT helps in
110 // figuring out if we can use it.
111 struct Query {
112  const DataLayout &DL;
113  AssumptionCache *AC;
114  const Instruction *CxtI;
115  const DominatorTree *DT;
116 
117  // Unlike the other analyses, this may be a nullptr because not all clients
118  // provide it currently.
120 
121  /// If true, it is safe to use metadata during simplification.
122  InstrInfoQuery IIQ;
123 
124  Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
125  const DominatorTree *DT, bool UseInstrInfo,
126  OptimizationRemarkEmitter *ORE = nullptr)
127  : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
128 };
129 
130 } // end anonymous namespace
131 
132 // Given the provided Value and, potentially, a context instruction, return
133 // the preferred context instruction (if any).
134 static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
135  // If we've been provided with a context instruction, then use that (provided
136  // it has been inserted).
137  if (CxtI && CxtI->getParent())
138  return CxtI;
139 
140  // If the value is really an already-inserted instruction, then use that.
141  CxtI = dyn_cast<Instruction>(V);
142  if (CxtI && CxtI->getParent())
143  return CxtI;
144 
145  return nullptr;
146 }
147 
148 static const Instruction *safeCxtI(const Value *V1, const Value *V2, const Instruction *CxtI) {
149  // If we've been provided with a context instruction, then use that (provided
150  // it has been inserted).
151  if (CxtI && CxtI->getParent())
152  return CxtI;
153 
154  // If the value is really an already-inserted instruction, then use that.
155  CxtI = dyn_cast<Instruction>(V1);
156  if (CxtI && CxtI->getParent())
157  return CxtI;
158 
159  CxtI = dyn_cast<Instruction>(V2);
160  if (CxtI && CxtI->getParent())
161  return CxtI;
162 
163  return nullptr;
164 }
165 
167  const APInt &DemandedElts,
168  APInt &DemandedLHS, APInt &DemandedRHS) {
169  // The length of scalable vectors is unknown at compile time, thus we
170  // cannot check their values
171  if (isa<ScalableVectorType>(Shuf->getType()))
172  return false;
173 
174  int NumElts =
175  cast<FixedVectorType>(Shuf->getOperand(0)->getType())->getNumElements();
176  int NumMaskElts = cast<FixedVectorType>(Shuf->getType())->getNumElements();
177  DemandedLHS = DemandedRHS = APInt::getZero(NumElts);
178  if (DemandedElts.isZero())
179  return true;
180  // Simple case of a shuffle with zeroinitializer.
181  if (all_of(Shuf->getShuffleMask(), [](int Elt) { return Elt == 0; })) {
182  DemandedLHS.setBit(0);
183  return true;
184  }
185  for (int i = 0; i != NumMaskElts; ++i) {
186  if (!DemandedElts[i])
187  continue;
188  int M = Shuf->getMaskValue(i);
189  assert(M < (NumElts * 2) && "Invalid shuffle mask constant");
190 
191  // For undef elements, we don't know anything about the common state of
192  // the shuffle result.
193  if (M == -1)
194  return false;
195  if (M < NumElts)
196  DemandedLHS.setBit(M % NumElts);
197  else
198  DemandedRHS.setBit(M % NumElts);
199  }
200 
201  return true;
202 }
203 
204 static void computeKnownBits(const Value *V, const APInt &DemandedElts,
205  KnownBits &Known, unsigned Depth, const Query &Q);
206 
207 static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
208  const Query &Q) {
209  // FIXME: We currently have no way to represent the DemandedElts of a scalable
210  // vector
211  if (isa<ScalableVectorType>(V->getType())) {
212  Known.resetAll();
213  return;
214  }
215 
216  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
217  APInt DemandedElts =
218  FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
219  computeKnownBits(V, DemandedElts, Known, Depth, Q);
220 }
221 
222 void llvm::computeKnownBits(const Value *V, KnownBits &Known,
223  const DataLayout &DL, unsigned Depth,
224  AssumptionCache *AC, const Instruction *CxtI,
225  const DominatorTree *DT,
226  OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
227  ::computeKnownBits(V, Known, Depth,
228  Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
229 }
230 
231 void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
232  KnownBits &Known, const DataLayout &DL,
233  unsigned Depth, AssumptionCache *AC,
234  const Instruction *CxtI, const DominatorTree *DT,
235  OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
236  ::computeKnownBits(V, DemandedElts, Known, Depth,
237  Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
238 }
239 
240 static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
241  unsigned Depth, const Query &Q);
242 
243 static KnownBits computeKnownBits(const Value *V, unsigned Depth,
244  const Query &Q);
245 
247  unsigned Depth, AssumptionCache *AC,
248  const Instruction *CxtI,
249  const DominatorTree *DT,
251  bool UseInstrInfo) {
253  V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
254 }
255 
256 KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
257  const DataLayout &DL, unsigned Depth,
258  AssumptionCache *AC, const Instruction *CxtI,
259  const DominatorTree *DT,
261  bool UseInstrInfo) {
263  V, DemandedElts, Depth,
264  Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
265 }
266 
268  const DataLayout &DL, AssumptionCache *AC,
269  const Instruction *CxtI, const DominatorTree *DT,
270  bool UseInstrInfo) {
271  assert(LHS->getType() == RHS->getType() &&
272  "LHS and RHS should have the same type");
274  "LHS and RHS should be integers");
275  // Look for an inverted mask: (X & ~M) op (Y & M).
276  {
277  Value *M;
278  if (match(LHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
280  return true;
281  if (match(RHS, m_c_And(m_Not(m_Value(M)), m_Value())) &&
283  return true;
284  }
285 
286  // X op (Y & ~X)
287  if (match(RHS, m_c_And(m_Not(m_Specific(LHS)), m_Value())) ||
289  return true;
290 
291  // X op ((X & Y) ^ Y) -- this is the canonical form of the previous pattern
292  // for constant Y.
293  Value *Y;
294  if (match(RHS,
297  return true;
298 
299  // Look for: (A & B) op ~(A | B)
300  {
301  Value *A, *B;
302  if (match(LHS, m_And(m_Value(A), m_Value(B))) &&
304  return true;
305  if (match(RHS, m_And(m_Value(A), m_Value(B))) &&
307  return true;
308  }
309  IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
310  KnownBits LHSKnown(IT->getBitWidth());
311  KnownBits RHSKnown(IT->getBitWidth());
312  computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
313  computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
314  return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
315 }
316 
318  return !I->user_empty() && all_of(I->users(), [](const User *U) {
319  ICmpInst::Predicate P;
320  return match(U, m_ICmp(P, m_Value(), m_Zero())) && ICmpInst::isEquality(P);
321  });
322 }
323 
324 static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
325  const Query &Q);
326 
328  bool OrZero, unsigned Depth,
329  AssumptionCache *AC, const Instruction *CxtI,
330  const DominatorTree *DT, bool UseInstrInfo) {
332  V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
333 }
334 
335 static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
336  unsigned Depth, const Query &Q);
337 
338 static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
339 
340 bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
341  AssumptionCache *AC, const Instruction *CxtI,
342  const DominatorTree *DT, bool UseInstrInfo) {
344  Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
345 }
346 
348  unsigned Depth, AssumptionCache *AC,
349  const Instruction *CxtI, const DominatorTree *DT,
350  bool UseInstrInfo) {
351  KnownBits Known =
352  computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
353  return Known.isNonNegative();
354 }
355 
356 bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
357  AssumptionCache *AC, const Instruction *CxtI,
358  const DominatorTree *DT, bool UseInstrInfo) {
359  if (auto *CI = dyn_cast<ConstantInt>(V))
360  return CI->getValue().isStrictlyPositive();
361 
362  // TODO: We'd doing two recursive queries here. We should factor this such
363  // that only a single query is needed.
364  return isKnownNonNegative(V, DL, Depth, AC, CxtI, DT, UseInstrInfo) &&
365  isKnownNonZero(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
366 }
367 
368 bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
369  AssumptionCache *AC, const Instruction *CxtI,
370  const DominatorTree *DT, bool UseInstrInfo) {
371  KnownBits Known =
372  computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
373  return Known.isNegative();
374 }
375 
376 static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
377  const Query &Q);
378 
379 bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
380  const DataLayout &DL, AssumptionCache *AC,
381  const Instruction *CxtI, const DominatorTree *DT,
382  bool UseInstrInfo) {
384  Query(DL, AC, safeCxtI(V2, V1, CxtI), DT,
385  UseInstrInfo, /*ORE=*/nullptr));
386 }
387 
388 static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
389  const Query &Q);
390 
391 bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
392  const DataLayout &DL, unsigned Depth,
393  AssumptionCache *AC, const Instruction *CxtI,
394  const DominatorTree *DT, bool UseInstrInfo) {
396  V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
397 }
398 
399 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
400  unsigned Depth, const Query &Q);
401 
402 static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
403  const Query &Q) {
404  // FIXME: We currently have no way to represent the DemandedElts of a scalable
405  // vector
406  if (isa<ScalableVectorType>(V->getType()))
407  return 1;
408 
409  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
410  APInt DemandedElts =
411  FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
412  return ComputeNumSignBits(V, DemandedElts, Depth, Q);
413 }
414 
415 unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
416  unsigned Depth, AssumptionCache *AC,
417  const Instruction *CxtI,
418  const DominatorTree *DT, bool UseInstrInfo) {
420  V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
421 }
422 
424  unsigned Depth, AssumptionCache *AC,
425  const Instruction *CxtI,
426  const DominatorTree *DT) {
427  unsigned SignBits = ComputeNumSignBits(V, DL, Depth, AC, CxtI, DT);
428  return V->getType()->getScalarSizeInBits() - SignBits + 1;
429 }
430 
431 static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
432  bool NSW, const APInt &DemandedElts,
433  KnownBits &KnownOut, KnownBits &Known2,
434  unsigned Depth, const Query &Q) {
435  computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);
436 
437  // If one operand is unknown and we have no nowrap information,
438  // the result will be unknown independently of the second operand.
439  if (KnownOut.isUnknown() && !NSW)
440  return;
441 
442  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
443  KnownOut = KnownBits::computeForAddSub(Add, NSW, Known2, KnownOut);
444 }
445 
446 static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
447  const APInt &DemandedElts, KnownBits &Known,
448  KnownBits &Known2, unsigned Depth,
449  const Query &Q) {
450  computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
451  computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
452 
453  bool isKnownNegative = false;
454  bool isKnownNonNegative = false;
455  // If the multiplication is known not to overflow, compute the sign bit.
456  if (NSW) {
457  if (Op0 == Op1) {
458  // The product of a number with itself is non-negative.
459  isKnownNonNegative = true;
460  } else {
461  bool isKnownNonNegativeOp1 = Known.isNonNegative();
462  bool isKnownNonNegativeOp0 = Known2.isNonNegative();
463  bool isKnownNegativeOp1 = Known.isNegative();
464  bool isKnownNegativeOp0 = Known2.isNegative();
465  // The product of two numbers with the same sign is non-negative.
466  isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
467  (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
468  // The product of a negative number and a non-negative number is either
469  // negative or zero.
470  if (!isKnownNonNegative)
472  (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
473  Known2.isNonZero()) ||
474  (isKnownNegativeOp0 && isKnownNonNegativeOp1 && Known.isNonZero());
475  }
476  }
477 
478  bool SelfMultiply = Op0 == Op1;
479  // TODO: SelfMultiply can be poison, but not undef.
480  if (SelfMultiply)
481  SelfMultiply &=
482  isGuaranteedNotToBeUndefOrPoison(Op0, Q.AC, Q.CxtI, Q.DT, Depth + 1);
483  Known = KnownBits::mul(Known, Known2, SelfMultiply);
484 
485  // Only make use of no-wrap flags if we failed to compute the sign bit
486  // directly. This matters if the multiplication always overflows, in
487  // which case we prefer to follow the result of the direct computation,
488  // though as the program is invoking undefined behaviour we can choose
489  // whatever we like here.
490  if (isKnownNonNegative && !Known.isNegative())
491  Known.makeNonNegative();
492  else if (isKnownNegative && !Known.isNonNegative())
493  Known.makeNegative();
494 }
495 
497  KnownBits &Known) {
498  unsigned BitWidth = Known.getBitWidth();
499  unsigned NumRanges = Ranges.getNumOperands() / 2;
500  assert(NumRanges >= 1);
501 
502  Known.Zero.setAllBits();
503  Known.One.setAllBits();
504 
505  for (unsigned i = 0; i < NumRanges; ++i) {
506  ConstantInt *Lower =
507  mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
508  ConstantInt *Upper =
509  mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
510  ConstantRange Range(Lower->getValue(), Upper->getValue());
511 
512  // The first CommonPrefixBits of all values in Range are equal.
513  unsigned CommonPrefixBits =
514  (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
515  APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
516  APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
517  Known.One &= UnsignedMax & Mask;
518  Known.Zero &= ~UnsignedMax & Mask;
519  }
520 }
521 
522 static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
523  SmallVector<const Value *, 16> WorkSet(1, I);
526 
527  // The instruction defining an assumption's condition itself is always
528  // considered ephemeral to that assumption (even if it has other
529  // non-ephemeral users). See r246696's test case for an example.
530  if (is_contained(I->operands(), E))
531  return true;
532 
533  while (!WorkSet.empty()) {
534  const Value *V = WorkSet.pop_back_val();
535  if (!Visited.insert(V).second)
536  continue;
537 
538  // If all uses of this value are ephemeral, then so is this value.
539  if (llvm::all_of(V->users(), [&](const User *U) {
540  return EphValues.count(U);
541  })) {
542  if (V == E)
543  return true;
544 
545  if (V == I || (isa<Instruction>(V) &&
546  !cast<Instruction>(V)->mayHaveSideEffects() &&
547  !cast<Instruction>(V)->isTerminator())) {
548  EphValues.insert(V);
549  if (const User *U = dyn_cast<User>(V))
550  append_range(WorkSet, U->operands());
551  }
552  }
553  }
554 
555  return false;
556 }
557 
558 // Is this an intrinsic that cannot be speculated but also cannot trap?
560  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(I))
561  return CI->isAssumeLikeIntrinsic();
562 
563  return false;
564 }
565 
567  const Instruction *CxtI,
568  const DominatorTree *DT) {
569  // There are two restrictions on the use of an assume:
570  // 1. The assume must dominate the context (or the control flow must
571  // reach the assume whenever it reaches the context).
572  // 2. The context must not be in the assume's set of ephemeral values
573  // (otherwise we will use the assume to prove that the condition
574  // feeding the assume is trivially true, thus causing the removal of
575  // the assume).
576 
577  if (Inv->getParent() == CxtI->getParent()) {
578  // If Inv and CtxI are in the same block, check if the assume (Inv) is first
579  // in the BB.
580  if (Inv->comesBefore(CxtI))
581  return true;
582 
583  // Don't let an assume affect itself - this would cause the problems
584  // `isEphemeralValueOf` is trying to prevent, and it would also make
585  // the loop below go out of bounds.
586  if (Inv == CxtI)
587  return false;
588 
589  // The context comes first, but they're both in the same block.
590  // Make sure there is nothing in between that might interrupt
591  // the control flow, not even CxtI itself.
592  // We limit the scan distance between the assume and its context instruction
593  // to avoid a compile-time explosion. This limit is chosen arbitrarily, so
594  // it can be adjusted if needed (could be turned into a cl::opt).
595  auto Range = make_range(CxtI->getIterator(), Inv->getIterator());
597  return false;
598 
599  return !isEphemeralValueOf(Inv, CxtI);
600  }
601 
602  // Inv and CxtI are in different blocks.
603  if (DT) {
604  if (DT->dominates(Inv, CxtI))
605  return true;
606  } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
607  // We don't have a DT, but this trivially dominates.
608  return true;
609  }
610 
611  return false;
612 }
613 
614 static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
615  // v u> y implies v != 0.
616  if (Pred == ICmpInst::ICMP_UGT)
617  return true;
618 
619  // Special-case v != 0 to also handle v != null.
620  if (Pred == ICmpInst::ICMP_NE)
621  return match(RHS, m_Zero());
622 
623  // All other predicates - rely on generic ConstantRange handling.
624  const APInt *C;
625  if (!match(RHS, m_APInt(C)))
626  return false;
627 
629  return !TrueValues.contains(APInt::getZero(C->getBitWidth()));
630 }
631 
632 static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
633  // Use of assumptions is context-sensitive. If we don't have a context, we
634  // cannot use them!
635  if (!Q.AC || !Q.CxtI)
636  return false;
637 
638  if (Q.CxtI && V->getType()->isPointerTy()) {
639  SmallVector<Attribute::AttrKind, 2> AttrKinds{Attribute::NonNull};
640  if (!NullPointerIsDefined(Q.CxtI->getFunction(),
642  AttrKinds.push_back(Attribute::Dereferenceable);
643 
644  if (getKnowledgeValidInContext(V, AttrKinds, Q.CxtI, Q.DT, Q.AC))
645  return true;
646  }
647 
648  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
649  if (!AssumeVH)
650  continue;
651  CallInst *I = cast<CallInst>(AssumeVH);
652  assert(I->getFunction() == Q.CxtI->getFunction() &&
653  "Got assumption for the wrong function!");
654 
655  // Warning: This loop can end up being somewhat performance sensitive.
656  // We're running this loop for once for each value queried resulting in a
657  // runtime of ~O(#assumes * #values).
658 
659  assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
660  "must be an assume intrinsic");
661 
662  Value *RHS;
663  CmpInst::Predicate Pred;
664  auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
665  if (!match(I->getArgOperand(0), m_c_ICmp(Pred, m_V, m_Value(RHS))))
666  return false;
667 
668  if (cmpExcludesZero(Pred, RHS) && isValidAssumeForContext(I, Q.CxtI, Q.DT))
669  return true;
670  }
671 
672  return false;
673 }
674 
675 static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
676  unsigned Depth, const Query &Q) {
677  // Use of assumptions is context-sensitive. If we don't have a context, we
678  // cannot use them!
679  if (!Q.AC || !Q.CxtI)
680  return;
681 
682  unsigned BitWidth = Known.getBitWidth();
683 
684  // Refine Known set if the pointer alignment is set by assume bundles.
685  if (V->getType()->isPointerTy()) {
687  V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) {
688  if (isPowerOf2_64(RK.ArgValue))
689  Known.Zero.setLowBits(Log2_64(RK.ArgValue));
690  }
691  }
692 
693  // Note that the patterns below need to be kept in sync with the code
694  // in AssumptionCache::updateAffectedValues.
695 
696  for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
697  if (!AssumeVH)
698  continue;
699  CallInst *I = cast<CallInst>(AssumeVH);
700  assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
701  "Got assumption for the wrong function!");
702 
703  // Warning: This loop can end up being somewhat performance sensitive.
704  // We're running this loop for once for each value queried resulting in a
705  // runtime of ~O(#assumes * #values).
706 
707  assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
708  "must be an assume intrinsic");
709 
710  Value *Arg = I->getArgOperand(0);
711 
712  if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
713  assert(BitWidth == 1 && "assume operand is not i1?");
714  Known.setAllOnes();
715  return;
716  }
717  if (match(Arg, m_Not(m_Specific(V))) &&
718  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
719  assert(BitWidth == 1 && "assume operand is not i1?");
720  Known.setAllZero();
721  return;
722  }
723 
724  // The remaining tests are all recursive, so bail out if we hit the limit.
726  continue;
727 
728  ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
729  if (!Cmp)
730  continue;
731 
732  // We are attempting to compute known bits for the operands of an assume.
733  // Do not try to use other assumptions for those recursive calls because
734  // that can lead to mutual recursion and a compile-time explosion.
735  // An example of the mutual recursion: computeKnownBits can call
736  // isKnownNonZero which calls computeKnownBitsFromAssume (this function)
737  // and so on.
738  Query QueryNoAC = Q;
739  QueryNoAC.AC = nullptr;
740 
741  // Note that ptrtoint may change the bitwidth.
742  Value *A, *B;
743  auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
744 
745  CmpInst::Predicate Pred;
746  uint64_t C;
747  switch (Cmp->getPredicate()) {
748  default:
749  break;
750  case ICmpInst::ICMP_EQ:
751  // assume(v = a)
752  if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) &&
753  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
754  KnownBits RHSKnown =
755  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
756  Known.Zero |= RHSKnown.Zero;
757  Known.One |= RHSKnown.One;
758  // assume(v & b = a)
759  } else if (match(Cmp,
760  m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
761  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
762  KnownBits RHSKnown =
763  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
764  KnownBits MaskKnown =
765  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
766 
767  // For those bits in the mask that are known to be one, we can propagate
768  // known bits from the RHS to V.
769  Known.Zero |= RHSKnown.Zero & MaskKnown.One;
770  Known.One |= RHSKnown.One & MaskKnown.One;
771  // assume(~(v & b) = a)
772  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
773  m_Value(A))) &&
774  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
775  KnownBits RHSKnown =
776  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
777  KnownBits MaskKnown =
778  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
779 
780  // For those bits in the mask that are known to be one, we can propagate
781  // inverted known bits from the RHS to V.
782  Known.Zero |= RHSKnown.One & MaskKnown.One;
783  Known.One |= RHSKnown.Zero & MaskKnown.One;
784  // assume(v | b = a)
785  } else if (match(Cmp,
786  m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
787  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
788  KnownBits RHSKnown =
789  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
790  KnownBits BKnown =
791  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
792 
793  // For those bits in B that are known to be zero, we can propagate known
794  // bits from the RHS to V.
795  Known.Zero |= RHSKnown.Zero & BKnown.Zero;
796  Known.One |= RHSKnown.One & BKnown.Zero;
797  // assume(~(v | b) = a)
798  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
799  m_Value(A))) &&
800  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
801  KnownBits RHSKnown =
802  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
803  KnownBits BKnown =
804  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
805 
806  // For those bits in B that are known to be zero, we can propagate
807  // inverted known bits from the RHS to V.
808  Known.Zero |= RHSKnown.One & BKnown.Zero;
809  Known.One |= RHSKnown.Zero & BKnown.Zero;
810  // assume(v ^ b = a)
811  } else if (match(Cmp,
812  m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
813  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
814  KnownBits RHSKnown =
815  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
816  KnownBits BKnown =
817  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
818 
819  // For those bits in B that are known to be zero, we can propagate known
820  // bits from the RHS to V. For those bits in B that are known to be one,
821  // we can propagate inverted known bits from the RHS to V.
822  Known.Zero |= RHSKnown.Zero & BKnown.Zero;
823  Known.One |= RHSKnown.One & BKnown.Zero;
824  Known.Zero |= RHSKnown.One & BKnown.One;
825  Known.One |= RHSKnown.Zero & BKnown.One;
826  // assume(~(v ^ b) = a)
827  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
828  m_Value(A))) &&
829  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
830  KnownBits RHSKnown =
831  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
832  KnownBits BKnown =
833  computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
834 
835  // For those bits in B that are known to be zero, we can propagate
836  // inverted known bits from the RHS to V. For those bits in B that are
837  // known to be one, we can propagate known bits from the RHS to V.
838  Known.Zero |= RHSKnown.One & BKnown.Zero;
839  Known.One |= RHSKnown.Zero & BKnown.Zero;
840  Known.Zero |= RHSKnown.Zero & BKnown.One;
841  Known.One |= RHSKnown.One & BKnown.One;
842  // assume(v << c = a)
843  } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
844  m_Value(A))) &&
845  isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
846  KnownBits RHSKnown =
847  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
848 
849  // For those bits in RHS that are known, we can propagate them to known
850  // bits in V shifted to the right by C.
851  RHSKnown.Zero.lshrInPlace(C);
852  Known.Zero |= RHSKnown.Zero;
853  RHSKnown.One.lshrInPlace(C);
854  Known.One |= RHSKnown.One;
855  // assume(~(v << c) = a)
856  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
857  m_Value(A))) &&
858  isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
859  KnownBits RHSKnown =
860  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
861  // For those bits in RHS that are known, we can propagate them inverted
862  // to known bits in V shifted to the right by C.
863  RHSKnown.One.lshrInPlace(C);
864  Known.Zero |= RHSKnown.One;
865  RHSKnown.Zero.lshrInPlace(C);
866  Known.One |= RHSKnown.Zero;
867  // assume(v >> c = a)
868  } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
869  m_Value(A))) &&
870  isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
871  KnownBits RHSKnown =
872  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
873  // For those bits in RHS that are known, we can propagate them to known
874  // bits in V shifted to the right by C.
875  Known.Zero |= RHSKnown.Zero << C;
876  Known.One |= RHSKnown.One << C;
877  // assume(~(v >> c) = a)
878  } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
879  m_Value(A))) &&
880  isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
881  KnownBits RHSKnown =
882  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
883  // For those bits in RHS that are known, we can propagate them inverted
884  // to known bits in V shifted to the right by C.
885  Known.Zero |= RHSKnown.One << C;
886  Known.One |= RHSKnown.Zero << C;
887  }
888  break;
889  case ICmpInst::ICMP_SGE:
890  // assume(v >=_s c) where c is non-negative
891  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
892  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
893  KnownBits RHSKnown =
894  computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth);
895 
896  if (RHSKnown.isNonNegative()) {
897  // We know that the sign bit is zero.
898  Known.makeNonNegative();
899  }
900  }
901  break;
902  case ICmpInst::ICMP_SGT:
903  // assume(v >_s c) where c is at least -1.
904  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
905  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
906  KnownBits RHSKnown =
907  computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth);
908 
909  if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
910  // We know that the sign bit is zero.
911  Known.makeNonNegative();
912  }
913  }
914  break;
915  case ICmpInst::ICMP_SLE:
916  // assume(v <=_s c) where c is negative
917  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
918  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
919  KnownBits RHSKnown =
920  computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth);
921 
922  if (RHSKnown.isNegative()) {
923  // We know that the sign bit is one.
924  Known.makeNegative();
925  }
926  }
927  break;
928  case ICmpInst::ICMP_SLT:
929  // assume(v <_s c) where c is non-positive
930  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
931  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
932  KnownBits RHSKnown =
933  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
934 
935  if (RHSKnown.isZero() || RHSKnown.isNegative()) {
936  // We know that the sign bit is one.
937  Known.makeNegative();
938  }
939  }
940  break;
941  case ICmpInst::ICMP_ULE:
942  // assume(v <=_u c)
943  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
944  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
945  KnownBits RHSKnown =
946  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
947 
948  // Whatever high bits in c are zero are known to be zero.
949  Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
950  }
951  break;
952  case ICmpInst::ICMP_ULT:
953  // assume(v <_u c)
954  if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
955  isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
956  KnownBits RHSKnown =
957  computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
958 
959  // If the RHS is known zero, then this assumption must be wrong (nothing
960  // is unsigned less than zero). Signal a conflict and get out of here.
961  if (RHSKnown.isZero()) {
962  Known.Zero.setAllBits();
963  Known.One.setAllBits();
964  break;
965  }
966 
967  // Whatever high bits in c are zero are known to be zero (if c is a power
968  // of 2, then one more).
969  if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, QueryNoAC))
970  Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
971  else
972  Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
973  }
974  break;
975  }
976  }
977 
978  // If assumptions conflict with each other or previous known bits, then we
979  // have a logical fallacy. It's possible that the assumption is not reachable,
980  // so this isn't a real bug. On the other hand, the program may have undefined
981  // behavior, or we might have a bug in the compiler. We can't assert/crash, so
982  // clear out the known bits, try to warn the user, and hope for the best.
983  if (Known.Zero.intersects(Known.One)) {
984  Known.resetAll();
985 
986  if (Q.ORE)
987  Q.ORE->emit([&]() {
988  auto *CxtI = const_cast<Instruction *>(Q.CxtI);
989  return OptimizationRemarkAnalysis("value-tracking", "BadAssumption",
990  CxtI)
991  << "Detected conflicting code assumptions. Program may "
992  "have undefined behavior, or compiler may have "
993  "internal error.";
994  });
995  }
996 }
997 
998 /// Compute known bits from a shift operator, including those with a
999 /// non-constant shift amount. Known is the output of this function. Known2 is a
1000 /// pre-allocated temporary with the same bit width as Known and on return
1001 /// contains the known bit of the shift value source. KF is an
1002 /// operator-specific function that, given the known-bits and a shift amount,
1003 /// compute the implied known-bits of the shift operator's result respectively
1004 /// for that shift amount. The results from calling KF are conservatively
1005 /// combined for all permitted shift amounts.
1007  const Operator *I, const APInt &DemandedElts, KnownBits &Known,
1008  KnownBits &Known2, unsigned Depth, const Query &Q,
1009  function_ref<KnownBits(const KnownBits &, const KnownBits &)> KF) {
1010  unsigned BitWidth = Known.getBitWidth();
1011  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1012  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1013 
1014  // Note: We cannot use Known.Zero.getLimitedValue() here, because if
1015  // BitWidth > 64 and any upper bits are known, we'll end up returning the
1016  // limit value (which implies all bits are known).
1017  uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
1018  uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
1019  bool ShiftAmtIsConstant = Known.isConstant();
1020  bool MaxShiftAmtIsOutOfRange = Known.getMaxValue().uge(BitWidth);
1021 
1022  if (ShiftAmtIsConstant) {
1023  Known = KF(Known2, Known);
1024 
1025  // If the known bits conflict, this must be an overflowing left shift, so
1026  // the shift result is poison. We can return anything we want. Choose 0 for
1027  // the best folding opportunity.
1028  if (Known.hasConflict())
1029  Known.setAllZero();
1030 
1031  return;
1032  }
1033 
1034  // If the shift amount could be greater than or equal to the bit-width of the
1035  // LHS, the value could be poison, but bail out because the check below is
1036  // expensive.
1037  // TODO: Should we just carry on?
1038  if (MaxShiftAmtIsOutOfRange) {
1039  Known.resetAll();
1040  return;
1041  }
1042 
1043  // It would be more-clearly correct to use the two temporaries for this
1044  // calculation. Reusing the APInts here to prevent unnecessary allocations.
1045  Known.resetAll();
1046 
1047  // If we know the shifter operand is nonzero, we can sometimes infer more
1048  // known bits. However this is expensive to compute, so be lazy about it and
1049  // only compute it when absolutely necessary.
1050  Optional<bool> ShifterOperandIsNonZero;
1051 
1052  // Early exit if we can't constrain any well-defined shift amount.
1053  if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
1054  !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
1055  ShifterOperandIsNonZero =
1056  isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
1057  if (!*ShifterOperandIsNonZero)
1058  return;
1059  }
1060 
1061  Known.Zero.setAllBits();
1062  Known.One.setAllBits();
1063  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
1064  // Combine the shifted known input bits only for those shift amounts
1065  // compatible with its known constraints.
1066  if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
1067  continue;
1068  if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
1069  continue;
1070  // If we know the shifter is nonzero, we may be able to infer more known
1071  // bits. This check is sunk down as far as possible to avoid the expensive
1072  // call to isKnownNonZero if the cheaper checks above fail.
1073  if (ShiftAmt == 0) {
1074  if (!ShifterOperandIsNonZero)
1075  ShifterOperandIsNonZero =
1076  isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
1077  if (*ShifterOperandIsNonZero)
1078  continue;
1079  }
1080 
1081  Known = KnownBits::commonBits(
1082  Known, KF(Known2, KnownBits::makeConstant(APInt(32, ShiftAmt))));
1083  }
1084 
1085  // If the known bits conflict, the result is poison. Return a 0 and hope the
1086  // caller can further optimize that.
1087  if (Known.hasConflict())
1088  Known.setAllZero();
1089 }
1090 
1092  const APInt &DemandedElts,
1093  KnownBits &Known, unsigned Depth,
1094  const Query &Q) {
1095  unsigned BitWidth = Known.getBitWidth();
1096 
1097  KnownBits Known2(BitWidth);
1098  switch (I->getOpcode()) {
1099  default: break;
1100  case Instruction::Load:
1101  if (MDNode *MD =
1102  Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
1104  break;
1105  case Instruction::And: {
1106  // If either the LHS or the RHS are Zero, the result is zero.
1107  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1108  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1109 
1110  Known &= Known2;
1111 
1112  // and(x, add (x, -1)) is a common idiom that always clears the low bit;
1113  // here we handle the more general case of adding any odd number by
1114  // matching the form add(x, add(x, y)) where y is odd.
1115  // TODO: This could be generalized to clearing any bit set in y where the
1116  // following bit is known to be unset in y.
1117  Value *X = nullptr, *Y = nullptr;
1118  if (!Known.Zero[0] && !Known.One[0] &&
1120  Known2.resetAll();
1121  computeKnownBits(Y, DemandedElts, Known2, Depth + 1, Q);
1122  if (Known2.countMinTrailingOnes() > 0)
1123  Known.Zero.setBit(0);
1124  }
1125  break;
1126  }
1127  case Instruction::Or:
1128  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1129  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1130 
1131  Known |= Known2;
1132  break;
1133  case Instruction::Xor:
1134  computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
1135  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1136 
1137  Known ^= Known2;
1138  break;
1139  case Instruction::Mul: {
1140  bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1141  computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts,
1142  Known, Known2, Depth, Q);
1143  break;
1144  }
1145  case Instruction::UDiv: {
1146  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1147  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1148  Known = KnownBits::udiv(Known, Known2);
1149  break;
1150  }
1151  case Instruction::Select: {
1152  const Value *LHS = nullptr, *RHS = nullptr;
1155  computeKnownBits(RHS, Known, Depth + 1, Q);
1156  computeKnownBits(LHS, Known2, Depth + 1, Q);
1157  switch (SPF) {
1158  default:
1159  llvm_unreachable("Unhandled select pattern flavor!");
1160  case SPF_SMAX:
1161  Known = KnownBits::smax(Known, Known2);
1162  break;
1163  case SPF_SMIN:
1164  Known = KnownBits::smin(Known, Known2);
1165  break;
1166  case SPF_UMAX:
1167  Known = KnownBits::umax(Known, Known2);
1168  break;
1169  case SPF_UMIN:
1170  Known = KnownBits::umin(Known, Known2);
1171  break;
1172  }
1173  break;
1174  }
1175 
1176  computeKnownBits(I->getOperand(2), Known, Depth + 1, Q);
1177  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1178 
1179  // Only known if known in both the LHS and RHS.
1180  Known = KnownBits::commonBits(Known, Known2);
1181 
1182  if (SPF == SPF_ABS) {
1183  // RHS from matchSelectPattern returns the negation part of abs pattern.
1184  // If the negate has an NSW flag we can assume the sign bit of the result
1185  // will be 0 because that makes abs(INT_MIN) undefined.
1186  if (match(RHS, m_Neg(m_Specific(LHS))) &&
1187  Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(RHS)))
1188  Known.Zero.setSignBit();
1189  }
1190 
1191  break;
1192  }
1193  case Instruction::FPTrunc:
1194  case Instruction::FPExt:
1195  case Instruction::FPToUI:
1196  case Instruction::FPToSI:
1197  case Instruction::SIToFP:
1198  case Instruction::UIToFP:
1199  break; // Can't work with floating point.
1200  case Instruction::PtrToInt:
1201  case Instruction::IntToPtr:
1202  // Fall through and handle them the same as zext/trunc.
1203  [[fallthrough]];
1204  case Instruction::ZExt:
1205  case Instruction::Trunc: {
1206  Type *SrcTy = I->getOperand(0)->getType();
1207 
1208  unsigned SrcBitWidth;
1209  // Note that we handle pointer operands here because of inttoptr/ptrtoint
1210  // which fall through here.
1211  Type *ScalarTy = SrcTy->getScalarType();
1212  SrcBitWidth = ScalarTy->isPointerTy() ?
1213  Q.DL.getPointerTypeSizeInBits(ScalarTy) :
1214  Q.DL.getTypeSizeInBits(ScalarTy);
1215 
1216  assert(SrcBitWidth && "SrcBitWidth can't be zero");
1217  Known = Known.anyextOrTrunc(SrcBitWidth);
1218  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1219  Known = Known.zextOrTrunc(BitWidth);
1220  break;
1221  }
1222  case Instruction::BitCast: {
1223  Type *SrcTy = I->getOperand(0)->getType();
1224  if (SrcTy->isIntOrPtrTy() &&
1225  // TODO: For now, not handling conversions like:
1226  // (bitcast i64 %x to <2 x i32>)
1227  !I->getType()->isVectorTy()) {
1228  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1229  break;
1230  }
1231 
1232  // Handle cast from vector integer type to scalar or vector integer.
1233  auto *SrcVecTy = dyn_cast<FixedVectorType>(SrcTy);
1234  if (!SrcVecTy || !SrcVecTy->getElementType()->isIntegerTy() ||
1235  !I->getType()->isIntOrIntVectorTy())
1236  break;
1237 
1238  // Look through a cast from narrow vector elements to wider type.
1239  // Examples: v4i32 -> v2i64, v3i8 -> v24
1240  unsigned SubBitWidth = SrcVecTy->getScalarSizeInBits();
1241  if (BitWidth % SubBitWidth == 0) {
1242  // Known bits are automatically intersected across demanded elements of a
1243  // vector. So for example, if a bit is computed as known zero, it must be
1244  // zero across all demanded elements of the vector.
1245  //
1246  // For this bitcast, each demanded element of the output is sub-divided
1247  // across a set of smaller vector elements in the source vector. To get
1248  // the known bits for an entire element of the output, compute the known
1249  // bits for each sub-element sequentially. This is done by shifting the
1250  // one-set-bit demanded elements parameter across the sub-elements for
1251  // consecutive calls to computeKnownBits. We are using the demanded
1252  // elements parameter as a mask operator.
1253  //
1254  // The known bits of each sub-element are then inserted into place
1255  // (dependent on endian) to form the full result of known bits.
1256  unsigned NumElts = DemandedElts.getBitWidth();
1257  unsigned SubScale = BitWidth / SubBitWidth;
1258  APInt SubDemandedElts = APInt::getZero(NumElts * SubScale);
1259  for (unsigned i = 0; i != NumElts; ++i) {
1260  if (DemandedElts[i])
1261  SubDemandedElts.setBit(i * SubScale);
1262  }
1263 
1264  KnownBits KnownSrc(SubBitWidth);
1265  for (unsigned i = 0; i != SubScale; ++i) {
1266  computeKnownBits(I->getOperand(0), SubDemandedElts.shl(i), KnownSrc,
1267  Depth + 1, Q);
1268  unsigned ShiftElt = Q.DL.isLittleEndian() ? i : SubScale - 1 - i;
1269  Known.insertBits(KnownSrc, ShiftElt * SubBitWidth);
1270  }
1271  }
1272  break;
1273  }
1274  case Instruction::SExt: {
1275  // Compute the bits in the result that are not present in the input.
1276  unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits();
1277 
1278  Known = Known.trunc(SrcBitWidth);
1279  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1280  // If the sign bit of the input is known set or clear, then we know the
1281  // top bits of the result.
1282  Known = Known.sext(BitWidth);
1283  break;
1284  }
1285  case Instruction::Shl: {
1286  bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1287  auto KF = [NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
1288  KnownBits Result = KnownBits::shl(KnownVal, KnownAmt);
1289  // If this shift has "nsw" keyword, then the result is either a poison
1290  // value or has the same sign bit as the first operand.
1291  if (NSW) {
1292  if (KnownVal.Zero.isSignBitSet())
1293  Result.Zero.setSignBit();
1294  if (KnownVal.One.isSignBitSet())
1295  Result.One.setSignBit();
1296  }
1297  return Result;
1298  };
1299  computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1300  KF);
1301  // Trailing zeros of a right-shifted constant never decrease.
1302  const APInt *C;
1303  if (match(I->getOperand(0), m_APInt(C)))
1304  Known.Zero.setLowBits(C->countTrailingZeros());
1305  break;
1306  }
1307  case Instruction::LShr: {
1308  auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
1309  return KnownBits::lshr(KnownVal, KnownAmt);
1310  };
1311  computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1312  KF);
1313  // Leading zeros of a left-shifted constant never decrease.
1314  const APInt *C;
1315  if (match(I->getOperand(0), m_APInt(C)))
1316  Known.Zero.setHighBits(C->countLeadingZeros());
1317  break;
1318  }
1319  case Instruction::AShr: {
1320  auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
1321  return KnownBits::ashr(KnownVal, KnownAmt);
1322  };
1323  computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
1324  KF);
1325  break;
1326  }
1327  case Instruction::Sub: {
1328  bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1329  computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW,
1330  DemandedElts, Known, Known2, Depth, Q);
1331  break;
1332  }
1333  case Instruction::Add: {
1334  bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
1335  computeKnownBitsAddSub(true, I->getOperand(0), I->getOperand(1), NSW,
1336  DemandedElts, Known, Known2, Depth, Q);
1337  break;
1338  }
1339  case Instruction::SRem:
1340  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1341  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1342  Known = KnownBits::srem(Known, Known2);
1343  break;
1344 
1345  case Instruction::URem:
1346  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1347  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1348  Known = KnownBits::urem(Known, Known2);
1349  break;
1350  case Instruction::Alloca:
1351  Known.Zero.setLowBits(Log2(cast<AllocaInst>(I)->getAlign()));
1352  break;
1353  case Instruction::GetElementPtr: {
1354  // Analyze all of the subscripts of this getelementptr instruction
1355  // to determine if we can prove known low zero bits.
1356  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1357  // Accumulate the constant indices in a separate variable
1358  // to minimize the number of calls to computeForAddSub.
1359  APInt AccConstIndices(BitWidth, 0, /*IsSigned*/ true);
1360 
1362  for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
1363  // TrailZ can only become smaller, short-circuit if we hit zero.
1364  if (Known.isUnknown())
1365  break;
1366 
1367  Value *Index = I->getOperand(i);
1368 
1369  // Handle case when index is zero.
1370  Constant *CIndex = dyn_cast<Constant>(Index);
1371  if (CIndex && CIndex->isZeroValue())
1372  continue;
1373 
1374  if (StructType *STy = GTI.getStructTypeOrNull()) {
1375  // Handle struct member offset arithmetic.
1376 
1377  assert(CIndex &&
1378  "Access to structure field must be known at compile time");
1379 
1380  if (CIndex->getType()->isVectorTy())
1381  Index = CIndex->getSplatValue();
1382 
1383  unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
1384  const StructLayout *SL = Q.DL.getStructLayout(STy);
1385  uint64_t Offset = SL->getElementOffset(Idx);
1386  AccConstIndices += Offset;
1387  continue;
1388  }
1389 
1390  // Handle array index arithmetic.
1391  Type *IndexedTy = GTI.getIndexedType();
1392  if (!IndexedTy->isSized()) {
1393  Known.resetAll();
1394  break;
1395  }
1396 
1397  unsigned IndexBitWidth = Index->getType()->getScalarSizeInBits();
1398  KnownBits IndexBits(IndexBitWidth);
1399  computeKnownBits(Index, IndexBits, Depth + 1, Q);
1400  TypeSize IndexTypeSize = Q.DL.getTypeAllocSize(IndexedTy);
1401  uint64_t TypeSizeInBytes = IndexTypeSize.getKnownMinSize();
1402  KnownBits ScalingFactor(IndexBitWidth);
1403  // Multiply by current sizeof type.
1404  // &A[i] == A + i * sizeof(*A[i]).
1405  if (IndexTypeSize.isScalable()) {
1406  // For scalable types the only thing we know about sizeof is
1407  // that this is a multiple of the minimum size.
1408  ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes));
1409  } else if (IndexBits.isConstant()) {
1410  APInt IndexConst = IndexBits.getConstant();
1411  APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
1412  IndexConst *= ScalingFactor;
1413  AccConstIndices += IndexConst.sextOrTrunc(BitWidth);
1414  continue;
1415  } else {
1416  ScalingFactor =
1417  KnownBits::makeConstant(APInt(IndexBitWidth, TypeSizeInBytes));
1418  }
1419  IndexBits = KnownBits::mul(IndexBits, ScalingFactor);
1420 
1421  // If the offsets have a different width from the pointer, according
1422  // to the language reference we need to sign-extend or truncate them
1423  // to the width of the pointer.
1424  IndexBits = IndexBits.sextOrTrunc(BitWidth);
1425 
1426  // Note that inbounds does *not* guarantee nsw for the addition, as only
1427  // the offset is signed, while the base address is unsigned.
1429  /*Add=*/true, /*NSW=*/false, Known, IndexBits);
1430  }
1431  if (!Known.isUnknown() && !AccConstIndices.isZero()) {
1432  KnownBits Index = KnownBits::makeConstant(AccConstIndices);
1434  /*Add=*/true, /*NSW=*/false, Known, Index);
1435  }
1436  break;
1437  }
1438  case Instruction::PHI: {
1439  const PHINode *P = cast<PHINode>(I);
1440  BinaryOperator *BO = nullptr;
1441  Value *R = nullptr, *L = nullptr;
1442  if (matchSimpleRecurrence(P, BO, R, L)) {
1443  // Handle the case of a simple two-predecessor recurrence PHI.
1444  // There's a lot more that could theoretically be done here, but
1445  // this is sufficient to catch some interesting cases.
1446  unsigned Opcode = BO->getOpcode();
1447 
1448  // If this is a shift recurrence, we know the bits being shifted in.
1449  // We can combine that with information about the start value of the
1450  // recurrence to conclude facts about the result.
1451  if ((Opcode == Instruction::LShr || Opcode == Instruction::AShr ||
1452  Opcode == Instruction::Shl) &&
1453  BO->getOperand(0) == I) {
1454 
1455  // We have matched a recurrence of the form:
1456  // %iv = [R, %entry], [%iv.next, %backedge]
1457  // %iv.next = shift_op %iv, L
1458 
1459  // Recurse with the phi context to avoid concern about whether facts
1460  // inferred hold at original context instruction. TODO: It may be
1461  // correct to use the original context. IF warranted, explore and
1462  // add sufficient tests to cover.
1463  Query RecQ = Q;
1464  RecQ.CxtI = P;
1465  computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
1466  switch (Opcode) {
1467  case Instruction::Shl:
1468  // A shl recurrence will only increase the tailing zeros
1469  Known.Zero.setLowBits(Known2.countMinTrailingZeros());
1470  break;
1471  case Instruction::LShr:
1472  // A lshr recurrence will preserve the leading zeros of the
1473  // start value
1474  Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1475  break;
1476  case Instruction::AShr:
1477  // An ashr recurrence will extend the initial sign bit
1478  Known.Zero.setHighBits(Known2.countMinLeadingZeros());
1479  Known.One.setHighBits(Known2.countMinLeadingOnes());
1480  break;
1481  };
1482  }
1483 
1484  // Check for operations that have the property that if
1485  // both their operands have low zero bits, the result
1486  // will have low zero bits.
1487  if (Opcode == Instruction::Add ||
1488  Opcode == Instruction::Sub ||
1489  Opcode == Instruction::And ||
1490  Opcode == Instruction::Or ||
1491  Opcode == Instruction::Mul) {
1492  // Change the context instruction to the "edge" that flows into the
1493  // phi. This is important because that is where the value is actually
1494  // "evaluated" even though it is used later somewhere else. (see also
1495  // D69571).
1496  Query RecQ = Q;
1497 
1498  unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
1499  Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
1500  Instruction *LInst = P->getIncomingBlock(1-OpNum)->getTerminator();
1501 
1502  // Ok, we have a PHI of the form L op= R. Check for low
1503  // zero bits.
1504  RecQ.CxtI = RInst;
1505  computeKnownBits(R, Known2, Depth + 1, RecQ);
1506 
1507  // We need to take the minimum number of known bits
1508  KnownBits Known3(BitWidth);
1509  RecQ.CxtI = LInst;
1510  computeKnownBits(L, Known3, Depth + 1, RecQ);
1511 
1513  Known3.countMinTrailingZeros()));
1514 
1515  auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(BO);
1516  if (OverflowOp && Q.IIQ.hasNoSignedWrap(OverflowOp)) {
1517  // If initial value of recurrence is nonnegative, and we are adding
1518  // a nonnegative number with nsw, the result can only be nonnegative
1519  // or poison value regardless of the number of times we execute the
1520  // add in phi recurrence. If initial value is negative and we are
1521  // adding a negative number with nsw, the result can only be
1522  // negative or poison value. Similar arguments apply to sub and mul.
1523  //
1524  // (add non-negative, non-negative) --> non-negative
1525  // (add negative, negative) --> negative
1526  if (Opcode == Instruction::Add) {
1527  if (Known2.isNonNegative() && Known3.isNonNegative())
1528  Known.makeNonNegative();
1529  else if (Known2.isNegative() && Known3.isNegative())
1530  Known.makeNegative();
1531  }
1532 
1533  // (sub nsw non-negative, negative) --> non-negative
1534  // (sub nsw negative, non-negative) --> negative
1535  else if (Opcode == Instruction::Sub && BO->getOperand(0) == I) {
1536  if (Known2.isNonNegative() && Known3.isNegative())
1537  Known.makeNonNegative();
1538  else if (Known2.isNegative() && Known3.isNonNegative())
1539  Known.makeNegative();
1540  }
1541 
1542  // (mul nsw non-negative, non-negative) --> non-negative
1543  else if (Opcode == Instruction::Mul && Known2.isNonNegative() &&
1544  Known3.isNonNegative())
1545  Known.makeNonNegative();
1546  }
1547 
1548  break;
1549  }
1550  }
1551 
1552  // Unreachable blocks may have zero-operand PHI nodes.
1553  if (P->getNumIncomingValues() == 0)
1554  break;
1555 
1556  // Otherwise take the unions of the known bit sets of the operands,
1557  // taking conservative care to avoid excessive recursion.
1558  if (Depth < MaxAnalysisRecursionDepth - 1 && !Known.Zero && !Known.One) {
1559  // Skip if every incoming value references to ourself.
1560  if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
1561  break;
1562 
1563  Known.Zero.setAllBits();
1564  Known.One.setAllBits();
1565  for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) {
1566  Value *IncValue = P->getIncomingValue(u);
1567  // Skip direct self references.
1568  if (IncValue == P) continue;
1569 
1570  // Change the context instruction to the "edge" that flows into the
1571  // phi. This is important because that is where the value is actually
1572  // "evaluated" even though it is used later somewhere else. (see also
1573  // D69571).
1574  Query RecQ = Q;
1575  RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
1576 
1577  Known2 = KnownBits(BitWidth);
1578 
1579  // Recurse, but cap the recursion to one level, because we don't
1580  // want to waste time spinning around in loops.
1581  computeKnownBits(IncValue, Known2, MaxAnalysisRecursionDepth - 1, RecQ);
1582 
1583  // If this failed, see if we can use a conditional branch into the phi
1584  // to help us determine the range of the value.
1585  if (Known2.isUnknown()) {
1586  ICmpInst::Predicate Pred;
1587  const APInt *RHSC;
1588  BasicBlock *TrueSucc, *FalseSucc;
1589  // TODO: Use RHS Value and compute range from its known bits.
1590  if (match(RecQ.CxtI,
1591  m_Br(m_c_ICmp(Pred, m_Specific(IncValue), m_APInt(RHSC)),
1592  m_BasicBlock(TrueSucc), m_BasicBlock(FalseSucc)))) {
1593  // Check for cases of duplicate successors.
1594  if ((TrueSucc == P->getParent()) != (FalseSucc == P->getParent())) {
1595  // If we're using the false successor, invert the predicate.
1596  if (FalseSucc == P->getParent())
1597  Pred = CmpInst::getInversePredicate(Pred);
1598 
1599  switch (Pred) {
1600  case CmpInst::Predicate::ICMP_EQ:
1601  Known2 = KnownBits::makeConstant(*RHSC);
1602  break;
1603  case CmpInst::Predicate::ICMP_ULE:
1604  Known2.Zero.setHighBits(RHSC->countLeadingZeros());
1605  break;
1606  case CmpInst::Predicate::ICMP_ULT:
1607  Known2.Zero.setHighBits((*RHSC - 1).countLeadingZeros());
1608  break;
1609  default:
1610  // TODO - add additional integer predicate handling.
1611  break;
1612  }
1613  }
1614  }
1615  }
1616 
1617  Known = KnownBits::commonBits(Known, Known2);
1618  // If all bits have been ruled out, there's no need to check
1619  // more operands.
1620  if (Known.isUnknown())
1621  break;
1622  }
1623  }
1624  break;
1625  }
1626  case Instruction::Call:
1627  case Instruction::Invoke:
1628  // If range metadata is attached to this call, set known bits from that,
1629  // and then intersect with known bits based on other properties of the
1630  // function.
1631  if (MDNode *MD =
1632  Q.IIQ.getMetadata(cast<Instruction>(I), LLVMContext::MD_range))
1634  if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) {
1635  computeKnownBits(RV, Known2, Depth + 1, Q);
1636  Known.Zero |= Known2.Zero;
1637  Known.One |= Known2.One;
1638  }
1639  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1640  switch (II->getIntrinsicID()) {
1641  default: break;
1642  case Intrinsic::abs: {
1643  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1644  bool IntMinIsPoison = match(II->getArgOperand(1), m_One());
1645  Known = Known2.abs(IntMinIsPoison);
1646  break;
1647  }
1648  case Intrinsic::bitreverse:
1649  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1650  Known.Zero |= Known2.Zero.reverseBits();
1651  Known.One |= Known2.One.reverseBits();
1652  break;
1653  case Intrinsic::bswap:
1654  computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
1655  Known.Zero |= Known2.Zero.byteSwap();
1656  Known.One |= Known2.One.byteSwap();
1657  break;
1658  case Intrinsic::ctlz: {
1659  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1660  // If we have a known 1, its position is our upper bound.
1661  unsigned PossibleLZ = Known2.countMaxLeadingZeros();
1662  // If this call is poison for 0 input, the result will be less than 2^n.
1663  if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1664  PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
1665  unsigned LowBits = Log2_32(PossibleLZ)+1;
1666  Known.Zero.setBitsFrom(LowBits);
1667  break;
1668  }
1669  case Intrinsic::cttz: {
1670  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1671  // If we have a known 1, its position is our upper bound.
1672  unsigned PossibleTZ = Known2.countMaxTrailingZeros();
1673  // If this call is poison for 0 input, the result will be less than 2^n.
1674  if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
1675  PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
1676  unsigned LowBits = Log2_32(PossibleTZ)+1;
1677  Known.Zero.setBitsFrom(LowBits);
1678  break;
1679  }
1680  case Intrinsic::ctpop: {
1681  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1682  // We can bound the space the count needs. Also, bits known to be zero
1683  // can't contribute to the population.
1684  unsigned BitsPossiblySet = Known2.countMaxPopulation();
1685  unsigned LowBits = Log2_32(BitsPossiblySet)+1;
1686  Known.Zero.setBitsFrom(LowBits);
1687  // TODO: we could bound KnownOne using the lower bound on the number
1688  // of bits which might be set provided by popcnt KnownOne2.
1689  break;
1690  }
1691  case Intrinsic::fshr:
1692  case Intrinsic::fshl: {
1693  const APInt *SA;
1694  if (!match(I->getOperand(2), m_APInt(SA)))
1695  break;
1696 
1697  // Normalize to funnel shift left.
1698  uint64_t ShiftAmt = SA->urem(BitWidth);
1699  if (II->getIntrinsicID() == Intrinsic::fshr)
1700  ShiftAmt = BitWidth - ShiftAmt;
1701 
1702  KnownBits Known3(BitWidth);
1703  computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
1704  computeKnownBits(I->getOperand(1), Known3, Depth + 1, Q);
1705 
1706  Known.Zero =
1707  Known2.Zero.shl(ShiftAmt) | Known3.Zero.lshr(BitWidth - ShiftAmt);
1708  Known.One =
1709  Known2.One.shl(ShiftAmt) | Known3.One.lshr(BitWidth - ShiftAmt);
1710  break;
1711  }
1712  case Intrinsic::uadd_sat:
1713  case Intrinsic::usub_sat: {
1714  bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat;
1715  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1716  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1717 
1718  // Add: Leading ones of either operand are preserved.
1719  // Sub: Leading zeros of LHS and leading ones of RHS are preserved
1720  // as leading zeros in the result.
1721  unsigned LeadingKnown;
1722  if (IsAdd)
1723  LeadingKnown = std::max(Known.countMinLeadingOnes(),
1724  Known2.countMinLeadingOnes());
1725  else
1726  LeadingKnown = std::max(Known.countMinLeadingZeros(),
1727  Known2.countMinLeadingOnes());
1728 
1730  IsAdd, /* NSW */ false, Known, Known2);
1731 
1732  // We select between the operation result and all-ones/zero
1733  // respectively, so we can preserve known ones/zeros.
1734  if (IsAdd) {
1735  Known.One.setHighBits(LeadingKnown);
1736  Known.Zero.clearAllBits();
1737  } else {
1738  Known.Zero.setHighBits(LeadingKnown);
1739  Known.One.clearAllBits();
1740  }
1741  break;
1742  }
1743  case Intrinsic::umin:
1744  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1745  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1746  Known = KnownBits::umin(Known, Known2);
1747  break;
1748  case Intrinsic::umax:
1749  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1750  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1751  Known = KnownBits::umax(Known, Known2);
1752  break;
1753  case Intrinsic::smin:
1754  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1755  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1756  Known = KnownBits::smin(Known, Known2);
1757  break;
1758  case Intrinsic::smax:
1759  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1760  computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
1761  Known = KnownBits::smax(Known, Known2);
1762  break;
1763  case Intrinsic::x86_sse42_crc32_64_64:
1764  Known.Zero.setBitsFrom(32);
1765  break;
1766  case Intrinsic::riscv_vsetvli:
1767  case Intrinsic::riscv_vsetvlimax:
1768  // Assume that VL output is positive and would fit in an int32_t.
1769  // TODO: VLEN might be capped at 16 bits in a future V spec update.
1770  if (BitWidth >= 32)
1771  Known.Zero.setBitsFrom(31);
1772  break;
1773  case Intrinsic::vscale: {
1774  if (!II->getParent() || !II->getFunction() ||
1775  !II->getFunction()->hasFnAttribute(Attribute::VScaleRange))
1776  break;
1777 
1778  auto Attr = II->getFunction()->getFnAttribute(Attribute::VScaleRange);
1779  Optional<unsigned> VScaleMax = Attr.getVScaleRangeMax();
1780 
1781  if (!VScaleMax)
1782  break;
1783 
1784  unsigned VScaleMin = Attr.getVScaleRangeMin();
1785 
1786  // If vscale min = max then we know the exact value at compile time
1787  // and hence we know the exact bits.
1788  if (VScaleMin == VScaleMax) {
1789  Known.One = VScaleMin;
1790  Known.Zero = VScaleMin;
1791  Known.Zero.flipAllBits();
1792  break;
1793  }
1794 
1795  unsigned FirstZeroHighBit = 32 - countLeadingZeros(*VScaleMax);
1796  if (FirstZeroHighBit < BitWidth)
1797  Known.Zero.setBitsFrom(FirstZeroHighBit);
1798 
1799  break;
1800  }
1801  }
1802  }
1803  break;
1804  case Instruction::ShuffleVector: {
1805  auto *Shuf = dyn_cast<ShuffleVectorInst>(I);
1806  // FIXME: Do we need to handle ConstantExpr involving shufflevectors?
1807  if (!Shuf) {
1808  Known.resetAll();
1809  return;
1810  }
1811  // For undef elements, we don't know anything about the common state of
1812  // the shuffle result.
1813  APInt DemandedLHS, DemandedRHS;
1814  if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS)) {
1815  Known.resetAll();
1816  return;
1817  }
1818  Known.One.setAllBits();
1819  Known.Zero.setAllBits();
1820  if (!!DemandedLHS) {
1821  const Value *LHS = Shuf->getOperand(0);
1822  computeKnownBits(LHS, DemandedLHS, Known, Depth + 1, Q);
1823  // If we don't know any bits, early out.
1824  if (Known.isUnknown())
1825  break;
1826  }
1827  if (!!DemandedRHS) {
1828  const Value *RHS = Shuf->getOperand(1);
1829  computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
1830  Known = KnownBits::commonBits(Known, Known2);
1831  }
1832  break;
1833  }
1834  case Instruction::InsertElement: {
1835  const Value *Vec = I->getOperand(0);
1836  const Value *Elt = I->getOperand(1);
1837  auto *CIdx = dyn_cast<ConstantInt>(I->getOperand(2));
1838  // Early out if the index is non-constant or out-of-range.
1839  unsigned NumElts = DemandedElts.getBitWidth();
1840  if (!CIdx || CIdx->getValue().uge(NumElts)) {
1841  Known.resetAll();
1842  return;
1843  }
1844  Known.One.setAllBits();
1845  Known.Zero.setAllBits();
1846  unsigned EltIdx = CIdx->getZExtValue();
1847  // Do we demand the inserted element?
1848  if (DemandedElts[EltIdx]) {
1849  computeKnownBits(Elt, Known, Depth + 1, Q);
1850  // If we don't know any bits, early out.
1851  if (Known.isUnknown())
1852  break;
1853  }
1854  // We don't need the base vector element that has been inserted.
1855  APInt DemandedVecElts = DemandedElts;
1856  DemandedVecElts.clearBit(EltIdx);
1857  if (!!DemandedVecElts) {
1858  computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
1859  Known = KnownBits::commonBits(Known, Known2);
1860  }
1861  break;
1862  }
1863  case Instruction::ExtractElement: {
1864  // Look through extract element. If the index is non-constant or
1865  // out-of-range demand all elements, otherwise just the extracted element.
1866  const Value *Vec = I->getOperand(0);
1867  const Value *Idx = I->getOperand(1);
1868  auto *CIdx = dyn_cast<ConstantInt>(Idx);
1869  if (isa<ScalableVectorType>(Vec->getType())) {
1870  // FIXME: there's probably *something* we can do with scalable vectors
1871  Known.resetAll();
1872  break;
1873  }
1874  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
1875  APInt DemandedVecElts = APInt::getAllOnes(NumElts);
1876  if (CIdx && CIdx->getValue().ult(NumElts))
1877  DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
1878  computeKnownBits(Vec, DemandedVecElts, Known, Depth + 1, Q);
1879  break;
1880  }
1881  case Instruction::ExtractValue:
1882  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
1883  const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
1884  if (EVI->getNumIndices() != 1) break;
1885  if (EVI->getIndices()[0] == 0) {
1886  switch (II->getIntrinsicID()) {
1887  default: break;
1888  case Intrinsic::uadd_with_overflow:
1889  case Intrinsic::sadd_with_overflow:
1890  computeKnownBitsAddSub(true, II->getArgOperand(0),
1891  II->getArgOperand(1), false, DemandedElts,
1892  Known, Known2, Depth, Q);
1893  break;
1894  case Intrinsic::usub_with_overflow:
1895  case Intrinsic::ssub_with_overflow:
1896  computeKnownBitsAddSub(false, II->getArgOperand(0),
1897  II->getArgOperand(1), false, DemandedElts,
1898  Known, Known2, Depth, Q);
1899  break;
1900  case Intrinsic::umul_with_overflow:
1901  case Intrinsic::smul_with_overflow:
1902  computeKnownBitsMul(II->getArgOperand(0), II->getArgOperand(1), false,
1903  DemandedElts, Known, Known2, Depth, Q);
1904  break;
1905  }
1906  }
1907  }
1908  break;
1909  case Instruction::Freeze:
1910  if (isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
1911  Depth + 1))
1912  computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
1913  break;
1914  }
1915 }
1916 
1917 /// Determine which bits of V are known to be either zero or one and return
1918 /// them.
1919 KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
1920  unsigned Depth, const Query &Q) {
1921  KnownBits Known(getBitWidth(V->getType(), Q.DL));
1922  computeKnownBits(V, DemandedElts, Known, Depth, Q);
1923  return Known;
1924 }
1925 
1926 /// Determine which bits of V are known to be either zero or one and return
1927 /// them.
1928 KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
1929  KnownBits Known(getBitWidth(V->getType(), Q.DL));
1930  computeKnownBits(V, Known, Depth, Q);
1931  return Known;
1932 }
1933 
1934 /// Determine which bits of V are known to be either zero or one and return
1935 /// them in the Known bit set.
1936 ///
1937 /// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
1938 /// we cannot optimize based on the assumption that it is zero without changing
1939 /// it to be an explicit zero. If we don't change it to zero, other code could
1940 /// optimized based on the contradictory assumption that it is non-zero.
1941 /// Because instcombine aggressively folds operations with undef args anyway,
1942 /// this won't lose us code quality.
1943 ///
1944 /// This function is defined on values with integer type, values with pointer
1945 /// type, and vectors of integers. In the case
1946 /// where V is a vector, known zero, and known one values are the
1947 /// same width as the vector element, and the bit is set only if it is true
1948 /// for all of the demanded elements in the vector specified by DemandedElts.
1949 void computeKnownBits(const Value *V, const APInt &DemandedElts,
1950  KnownBits &Known, unsigned Depth, const Query &Q) {
1951  if (!DemandedElts || isa<ScalableVectorType>(V->getType())) {
1952  // No demanded elts or V is a scalable vector, better to assume we don't
1953  // know anything.
1954  Known.resetAll();
1955  return;
1956  }
1957 
1958  assert(V && "No Value?");
1959  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
1960 
1961 #ifndef NDEBUG
1962  Type *Ty = V->getType();
1963  unsigned BitWidth = Known.getBitWidth();
1964 
1966  "Not integer or pointer type!");
1967 
1968  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
1969  assert(
1970  FVTy->getNumElements() == DemandedElts.getBitWidth() &&
1971  "DemandedElt width should equal the fixed vector number of elements");
1972  } else {
1973  assert(DemandedElts == APInt(1, 1) &&
1974  "DemandedElt width should be 1 for scalars");
1975  }
1976 
1977  Type *ScalarTy = Ty->getScalarType();
1978  if (ScalarTy->isPointerTy()) {
1979  assert(BitWidth == Q.DL.getPointerTypeSizeInBits(ScalarTy) &&
1980  "V and Known should have same BitWidth");
1981  } else {
1982  assert(BitWidth == Q.DL.getTypeSizeInBits(ScalarTy) &&
1983  "V and Known should have same BitWidth");
1984  }
1985 #endif
1986 
1987  const APInt *C;
1988  if (match(V, m_APInt(C))) {
1989  // We know all of the bits for a scalar constant or a splat vector constant!
1990  Known = KnownBits::makeConstant(*C);
1991  return;
1992  }
1993  // Null and aggregate-zero are all-zeros.
1994  if (isa<ConstantPointerNull>(V) || isa<ConstantAggregateZero>(V)) {
1995  Known.setAllZero();
1996  return;
1997  }
1998  // Handle a constant vector by taking the intersection of the known bits of
1999  // each element.
2000  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
2001  // We know that CDV must be a vector of integers. Take the intersection of
2002  // each element.
2003  Known.Zero.setAllBits(); Known.One.setAllBits();
2004  for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
2005  if (!DemandedElts[i])
2006  continue;
2007  APInt Elt = CDV->getElementAsAPInt(i);
2008  Known.Zero &= ~Elt;
2009  Known.One &= Elt;
2010  }
2011  return;
2012  }
2013 
2014  if (const auto *CV = dyn_cast<ConstantVector>(V)) {
2015  // We know that CV must be a vector of integers. Take the intersection of
2016  // each element.
2017  Known.Zero.setAllBits(); Known.One.setAllBits();
2018  for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
2019  if (!DemandedElts[i])
2020  continue;
2021  Constant *Element = CV->getAggregateElement(i);
2022  auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
2023  if (!ElementCI) {
2024  Known.resetAll();
2025  return;
2026  }
2027  const APInt &Elt = ElementCI->getValue();
2028  Known.Zero &= ~Elt;
2029  Known.One &= Elt;
2030  }
2031  return;
2032  }
2033 
2034  // Start out not knowing anything.
2035  Known.resetAll();
2036 
2037  // We can't imply anything about undefs.
2038  if (isa<UndefValue>(V))
2039  return;
2040 
2041  // There's no point in looking through other users of ConstantData for
2042  // assumptions. Confirm that we've handled them all.
2043  assert(!isa<ConstantData>(V) && "Unhandled constant data!");
2044 
2045  // All recursive calls that increase depth must come after this.
2047  return;
2048 
2049  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
2050  // the bits of its aliasee.
2051  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
2052  if (!GA->isInterposable())
2053  computeKnownBits(GA->getAliasee(), Known, Depth + 1, Q);
2054  return;
2055  }
2056 
2057  if (const Operator *I = dyn_cast<Operator>(V))
2058  computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
2059 
2060  // Aligned pointers have trailing zeros - refine Known.Zero set
2061  if (isa<PointerType>(V->getType())) {
2062  Align Alignment = V->getPointerAlignment(Q.DL);
2063  Known.Zero.setLowBits(Log2(Alignment));
2064  }
2065 
2066  // computeKnownBitsFromAssume strictly refines Known.
2067  // Therefore, we run them after computeKnownBitsFromOperator.
2068 
2069  // Check whether a nearby assume intrinsic can determine some known bits.
2070  computeKnownBitsFromAssume(V, Known, Depth, Q);
2071 
2072  assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
2073 }
2074 
2075 /// Try to detect a recurrence that the value of the induction variable is
2076 /// always a power of two (or zero).
2077 static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
2078  unsigned Depth, Query &Q) {
2079  BinaryOperator *BO = nullptr;
2080  Value *Start = nullptr, *Step = nullptr;
2081  if (!matchSimpleRecurrence(PN, BO, Start, Step))
2082  return false;
2083 
2084  // Initial value must be a power of two.
2085  for (const Use &U : PN->operands()) {
2086  if (U.get() == Start) {
2087  // Initial value comes from a different BB, need to adjust context
2088  // instruction for analysis.
2089  Q.CxtI = PN->getIncomingBlock(U)->getTerminator();
2090  if (!isKnownToBeAPowerOfTwo(Start, OrZero, Depth, Q))
2091  return false;
2092  }
2093  }
2094 
2095  // Except for Mul, the induction variable must be on the left side of the
2096  // increment expression, otherwise its value can be arbitrary.
2097  if (BO->getOpcode() != Instruction::Mul && BO->getOperand(1) != Step)
2098  return false;
2099 
2100  Q.CxtI = BO->getParent()->getTerminator();
2101  switch (BO->getOpcode()) {
2102  case Instruction::Mul:
2103  // Power of two is closed under multiplication.
2104  return (OrZero || Q.IIQ.hasNoUnsignedWrap(BO) ||
2105  Q.IIQ.hasNoSignedWrap(BO)) &&
2106  isKnownToBeAPowerOfTwo(Step, OrZero, Depth, Q);
2107  case Instruction::SDiv:
2108  // Start value must not be signmask for signed division, so simply being a
2109  // power of two is not sufficient, and it has to be a constant.
2110  if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2111  return false;
2112  [[fallthrough]];
2113  case Instruction::UDiv:
2114  // Divisor must be a power of two.
2115  // If OrZero is false, cannot guarantee induction variable is non-zero after
2116  // division, same for Shr, unless it is exact division.
2117  return (OrZero || Q.IIQ.isExact(BO)) &&
2118  isKnownToBeAPowerOfTwo(Step, false, Depth, Q);
2119  case Instruction::Shl:
2120  return OrZero || Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO);
2121  case Instruction::AShr:
2122  if (!match(Start, m_Power2()) || match(Start, m_SignMask()))
2123  return false;
2124  [[fallthrough]];
2125  case Instruction::LShr:
2126  return OrZero || Q.IIQ.isExact(BO);
2127  default:
2128  return false;
2129  }
2130 }
2131 
2132 /// Return true if the given value is known to have exactly one
2133 /// bit set when defined. For vectors return true if every element is known to
2134 /// be a power of two when defined. Supports values with integer or pointer
2135 /// types and vectors of integers.
2136 bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
2137  const Query &Q) {
2138  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
2139 
2140  // Attempt to match against constants.
2141  if (OrZero && match(V, m_Power2OrZero()))
2142  return true;
2143  if (match(V, m_Power2()))
2144  return true;
2145 
2146  // 1 << X is clearly a power of two if the one is not shifted off the end. If
2147  // it is shifted off the end then the result is undefined.
2148  if (match(V, m_Shl(m_One(), m_Value())))
2149  return true;
2150 
2151  // (signmask) >>l X is clearly a power of two if the one is not shifted off
2152  // the bottom. If it is shifted off the bottom then the result is undefined.
2153  if (match(V, m_LShr(m_SignMask(), m_Value())))
2154  return true;
2155 
2156  // The remaining tests are all recursive, so bail out if we hit the limit.
2158  return false;
2159 
2160  Value *X = nullptr, *Y = nullptr;
2161  // A shift left or a logical shift right of a power of two is a power of two
2162  // or zero.
2163  if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) ||
2164  match(V, m_LShr(m_Value(X), m_Value()))))
2165  return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);
2166 
2167  if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
2168  return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
2169 
2170  if (const SelectInst *SI = dyn_cast<SelectInst>(V))
2171  return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
2172  isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
2173 
2174  // Peek through min/max.
2175  if (match(V, m_MaxOrMin(m_Value(X), m_Value(Y)))) {
2176  return isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q) &&
2177  isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q);
2178  }
2179 
2180  if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) {
2181  // A power of two and'd with anything is a power of two or zero.
2182  if (isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q) ||
2183  isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, Depth, Q))
2184  return true;
2185  // X & (-X) is always a power of two or zero.
2186  if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X))))
2187  return true;
2188  return false;
2189  }
2190 
2191  // Adding a power-of-two or zero to the same power-of-two or zero yields
2192  // either the original power-of-two, a larger power-of-two or zero.
2193  if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
2194  const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
2195  if (OrZero || Q.IIQ.hasNoUnsignedWrap(VOBO) ||
2196  Q.IIQ.hasNoSignedWrap(VOBO)) {
2197  if (match(X, m_And(m_Specific(Y), m_Value())) ||
2198  match(X, m_And(m_Value(), m_Specific(Y))))
2199  if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth, Q))
2200  return true;
2201  if (match(Y, m_And(m_Specific(X), m_Value())) ||
2202  match(Y, m_And(m_Value(), m_Specific(X))))
2203  if (isKnownToBeAPowerOfTwo(X, OrZero, Depth, Q))
2204  return true;
2205 
2206  unsigned BitWidth = V->getType()->getScalarSizeInBits();
2207  KnownBits LHSBits(BitWidth);
2208  computeKnownBits(X, LHSBits, Depth, Q);
2209 
2210  KnownBits RHSBits(BitWidth);
2211  computeKnownBits(Y, RHSBits, Depth, Q);
2212  // If i8 V is a power of two or zero:
2213  // ZeroBits: 1 1 1 0 1 1 1 1
2214  // ~ZeroBits: 0 0 0 1 0 0 0 0
2215  if ((~(LHSBits.Zero & RHSBits.Zero)).isPowerOf2())
2216  // If OrZero isn't set, we cannot give back a zero result.
2217  // Make sure either the LHS or RHS has a bit set.
2218  if (OrZero || RHSBits.One.getBoolValue() || LHSBits.One.getBoolValue())
2219  return true;
2220  }
2221  }
2222 
2223  // A PHI node is power of two if all incoming values are power of two, or if
2224  // it is an induction variable where in each step its value is a power of two.
2225  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
2226  Query RecQ = Q;
2227 
2228  // Check if it is an induction variable and always power of two.
2229  if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
2230  return true;
2231 
2232  // Recursively check all incoming values. Limit recursion to 2 levels, so
2233  // that search complexity is limited to number of operands^2.
2234  unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2235  return llvm::all_of(PN->operands(), [&](const Use &U) {
2236  // Value is power of 2 if it is coming from PHI node itself by induction.
2237  if (U.get() == PN)
2238  return true;
2239 
2240  // Change the context instruction to the incoming block where it is
2241  // evaluated.
2242  RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2243  return isKnownToBeAPowerOfTwo(U.get(), OrZero, NewDepth, RecQ);
2244  });
2245  }
2246 
2247  // An exact divide or right shift can only shift off zero bits, so the result
2248  // is a power of two only if the first operand is a power of two and not
2249  // copying a sign bit (sdiv int_min, 2).
2250  if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) ||
2251  match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) {
2252  return isKnownToBeAPowerOfTwo(cast<Operator>(V)->getOperand(0), OrZero,
2253  Depth, Q);
2254  }
2255 
2256  return false;
2257 }
2258 
2259 /// Test whether a GEP's result is known to be non-null.
2260 ///
2261 /// Uses properties inherent in a GEP to try to determine whether it is known
2262 /// to be non-null.
2263 ///
2264 /// Currently this routine does not support vector GEPs.
2265 static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
2266  const Query &Q) {
2267  const Function *F = nullptr;
2268  if (const Instruction *I = dyn_cast<Instruction>(GEP))
2269  F = I->getFunction();
2270 
2271  if (!GEP->isInBounds() ||
2272  NullPointerIsDefined(F, GEP->getPointerAddressSpace()))
2273  return false;
2274 
2275  // FIXME: Support vector-GEPs.
2276  assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP");
2277 
2278  // If the base pointer is non-null, we cannot walk to a null address with an
2279  // inbounds GEP in address space zero.
2280  if (isKnownNonZero(GEP->getPointerOperand(), Depth, Q))
2281  return true;
2282 
2283  // Walk the GEP operands and see if any operand introduces a non-zero offset.
2284  // If so, then the GEP cannot produce a null pointer, as doing so would
2285  // inherently violate the inbounds contract within address space zero.
2286  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
2287  GTI != GTE; ++GTI) {
2288  // Struct types are easy -- they must always be indexed by a constant.
2289  if (StructType *STy = GTI.getStructTypeOrNull()) {
2290  ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
2291  unsigned ElementIdx = OpC->getZExtValue();
2292  const StructLayout *SL = Q.DL.getStructLayout(STy);
2293  uint64_t ElementOffset = SL->getElementOffset(ElementIdx);
2294  if (ElementOffset > 0)
2295  return true;
2296  continue;
2297  }
2298 
2299  // If we have a zero-sized type, the index doesn't matter. Keep looping.
2300  if (Q.DL.getTypeAllocSize(GTI.getIndexedType()).getKnownMinSize() == 0)
2301  continue;
2302 
2303  // Fast path the constant operand case both for efficiency and so we don't
2304  // increment Depth when just zipping down an all-constant GEP.
2305  if (ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand())) {
2306  if (!OpC->isZero())
2307  return true;
2308  continue;
2309  }
2310 
2311  // We post-increment Depth here because while isKnownNonZero increments it
2312  // as well, when we pop back up that increment won't persist. We don't want
2313  // to recurse 10k times just because we have 10k GEP operands. We don't
2314  // bail completely out because we want to handle constant GEPs regardless
2315  // of depth.
2317  continue;
2318 
2319  if (isKnownNonZero(GTI.getOperand(), Depth, Q))
2320  return true;
2321  }
2322 
2323  return false;
2324 }
2325 
2327  const Instruction *CtxI,
2328  const DominatorTree *DT) {
2329  if (isa<Constant>(V))
2330  return false;
2331 
2332  if (!CtxI || !DT)
2333  return false;
2334 
2335  unsigned NumUsesExplored = 0;
2336  for (const auto *U : V->users()) {
2337  // Avoid massive lists
2338  if (NumUsesExplored >= DomConditionsMaxUses)
2339  break;
2340  NumUsesExplored++;
2341 
2342  // If the value is used as an argument to a call or invoke, then argument
2343  // attributes may provide an answer about null-ness.
2344  if (const auto *CB = dyn_cast<CallBase>(U))
2345  if (auto *CalledFunc = CB->getCalledFunction())
2346  for (const Argument &Arg : CalledFunc->args())
2347  if (CB->getArgOperand(Arg.getArgNo()) == V &&
2348  Arg.hasNonNullAttr(/* AllowUndefOrPoison */ false) &&
2349  DT->dominates(CB, CtxI))
2350  return true;
2351 
2352  // If the value is used as a load/store, then the pointer must be non null.
2353  if (V == getLoadStorePointerOperand(U)) {
2354  const Instruction *I = cast<Instruction>(U);
2355  if (!NullPointerIsDefined(I->getFunction(),
2356  V->getType()->getPointerAddressSpace()) &&
2357  DT->dominates(I, CtxI))
2358  return true;
2359  }
2360 
2361  // Consider only compare instructions uniquely controlling a branch
2362  Value *RHS;
2363  CmpInst::Predicate Pred;
2364  if (!match(U, m_c_ICmp(Pred, m_Specific(V), m_Value(RHS))))
2365  continue;
2366 
2367  bool NonNullIfTrue;
2368  if (cmpExcludesZero(Pred, RHS))
2369  NonNullIfTrue = true;
2371  NonNullIfTrue = false;
2372  else
2373  continue;
2374 
2377  for (const auto *CmpU : U->users()) {
2378  assert(WorkList.empty() && "Should be!");
2379  if (Visited.insert(CmpU).second)
2380  WorkList.push_back(CmpU);
2381 
2382  while (!WorkList.empty()) {
2383  auto *Curr = WorkList.pop_back_val();
2384 
2385  // If a user is an AND, add all its users to the work list. We only
2386  // propagate "pred != null" condition through AND because it is only
2387  // correct to assume that all conditions of AND are met in true branch.
2388  // TODO: Support similar logic of OR and EQ predicate?
2389  if (NonNullIfTrue)
2390  if (match(Curr, m_LogicalAnd(m_Value(), m_Value()))) {
2391  for (const auto *CurrU : Curr->users())
2392  if (Visited.insert(CurrU).second)
2393  WorkList.push_back(CurrU);
2394  continue;
2395  }
2396 
2397  if (const BranchInst *BI = dyn_cast<BranchInst>(Curr)) {
2398  assert(BI->isConditional() && "uses a comparison!");
2399 
2400  BasicBlock *NonNullSuccessor =
2401  BI->getSuccessor(NonNullIfTrue ? 0 : 1);
2402  BasicBlockEdge Edge(BI->getParent(), NonNullSuccessor);
2403  if (Edge.isSingleEdge() && DT->dominates(Edge, CtxI->getParent()))
2404  return true;
2405  } else if (NonNullIfTrue && isGuard(Curr) &&
2406  DT->dominates(cast<Instruction>(Curr), CtxI)) {
2407  return true;
2408  }
2409  }
2410  }
2411  }
2412 
2413  return false;
2414 }
2415 
2416 /// Does the 'Range' metadata (which must be a valid MD_range operand list)
2417 /// ensure that the value it's attached to is never Value? 'RangeType' is
2418 /// is the type of the value described by the range.
2419 static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
2420  const unsigned NumRanges = Ranges->getNumOperands() / 2;
2421  assert(NumRanges >= 1);
2422  for (unsigned i = 0; i < NumRanges; ++i) {
2423  ConstantInt *Lower =
2424  mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 0));
2425  ConstantInt *Upper =
2426  mdconst::extract<ConstantInt>(Ranges->getOperand(2 * i + 1));
2427  ConstantRange Range(Lower->getValue(), Upper->getValue());
2428  if (Range.contains(Value))
2429  return false;
2430  }
2431  return true;
2432 }
2433 
2434 /// Try to detect a recurrence that monotonically increases/decreases from a
2435 /// non-zero starting value. These are common as induction variables.
2436 static bool isNonZeroRecurrence(const PHINode *PN) {
2437  BinaryOperator *BO = nullptr;
2438  Value *Start = nullptr, *Step = nullptr;
2439  const APInt *StartC, *StepC;
2440  if (!matchSimpleRecurrence(PN, BO, Start, Step) ||
2441  !match(Start, m_APInt(StartC)) || StartC->isZero())
2442  return false;
2443 
2444  switch (BO->getOpcode()) {
2445  case Instruction::Add:
2446  // Starting from non-zero and stepping away from zero can never wrap back
2447  // to zero.
2448  return BO->hasNoUnsignedWrap() ||
2449  (BO->hasNoSignedWrap() && match(Step, m_APInt(StepC)) &&
2450  StartC->isNegative() == StepC->isNegative());
2451  case Instruction::Mul:
2452  return (BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap()) &&
2453  match(Step, m_APInt(StepC)) && !StepC->isZero();
2454  case Instruction::Shl:
2455  return BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap();
2456  case Instruction::AShr:
2457  case Instruction::LShr:
2458  return BO->isExact();
2459  default:
2460  return false;
2461  }
2462 }
2463 
2464 /// Return true if the given value is known to be non-zero when defined. For
2465 /// vectors, return true if every demanded element is known to be non-zero when
2466 /// defined. For pointers, if the context instruction and dominator tree are
2467 /// specified, perform context-sensitive analysis and return true if the
2468 /// pointer couldn't possibly be null at the specified instruction.
2469 /// Supports values with integer or pointer type and vectors of integers.
2470 bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
2471  const Query &Q) {
2472  // FIXME: We currently have no way to represent the DemandedElts of a scalable
2473  // vector
2474  if (isa<ScalableVectorType>(V->getType()))
2475  return false;
2476 
2477  if (auto *C = dyn_cast<Constant>(V)) {
2478  if (C->isNullValue())
2479  return false;
2480  if (isa<ConstantInt>(C))
2481  // Must be non-zero due to null test above.
2482  return true;
2483 
2484  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
2485  // See the comment for IntToPtr/PtrToInt instructions below.
2486  if (CE->getOpcode() == Instruction::IntToPtr ||
2487  CE->getOpcode() == Instruction::PtrToInt)
2488  if (Q.DL.getTypeSizeInBits(CE->getOperand(0)->getType())
2489  .getFixedSize() <=
2490  Q.DL.getTypeSizeInBits(CE->getType()).getFixedSize())
2491  return isKnownNonZero(CE->getOperand(0), Depth, Q);
2492  }
2493 
2494  // For constant vectors, check that all elements are undefined or known
2495  // non-zero to determine that the whole vector is known non-zero.
2496  if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) {
2497  for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
2498  if (!DemandedElts[i])
2499  continue;
2500  Constant *Elt = C->getAggregateElement(i);
2501  if (!Elt || Elt->isNullValue())
2502  return false;
2503  if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt))
2504  return false;
2505  }
2506  return true;
2507  }
2508 
2509  // A global variable in address space 0 is non null unless extern weak
2510  // or an absolute symbol reference. Other address spaces may have null as a
2511  // valid address for a global, so we can't assume anything.
2512  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
2513  if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
2514  GV->getType()->getAddressSpace() == 0)
2515  return true;
2516  } else
2517  return false;
2518  }
2519 
2520  if (auto *I = dyn_cast<Instruction>(V)) {
2521  if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
2522  // If the possible ranges don't contain zero, then the value is
2523  // definitely non-zero.
2524  if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
2525  const APInt ZeroValue(Ty->getBitWidth(), 0);
2526  if (rangeMetadataExcludesValue(Ranges, ZeroValue))
2527  return true;
2528  }
2529  }
2530  }
2531 
2532  if (isKnownNonZeroFromAssume(V, Q))
2533  return true;
2534 
2535  // Some of the tests below are recursive, so bail out if we hit the limit.
2537  return false;
2538 
2539  // Check for pointer simplifications.
2540 
2541  if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) {
2542  // Alloca never returns null, malloc might.
2543  if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0)
2544  return true;
2545 
2546  // A byval, inalloca may not be null in a non-default addres space. A
2547  // nonnull argument is assumed never 0.
2548  if (const Argument *A = dyn_cast<Argument>(V)) {
2549  if (((A->hasPassPointeeByValueCopyAttr() &&
2550  !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
2551  A->hasNonNullAttr()))
2552  return true;
2553  }
2554 
2555  // A Load tagged with nonnull metadata is never null.
2556  if (const LoadInst *LI = dyn_cast<LoadInst>(V))
2557  if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull))
2558  return true;
2559 
2560  if (const auto *Call = dyn_cast<CallBase>(V)) {
2561  if (Call->isReturnNonNull())
2562  return true;
2563  if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
2564  return isKnownNonZero(RP, Depth, Q);
2565  }
2566  }
2567 
2568  if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
2569  return true;
2570 
2571  // Check for recursive pointer simplifications.
2572  if (V->getType()->isPointerTy()) {
2573  // Look through bitcast operations, GEPs, and int2ptr instructions as they
2574  // do not alter the value, or at least not the nullness property of the
2575  // value, e.g., int2ptr is allowed to zero/sign extend the value.
2576  //
2577  // Note that we have to take special care to avoid looking through
2578  // truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
2579  // as casts that can alter the value, e.g., AddrSpaceCasts.
2580  if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
2581  return isGEPKnownNonNull(GEP, Depth, Q);
2582 
2583  if (auto *BCO = dyn_cast<BitCastOperator>(V))
2584  return isKnownNonZero(BCO->getOperand(0), Depth, Q);
2585 
2586  if (auto *I2P = dyn_cast<IntToPtrInst>(V))
2587  if (Q.DL.getTypeSizeInBits(I2P->getSrcTy()).getFixedSize() <=
2588  Q.DL.getTypeSizeInBits(I2P->getDestTy()).getFixedSize())
2589  return isKnownNonZero(I2P->getOperand(0), Depth, Q);
2590  }
2591 
2592  // Similar to int2ptr above, we can look through ptr2int here if the cast
2593  // is a no-op or an extend and not a truncate.
2594  if (auto *P2I = dyn_cast<PtrToIntInst>(V))
2595  if (Q.DL.getTypeSizeInBits(P2I->getSrcTy()).getFixedSize() <=
2596  Q.DL.getTypeSizeInBits(P2I->getDestTy()).getFixedSize())
2597  return isKnownNonZero(P2I->getOperand(0), Depth, Q);
2598 
2599  unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
2600 
2601  // X | Y != 0 if X != 0 or Y != 0.
2602  Value *X = nullptr, *Y = nullptr;
2603  if (match(V, m_Or(m_Value(X), m_Value(Y))))
2604  return isKnownNonZero(X, DemandedElts, Depth, Q) ||
2605  isKnownNonZero(Y, DemandedElts, Depth, Q);
2606 
2607  // ext X != 0 if X != 0.
2608  if (isa<SExtInst>(V) || isa<ZExtInst>(V))
2609  return isKnownNonZero(cast<Instruction>(V)->getOperand(0), Depth, Q);
2610 
2611  // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
2612  // if the lowest bit is shifted off the end.
2613  if (match(V, m_Shl(m_Value(X), m_Value(Y)))) {
2614  // shl nuw can't remove any non-zero bits.
2615  const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
2616  if (Q.IIQ.hasNoUnsignedWrap(BO))
2617  return isKnownNonZero(X, Depth, Q);
2618 
2619  KnownBits Known(BitWidth);
2620  computeKnownBits(X, DemandedElts, Known, Depth, Q);
2621  if (Known.One[0])
2622  return true;
2623  }
2624  // shr X, Y != 0 if X is negative. Note that the value of the shift is not
2625  // defined if the sign bit is shifted off the end.
2626  else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
2627  // shr exact can only shift out zero bits.
2628  const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
2629  if (BO->isExact())
2630  return isKnownNonZero(X, Depth, Q);
2631 
2632  KnownBits Known = computeKnownBits(X, DemandedElts, Depth, Q);
2633  if (Known.isNegative())
2634  return true;
2635 
2636  // If the shifter operand is a constant, and all of the bits shifted
2637  // out are known to be zero, and X is known non-zero then at least one
2638  // non-zero bit must remain.
2639  if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) {
2640  auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
2641  // Is there a known one in the portion not shifted out?
2642  if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal)
2643  return true;
2644  // Are all the bits to be shifted out known zero?
2645  if (Known.countMinTrailingZeros() >= ShiftVal)
2646  return isKnownNonZero(X, DemandedElts, Depth, Q);
2647  }
2648  }
2649  // div exact can only produce a zero if the dividend is zero.
2650  else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) {
2651  return isKnownNonZero(X, DemandedElts, Depth, Q);
2652  }
2653  // X + Y.
2654  else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
2655  KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
2656  KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
2657 
2658  // If X and Y are both non-negative (as signed values) then their sum is not
2659  // zero unless both X and Y are zero.
2660  if (XKnown.isNonNegative() && YKnown.isNonNegative())
2661  if (isKnownNonZero(X, DemandedElts, Depth, Q) ||
2662  isKnownNonZero(Y, DemandedElts, Depth, Q))
2663  return true;
2664 
2665  // If X and Y are both negative (as signed values) then their sum is not
2666  // zero unless both X and Y equal INT_MIN.
2667  if (XKnown.isNegative() && YKnown.isNegative()) {
2669  // The sign bit of X is set. If some other bit is set then X is not equal
2670  // to INT_MIN.
2671  if (XKnown.One.intersects(Mask))
2672  return true;
2673  // The sign bit of Y is set. If some other bit is set then Y is not equal
2674  // to INT_MIN.
2675  if (YKnown.One.intersects(Mask))
2676  return true;
2677  }
2678 
2679  // The sum of a non-negative number and a power of two is not zero.
2680  if (XKnown.isNonNegative() &&
2681  isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
2682  return true;
2683  if (YKnown.isNonNegative() &&
2684  isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
2685  return true;
2686  }
2687  // X * Y.
2688  else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
2689  const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
2690  // If X and Y are non-zero then so is X * Y as long as the multiplication
2691  // does not overflow.
2692  if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) &&
2693  isKnownNonZero(X, DemandedElts, Depth, Q) &&
2694  isKnownNonZero(Y, DemandedElts, Depth, Q))
2695  return true;
2696  }
2697  // (C ? X : Y) != 0 if X != 0 and Y != 0.
2698  else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
2699  if (isKnownNonZero(SI->getTrueValue(), DemandedElts, Depth, Q) &&
2700  isKnownNonZero(SI->getFalseValue(), DemandedElts, Depth, Q))
2701  return true;
2702  }
2703  // PHI
2704  else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
2705  if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
2706  return true;
2707 
2708  // Check if all incoming values are non-zero using recursion.
2709  Query RecQ = Q;
2710  unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
2711  return llvm::all_of(PN->operands(), [&](const Use &U) {
2712  if (U.get() == PN)
2713  return true;
2714  RecQ.CxtI = PN->getIncomingBlock(U)->getTerminator();
2715  return isKnownNonZero(U.get(), DemandedElts, NewDepth, RecQ);
2716  });
2717  }
2718  // ExtractElement
2719  else if (const auto *EEI = dyn_cast<ExtractElementInst>(V)) {
2720  const Value *Vec = EEI->getVectorOperand();
2721  const Value *Idx = EEI->getIndexOperand();
2722  auto *CIdx = dyn_cast<ConstantInt>(Idx);
2723  if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
2724  unsigned NumElts = VecTy->getNumElements();
2725  APInt DemandedVecElts = APInt::getAllOnes(NumElts);
2726  if (CIdx && CIdx->getValue().ult(NumElts))
2727  DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
2728  return isKnownNonZero(Vec, DemandedVecElts, Depth, Q);
2729  }
2730  }
2731  // Freeze
2732  else if (const FreezeInst *FI = dyn_cast<FreezeInst>(V)) {
2733  auto *Op = FI->getOperand(0);
2734  if (isKnownNonZero(Op, Depth, Q) &&
2735  isGuaranteedNotToBePoison(Op, Q.AC, Q.CxtI, Q.DT, Depth))
2736  return true;
2737  } else if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
2738  if (II->getIntrinsicID() == Intrinsic::vscale)
2739  return true;
2740  }
2741 
2742  KnownBits Known(BitWidth);
2743  computeKnownBits(V, DemandedElts, Known, Depth, Q);
2744  return Known.One != 0;
2745 }
2746 
2747 bool isKnownNonZero(const Value* V, unsigned Depth, const Query& Q) {
2748  // FIXME: We currently have no way to represent the DemandedElts of a scalable
2749  // vector
2750  if (isa<ScalableVectorType>(V->getType()))
2751  return false;
2752 
2753  auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
2754  APInt DemandedElts =
2755  FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
2756  return isKnownNonZero(V, DemandedElts, Depth, Q);
2757 }
2758 
2759 /// If the pair of operators are the same invertible function, return the
2760 /// the operands of the function corresponding to each input. Otherwise,
2761 /// return None. An invertible function is one that is 1-to-1 and maps
2762 /// every input value to exactly one output value. This is equivalent to
2763 /// saying that Op1 and Op2 are equal exactly when the specified pair of
2764 /// operands are equal, (except that Op1 and Op2 may be poison more often.)
2767  const Operator *Op2) {
2768  if (Op1->getOpcode() != Op2->getOpcode())
2769  return None;
2770 
2771  auto getOperands = [&](unsigned OpNum) -> auto {
2772  return std::make_pair(Op1->getOperand(OpNum), Op2->getOperand(OpNum));
2773  };
2774 
2775  switch (Op1->getOpcode()) {
2776  default:
2777  break;
2778  case Instruction::Add:
2779  case Instruction::Sub:
2780  if (Op1->getOperand(0) == Op2->getOperand(0))
2781  return getOperands(1);
2782  if (Op1->getOperand(1) == Op2->getOperand(1))
2783  return getOperands(0);
2784  break;
2785  case Instruction::Mul: {
2786  // invertible if A * B == (A * B) mod 2^N where A, and B are integers
2787  // and N is the bitwdith. The nsw case is non-obvious, but proven by
2788  // alive2: https://alive2.llvm.org/ce/z/Z6D5qK
2789  auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2790  auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
2791  if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
2792  (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
2793  break;
2794 
2795  // Assume operand order has been canonicalized
2796  if (Op1->getOperand(1) == Op2->getOperand(1) &&
2797  isa<ConstantInt>(Op1->getOperand(1)) &&
2798  !cast<ConstantInt>(Op1->getOperand(1))->isZero())
2799  return getOperands(0);
2800  break;
2801  }
2802  case Instruction::Shl: {
2803  // Same as multiplies, with the difference that we don't need to check
2804  // for a non-zero multiply. Shifts always multiply by non-zero.
2805  auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
2806  auto *OBO2 = cast<OverflowingBinaryOperator>(Op2);
2807  if ((!OBO1->hasNoUnsignedWrap() || !OBO2->hasNoUnsignedWrap()) &&
2808  (!OBO1->hasNoSignedWrap() || !OBO2->hasNoSignedWrap()))
2809  break;
2810 
2811  if (Op1->getOperand(1) == Op2->getOperand(1))
2812  return getOperands(0);
2813  break;
2814  }
2815  case Instruction::AShr:
2816  case Instruction::LShr: {
2817  auto *PEO1 = cast<PossiblyExactOperator>(Op1);
2818  auto *PEO2 = cast<PossiblyExactOperator>(Op2);
2819  if (!PEO1->isExact() || !PEO2->isExact())
2820  break;
2821 
2822  if (Op1->getOperand(1) == Op2->getOperand(1))
2823  return getOperands(0);
2824  break;
2825  }
2826  case Instruction::SExt:
2827  case Instruction::ZExt:
2828  if (Op1->getOperand(0)->getType() == Op2->getOperand(0)->getType())
2829  return getOperands(0);
2830  break;
2831  case Instruction::PHI: {
2832  const PHINode *PN1 = cast<PHINode>(Op1);
2833  const PHINode *PN2 = cast<PHINode>(Op2);
2834 
2835  // If PN1 and PN2 are both recurrences, can we prove the entire recurrences
2836  // are a single invertible function of the start values? Note that repeated
2837  // application of an invertible function is also invertible
2838  BinaryOperator *BO1 = nullptr;
2839  Value *Start1 = nullptr, *Step1 = nullptr;
2840  BinaryOperator *BO2 = nullptr;
2841  Value *Start2 = nullptr, *Step2 = nullptr;
2842  if (PN1->getParent() != PN2->getParent() ||
2843  !matchSimpleRecurrence(PN1, BO1, Start1, Step1) ||
2844  !matchSimpleRecurrence(PN2, BO2, Start2, Step2))
2845  break;
2846 
2847  auto Values = getInvertibleOperands(cast<Operator>(BO1),
2848  cast<Operator>(BO2));
2849  if (!Values)
2850  break;
2851 
2852  // We have to be careful of mutually defined recurrences here. Ex:
2853  // * X_i = X_(i-1) OP Y_(i-1), and Y_i = X_(i-1) OP V
2854  // * X_i = Y_i = X_(i-1) OP Y_(i-1)
2855  // The invertibility of these is complicated, and not worth reasoning
2856  // about (yet?).
2857  if (Values->first != PN1 || Values->second != PN2)
2858  break;
2859 
2860  return std::make_pair(Start1, Start2);
2861  }
2862  }
2863  return None;
2864 }
2865 
2866 /// Return true if V2 == V1 + X, where X is known non-zero.
2867 static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth,
2868  const Query &Q) {
2869  const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
2870  if (!BO || BO->getOpcode() != Instruction::Add)
2871  return false;
2872  Value *Op = nullptr;
2873  if (V2 == BO->getOperand(0))
2874  Op = BO->getOperand(1);
2875  else if (V2 == BO->getOperand(1))
2876  Op = BO->getOperand(0);
2877  else
2878  return false;
2879  return isKnownNonZero(Op, Depth + 1, Q);
2880 }
2881 
2882 /// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
2883 /// the multiplication is nuw or nsw.
2884 static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
2885  const Query &Q) {
2886  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
2887  const APInt *C;
2888  return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
2889  (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
2890  !C->isZero() && !C->isOne() && isKnownNonZero(V1, Depth + 1, Q);
2891  }
2892  return false;
2893 }
2894 
2895 /// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
2896 /// the shift is nuw or nsw.
2897 static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
2898  const Query &Q) {
2899  if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
2900  const APInt *C;
2901  return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
2902  (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) &&
2903  !C->isZero() && isKnownNonZero(V1, Depth + 1, Q);
2904  }
2905  return false;
2906 }
2907 
2908 static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
2909  unsigned Depth, const Query &Q) {
2910  // Check two PHIs are in same block.
2911  if (PN1->getParent() != PN2->getParent())
2912  return false;
2913 
2915  bool UsedFullRecursion = false;
2916  for (const BasicBlock *IncomBB : PN1->blocks()) {
2917  if (!VisitedBBs.insert(IncomBB).second)
2918  continue; // Don't reprocess blocks that we have dealt with already.
2919  const Value *IV1 = PN1->getIncomingValueForBlock(IncomBB);
2920  const Value *IV2 = PN2->getIncomingValueForBlock(IncomBB);
2921  const APInt *C1, *C2;
2922  if (match(IV1, m_APInt(C1)) && match(IV2, m_APInt(C2)) && *C1 != *C2)
2923  continue;
2924 
2925  // Only one pair of phi operands is allowed for full recursion.
2926  if (UsedFullRecursion)
2927  return false;
2928 
2929  Query RecQ = Q;
2930  RecQ.CxtI = IncomBB->getTerminator();
2931  if (!isKnownNonEqual(IV1, IV2, Depth + 1, RecQ))
2932  return false;
2933  UsedFullRecursion = true;
2934  }
2935  return true;
2936 }
2937 
2938 /// Return true if it is known that V1 != V2.
2939 static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
2940  const Query &Q) {
2941  if (V1 == V2)
2942  return false;
2943  if (V1->getType() != V2->getType())
2944  // We can't look through casts yet.
2945  return false;
2946 
2948  return false;
2949 
2950  // See if we can recurse through (exactly one of) our operands. This
2951  // requires our operation be 1-to-1 and map every input value to exactly
2952  // one output value. Such an operation is invertible.
2953  auto *O1 = dyn_cast<Operator>(V1);
2954  auto *O2 = dyn_cast<Operator>(V2);
2955  if (O1 && O2 && O1->getOpcode() == O2->getOpcode()) {
2956  if (auto Values = getInvertibleOperands(O1, O2))
2957  return isKnownNonEqual(Values->first, Values->second, Depth + 1, Q);
2958 
2959  if (const PHINode *PN1 = dyn_cast<PHINode>(V1)) {
2960  const PHINode *PN2 = cast<PHINode>(V2);
2961  // FIXME: This is missing a generalization to handle the case where one is
2962  // a PHI and another one isn't.
2963  if (isNonEqualPHIs(PN1, PN2, Depth, Q))
2964  return true;
2965  };
2966  }
2967 
2968  if (isAddOfNonZero(V1, V2, Depth, Q) || isAddOfNonZero(V2, V1, Depth, Q))
2969  return true;
2970 
2971  if (isNonEqualMul(V1, V2, Depth, Q) || isNonEqualMul(V2, V1, Depth, Q))
2972  return true;
2973 
2974  if (isNonEqualShl(V1, V2, Depth, Q) || isNonEqualShl(V2, V1, Depth, Q))
2975  return true;
2976 
2977  if (V1->getType()->isIntOrIntVectorTy()) {
2978  // Are any known bits in V1 contradictory to known bits in V2? If V1
2979  // has a known zero where V2 has a known one, they must not be equal.
2980  KnownBits Known1 = computeKnownBits(V1, Depth, Q);
2981  KnownBits Known2 = computeKnownBits(V2, Depth, Q);
2982 
2983  if (Known1.Zero.intersects(Known2.One) ||
2984  Known2.Zero.intersects(Known1.One))
2985  return true;
2986  }
2987  return false;
2988 }
2989 
2990 /// Return true if 'V & Mask' is known to be zero. We use this predicate to
2991 /// simplify operations downstream. Mask is known to be zero for bits that V
2992 /// cannot have.
2993 ///
2994 /// This function is defined on values with integer type, values with pointer
2995 /// type, and vectors of integers. In the case
2996 /// where V is a vector, the mask, known zero, and known one values are the
2997 /// same width as the vector element, and the bit is set only if it is true
2998 /// for all of the elements in the vector.
2999 bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
3000  const Query &Q) {
3001  KnownBits Known(Mask.getBitWidth());
3002  computeKnownBits(V, Known, Depth, Q);
3003  return Mask.isSubsetOf(Known.Zero);
3004 }
3005 
3006 // Match a signed min+max clamp pattern like smax(smin(In, CHigh), CLow).
3007 // Returns the input and lower/upper bounds.
3008 static bool isSignedMinMaxClamp(const Value *Select, const Value *&In,
3009  const APInt *&CLow, const APInt *&CHigh) {
3010  assert(isa<Operator>(Select) &&
3011  cast<Operator>(Select)->getOpcode() == Instruction::Select &&
3012  "Input should be a Select!");
3013 
3014  const Value *LHS = nullptr, *RHS = nullptr;
3016  if (SPF != SPF_SMAX && SPF != SPF_SMIN)
3017  return false;
3018 
3019  if (!match(RHS, m_APInt(CLow)))
3020  return false;
3021 
3022  const Value *LHS2 = nullptr, *RHS2 = nullptr;
3023  SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor;
3024  if (getInverseMinMaxFlavor(SPF) != SPF2)
3025  return false;
3026 
3027  if (!match(RHS2, m_APInt(CHigh)))
3028  return false;
3029 
3030  if (SPF == SPF_SMIN)
3031  std::swap(CLow, CHigh);
3032 
3033  In = LHS2;
3034  return CLow->sle(*CHigh);
3035 }
3036 
3038  const APInt *&CLow,
3039  const APInt *&CHigh) {
3041  II->getIntrinsicID() == Intrinsic::smax) && "Must be smin/smax");
3042 
3044  auto *InnerII = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
3045  if (!InnerII || InnerII->getIntrinsicID() != InverseID ||
3046  !match(II->getArgOperand(1), m_APInt(CLow)) ||
3047  !match(InnerII->getArgOperand(1), m_APInt(CHigh)))
3048  return false;
3049 
3050  if (II->getIntrinsicID() == Intrinsic::smin)
3051  std::swap(CLow, CHigh);
3052  return CLow->sle(*CHigh);
3053 }
3054 
3055 /// For vector constants, loop over the elements and find the constant with the
3056 /// minimum number of sign bits. Return 0 if the value is not a vector constant
3057 /// or if any element was not analyzed; otherwise, return the count for the
3058 /// element with the minimum number of sign bits.
3059 static unsigned computeNumSignBitsVectorConstant(const Value *V,
3060  const APInt &DemandedElts,
3061  unsigned TyBits) {
3062  const auto *CV = dyn_cast<Constant>(V);
3063  if (!CV || !isa<FixedVectorType>(CV->getType()))
3064  return 0;
3065 
3066  unsigned MinSignBits = TyBits;
3067  unsigned NumElts = cast<FixedVectorType>(CV->getType())->getNumElements();
3068  for (unsigned i = 0; i != NumElts; ++i) {
3069  if (!DemandedElts[i])
3070  continue;
3071  // If we find a non-ConstantInt, bail out.
3072  auto *Elt = dyn_cast_or_null<ConstantInt>(CV->getAggregateElement(i));
3073  if (!Elt)
3074  return 0;
3075 
3076  MinSignBits = std::min(MinSignBits, Elt->getValue().getNumSignBits());
3077  }
3078 
3079  return MinSignBits;
3080 }
3081 
3082 static unsigned ComputeNumSignBitsImpl(const Value *V,
3083  const APInt &DemandedElts,
3084  unsigned Depth, const Query &Q);
3085 
3086 static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
3087  unsigned Depth, const Query &Q) {
3088  unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
3089  assert(Result > 0 && "At least one sign bit needs to be present!");
3090  return Result;
3091 }
3092 
3093 /// Return the number of times the sign bit of the register is replicated into
3094 /// the other bits. We know that at least 1 bit is always equal to the sign bit
3095 /// (itself), but other cases can give us information. For example, immediately
3096 /// after an "ashr X, 2", we know that the top 3 bits are all equal to each
3097 /// other, so we return 3. For vectors, return the number of sign bits for the
3098 /// vector element with the minimum number of known sign bits of the demanded
3099 /// elements in the vector specified by DemandedElts.
3100 static unsigned ComputeNumSignBitsImpl(const Value *V,
3101  const APInt &DemandedElts,
3102  unsigned Depth, const Query &Q) {
3103  Type *Ty = V->getType();
3104 
3105  // FIXME: We currently have no way to represent the DemandedElts of a scalable
3106  // vector
3107  if (isa<ScalableVectorType>(Ty))
3108  return 1;
3109 
3110 #ifndef NDEBUG
3111  assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
3112 
3113  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
3114  assert(
3115  FVTy->getNumElements() == DemandedElts.getBitWidth() &&
3116  "DemandedElt width should equal the fixed vector number of elements");
3117  } else {
3118  assert(DemandedElts == APInt(1, 1) &&
3119  "DemandedElt width should be 1 for scalars");
3120  }
3121 #endif
3122 
3123  // We return the minimum number of sign bits that are guaranteed to be present
3124  // in V, so for undef we have to conservatively return 1. We don't have the
3125  // same behavior for poison though -- that's a FIXME today.
3126 
3127  Type *ScalarTy = Ty->getScalarType();
3128  unsigned TyBits = ScalarTy->isPointerTy() ?
3129  Q.DL.getPointerTypeSizeInBits(ScalarTy) :
3130  Q.DL.getTypeSizeInBits(ScalarTy);
3131 
3132  unsigned Tmp, Tmp2;
3133  unsigned FirstAnswer = 1;
3134 
3135  // Note that ConstantInt is handled by the general computeKnownBits case
3136  // below.
3137 
3139  return 1;
3140 
3141  if (auto *U = dyn_cast<Operator>(V)) {
3142  switch (Operator::getOpcode(V)) {
3143  default: break;
3144  case Instruction::SExt:
3145  Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
3146  return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q) + Tmp;
3147 
3148  case Instruction::SDiv: {
3149  const APInt *Denominator;
3150  // sdiv X, C -> adds log(C) sign bits.
3151  if (match(U->getOperand(1), m_APInt(Denominator))) {
3152 
3153  // Ignore non-positive denominator.
3154  if (!Denominator->isStrictlyPositive())
3155  break;
3156 
3157  // Calculate the incoming numerator bits.
3158  unsigned NumBits = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3159 
3160  // Add floor(log(C)) bits to the numerator bits.
3161  return std::min(TyBits, NumBits + Denominator->logBase2());
3162  }
3163  break;
3164  }
3165 
3166  case Instruction::SRem: {
3167  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3168 
3169  const APInt *Denominator;
3170  // srem X, C -> we know that the result is within [-C+1,C) when C is a
3171  // positive constant. This let us put a lower bound on the number of sign
3172  // bits.
3173  if (match(U->getOperand(1), m_APInt(Denominator))) {
3174 
3175  // Ignore non-positive denominator.
3176  if (Denominator->isStrictlyPositive()) {
3177  // Calculate the leading sign bit constraints by examining the
3178  // denominator. Given that the denominator is positive, there are two
3179  // cases:
3180  //
3181  // 1. The numerator is positive. The result range is [0,C) and
3182  // [0,C) u< (1 << ceilLogBase2(C)).
3183  //
3184  // 2. The numerator is negative. Then the result range is (-C,0] and
3185  // integers in (-C,0] are either 0 or >u (-1 << ceilLogBase2(C)).
3186  //
3187  // Thus a lower bound on the number of sign bits is `TyBits -
3188  // ceilLogBase2(C)`.
3189 
3190  unsigned ResBits = TyBits - Denominator->ceilLogBase2();
3191  Tmp = std::max(Tmp, ResBits);
3192  }
3193  }
3194  return Tmp;
3195  }
3196 
3197  case Instruction::AShr: {
3198  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3199  // ashr X, C -> adds C sign bits. Vectors too.
3200  const APInt *ShAmt;
3201  if (match(U->getOperand(1), m_APInt(ShAmt))) {
3202  if (ShAmt->uge(TyBits))
3203  break; // Bad shift.
3204  unsigned ShAmtLimited = ShAmt->getZExtValue();
3205  Tmp += ShAmtLimited;
3206  if (Tmp > TyBits) Tmp = TyBits;
3207  }
3208  return Tmp;
3209  }
3210  case Instruction::Shl: {
3211  const APInt *ShAmt;
3212  if (match(U->getOperand(1), m_APInt(ShAmt))) {
3213  // shl destroys sign bits.
3214  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3215  if (ShAmt->uge(TyBits) || // Bad shift.
3216  ShAmt->uge(Tmp)) break; // Shifted all sign bits out.
3217  Tmp2 = ShAmt->getZExtValue();
3218  return Tmp - Tmp2;
3219  }
3220  break;
3221  }
3222  case Instruction::And:
3223  case Instruction::Or:
3224  case Instruction::Xor: // NOT is handled here.
3225  // Logical binary ops preserve the number of sign bits at the worst.
3226  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3227  if (Tmp != 1) {
3228  Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3229  FirstAnswer = std::min(Tmp, Tmp2);
3230  // We computed what we know about the sign bits as our first
3231  // answer. Now proceed to the generic code that uses
3232  // computeKnownBits, and pick whichever answer is better.
3233  }
3234  break;
3235 
3236  case Instruction::Select: {
3237  // If we have a clamp pattern, we know that the number of sign bits will
3238  // be the minimum of the clamp min/max range.
3239  const Value *X;
3240  const APInt *CLow, *CHigh;
3241  if (isSignedMinMaxClamp(U, X, CLow, CHigh))
3242  return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
3243 
3244  Tmp = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3245  if (Tmp == 1) break;
3246  Tmp2 = ComputeNumSignBits(U->getOperand(2), Depth + 1, Q);
3247  return std::min(Tmp, Tmp2);
3248  }
3249 
3250  case Instruction::Add:
3251  // Add can have at most one carry bit. Thus we know that the output
3252  // is, at worst, one more bit than the inputs.
3253  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3254  if (Tmp == 1) break;
3255 
3256  // Special case decrementing a value (ADD X, -1):
3257  if (const auto *CRHS = dyn_cast<Constant>(U->getOperand(1)))
3258  if (CRHS->isAllOnesValue()) {
3259  KnownBits Known(TyBits);
3260  computeKnownBits(U->getOperand(0), Known, Depth + 1, Q);
3261 
3262  // If the input is known to be 0 or 1, the output is 0/-1, which is
3263  // all sign bits set.
3264  if ((Known.Zero | 1).isAllOnes())
3265  return TyBits;
3266 
3267  // If we are subtracting one from a positive number, there is no carry
3268  // out of the result.
3269  if (Known.isNonNegative())
3270  return Tmp;
3271  }
3272 
3273  Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3274  if (Tmp2 == 1) break;
3275  return std::min(Tmp, Tmp2) - 1;
3276 
3277  case Instruction::Sub:
3278  Tmp2 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3279  if (Tmp2 == 1) break;
3280 
3281  // Handle NEG.
3282  if (const auto *CLHS = dyn_cast<Constant>(U->getOperand(0)))
3283  if (CLHS->isNullValue()) {
3284  KnownBits Known(TyBits);
3285  computeKnownBits(U->getOperand(1), Known, Depth + 1, Q);
3286  // If the input is known to be 0 or 1, the output is 0/-1, which is
3287  // all sign bits set.
3288  if ((Known.Zero | 1).isAllOnes())
3289  return TyBits;
3290 
3291  // If the input is known to be positive (the sign bit is known clear),
3292  // the output of the NEG has the same number of sign bits as the
3293  // input.
3294  if (Known.isNonNegative())
3295  return Tmp2;
3296 
3297  // Otherwise, we treat this like a SUB.
3298  }
3299 
3300  // Sub can have at most one carry bit. Thus we know that the output
3301  // is, at worst, one more bit than the inputs.
3302  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3303  if (Tmp == 1) break;
3304  return std::min(Tmp, Tmp2) - 1;
3305 
3306  case Instruction::Mul: {
3307  // The output of the Mul can be at most twice the valid bits in the
3308  // inputs.
3309  unsigned SignBitsOp0 = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3310  if (SignBitsOp0 == 1) break;
3311  unsigned SignBitsOp1 = ComputeNumSignBits(U->getOperand(1), Depth + 1, Q);
3312  if (SignBitsOp1 == 1) break;
3313  unsigned OutValidBits =
3314  (TyBits - SignBitsOp0 + 1) + (TyBits - SignBitsOp1 + 1);
3315  return OutValidBits > TyBits ? 1 : TyBits - OutValidBits + 1;
3316  }
3317 
3318  case Instruction::PHI: {
3319  const PHINode *PN = cast<PHINode>(U);
3320  unsigned NumIncomingValues = PN->getNumIncomingValues();
3321  // Don't analyze large in-degree PHIs.
3322  if (NumIncomingValues > 4) break;
3323  // Unreachable blocks may have zero-operand PHI nodes.
3324  if (NumIncomingValues == 0) break;
3325 
3326  // Take the minimum of all incoming values. This can't infinitely loop
3327  // because of our depth threshold.
3328  Query RecQ = Q;
3329  Tmp = TyBits;
3330  for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
3331  if (Tmp == 1) return Tmp;
3332  RecQ.CxtI = PN->getIncomingBlock(i)->getTerminator();
3333  Tmp = std::min(
3334  Tmp, ComputeNumSignBits(PN->getIncomingValue(i), Depth + 1, RecQ));
3335  }
3336  return Tmp;
3337  }
3338 
3339  case Instruction::Trunc:
3340  // FIXME: it's tricky to do anything useful for this, but it is an
3341  // important case for targets like X86.
3342  break;
3343 
3344  case Instruction::ExtractElement:
3345  // Look through extract element. At the moment we keep this simple and
3346  // skip tracking the specific element. But at least we might find
3347  // information valid for all elements of the vector (for example if vector
3348  // is sign extended, shifted, etc).
3349  return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3350 
3351  case Instruction::ShuffleVector: {
3352  // Collect the minimum number of sign bits that are shared by every vector
3353  // element referenced by the shuffle.
3354  auto *Shuf = dyn_cast<ShuffleVectorInst>(U);
3355  if (!Shuf) {
3356  // FIXME: Add support for shufflevector constant expressions.
3357  return 1;
3358  }
3359  APInt DemandedLHS, DemandedRHS;
3360  // For undef elements, we don't know anything about the common state of
3361  // the shuffle result.
3362  if (!getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
3363  return 1;
3365  if (!!DemandedLHS) {
3366  const Value *LHS = Shuf->getOperand(0);
3367  Tmp = ComputeNumSignBits(LHS, DemandedLHS, Depth + 1, Q);
3368  }
3369  // If we don't know anything, early out and try computeKnownBits
3370  // fall-back.
3371  if (Tmp == 1)
3372  break;
3373  if (!!DemandedRHS) {
3374  const Value *RHS = Shuf->getOperand(1);
3375  Tmp2 = ComputeNumSignBits(RHS, DemandedRHS, Depth + 1, Q);
3376  Tmp = std::min(Tmp, Tmp2);
3377  }
3378  // If we don't know anything, early out and try computeKnownBits
3379  // fall-back.
3380  if (Tmp == 1)
3381  break;
3382  assert(Tmp <= TyBits && "Failed to determine minimum sign bits");
3383  return Tmp;
3384  }
3385  case Instruction::Call: {
3386  if (const auto *II = dyn_cast<IntrinsicInst>(U)) {
3387  switch (II->getIntrinsicID()) {
3388  default: break;
3389  case Intrinsic::abs:
3390  Tmp = ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
3391  if (Tmp == 1) break;
3392 
3393  // Absolute value reduces number of sign bits by at most 1.
3394  return Tmp - 1;
3395  case Intrinsic::smin:
3396  case Intrinsic::smax: {
3397  const APInt *CLow, *CHigh;
3398  if (isSignedMinMaxIntrinsicClamp(II, CLow, CHigh))
3399  return std::min(CLow->getNumSignBits(), CHigh->getNumSignBits());
3400  }
3401  }
3402  }
3403  }
3404  }
3405  }
3406 
3407  // Finally, if we can prove that the top bits of the result are 0's or 1's,
3408  // use this information.
3409 
3410  // If we can examine all elements of a vector constant successfully, we're
3411  // done (we can't do any better than that). If not, keep trying.
3412  if (unsigned VecSignBits =
3413  computeNumSignBitsVectorConstant(V, DemandedElts, TyBits))
3414  return VecSignBits;
3415 
3416  KnownBits Known(TyBits);
3417  computeKnownBits(V, DemandedElts, Known, Depth, Q);
3418 
3419  // If we know that the sign bit is either zero or one, determine the number of
3420  // identical bits in the top of the input value.
3421  return std::max(FirstAnswer, Known.countMinSignBits());
3422 }
3423 
3425  const TargetLibraryInfo *TLI) {
3426  const Function *F = CB.getCalledFunction();
3427  if (!F)
3428  return Intrinsic::not_intrinsic;
3429 
3430  if (F->isIntrinsic())
3431  return F->getIntrinsicID();
3432 
3433  // We are going to infer semantics of a library function based on mapping it
3434  // to an LLVM intrinsic. Check that the library function is available from
3435  // this callbase and in this environment.
3436  LibFunc Func;
3437  if (F->hasLocalLinkage() || !TLI || !TLI->getLibFunc(CB, Func) ||
3438  !CB.onlyReadsMemory())
3439  return Intrinsic::not_intrinsic;
3440 
3441  switch (Func) {
3442  default:
3443  break;
3444  case LibFunc_sin:
3445  case LibFunc_sinf:
3446  case LibFunc_sinl:
3447  return Intrinsic::sin;
3448  case LibFunc_cos:
3449  case LibFunc_cosf:
3450  case LibFunc_cosl:
3451  return Intrinsic::cos;
3452  case LibFunc_exp:
3453  case LibFunc_expf:
3454  case LibFunc_expl:
3455  return Intrinsic::exp;
3456  case LibFunc_exp2:
3457  case LibFunc_exp2f:
3458  case LibFunc_exp2l:
3459  return Intrinsic::exp2;
3460  case LibFunc_log:
3461  case LibFunc_logf:
3462  case LibFunc_logl:
3463  return Intrinsic::log;
3464  case LibFunc_log10:
3465  case LibFunc_log10f:
3466  case LibFunc_log10l:
3467  return Intrinsic::log10;
3468  case LibFunc_log2:
3469  case LibFunc_log2f:
3470  case LibFunc_log2l:
3471  return Intrinsic::log2;
3472  case LibFunc_fabs:
3473  case LibFunc_fabsf:
3474  case LibFunc_fabsl:
3475  return Intrinsic::fabs;
3476  case LibFunc_fmin:
3477  case LibFunc_fminf:
3478  case LibFunc_fminl:
3479  return Intrinsic::minnum;
3480  case LibFunc_fmax:
3481  case LibFunc_fmaxf:
3482  case LibFunc_fmaxl:
3483  return Intrinsic::maxnum;
3484  case LibFunc_copysign:
3485  case LibFunc_copysignf:
3486  case LibFunc_copysignl:
3487  return Intrinsic::copysign;
3488  case LibFunc_floor:
3489  case LibFunc_floorf:
3490  case LibFunc_floorl:
3491  return Intrinsic::floor;
3492  case LibFunc_ceil:
3493  case LibFunc_ceilf:
3494  case LibFunc_ceill:
3495  return Intrinsic::ceil;
3496  case LibFunc_trunc:
3497  case LibFunc_truncf:
3498  case LibFunc_truncl:
3499  return Intrinsic::trunc;
3500  case LibFunc_rint:
3501  case LibFunc_rintf:
3502  case LibFunc_rintl:
3503  return Intrinsic::rint;
3504  case LibFunc_nearbyint:
3505  case LibFunc_nearbyintf:
3506  case LibFunc_nearbyintl:
3507  return Intrinsic::nearbyint;
3508  case LibFunc_round:
3509  case LibFunc_roundf:
3510  case LibFunc_roundl:
3511  return Intrinsic::round;
3512  case LibFunc_roundeven:
3513  case LibFunc_roundevenf:
3514  case LibFunc_roundevenl:
3515  return Intrinsic::roundeven;
3516  case LibFunc_pow:
3517  case LibFunc_powf:
3518  case LibFunc_powl:
3519  return Intrinsic::pow;
3520  case LibFunc_sqrt:
3521  case LibFunc_sqrtf:
3522  case LibFunc_sqrtl:
3523  return Intrinsic::sqrt;
3524  }
3525 
3526  return Intrinsic::not_intrinsic;
3527 }
3528 
3529 /// Return true if we can prove that the specified FP value is never equal to
3530 /// -0.0.
3531 /// NOTE: Do not check 'nsz' here because that fast-math-flag does not guarantee
3532 /// that a value is not -0.0. It only guarantees that -0.0 may be treated
3533 /// the same as +0.0 in floating-point ops.
3535  unsigned Depth) {
3536  if (auto *CFP = dyn_cast<ConstantFP>(V))
3537  return !CFP->getValueAPF().isNegZero();
3538 
3540  return false;
3541 
3542  auto *Op = dyn_cast<Operator>(V);
3543  if (!Op)
3544  return false;
3545 
3546  // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
3547  if (match(Op, m_FAdd(m_Value(), m_PosZeroFP())))
3548  return true;
3549 
3550  // sitofp and uitofp turn into +0.0 for zero.
3551  if (isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op))
3552  return true;
3553 
3554  if (auto *Call = dyn_cast<CallInst>(Op)) {
3555  Intrinsic::ID IID = getIntrinsicForCallSite(*Call, TLI);
3556  switch (IID) {
3557  default:
3558  break;
3559  // sqrt(-0.0) = -0.0, no other negative results are possible.
3560  case Intrinsic::sqrt:
3561  case Intrinsic::canonicalize:
3562  return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
3563  case Intrinsic::experimental_constrained_sqrt: {
3564  // NOTE: This rounding mode restriction may be too strict.
3565  const auto *CI = cast<ConstrainedFPIntrinsic>(Call);
3566  if (CI->getRoundingMode() == RoundingMode::NearestTiesToEven)
3567  return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
3568  else
3569  return false;
3570  }
3571  // fabs(x) != -0.0
3572  case Intrinsic::fabs:
3573  return true;
3574  // sitofp and uitofp turn into +0.0 for zero.
3575  case Intrinsic::experimental_constrained_sitofp:
3576  case Intrinsic::experimental_constrained_uitofp:
3577  return true;
3578  }
3579  }
3580 
3581  return false;
3582 }
3583 
3584 /// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
3585 /// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
3586 /// bit despite comparing equal.
3588  const TargetLibraryInfo *TLI,
3589  bool SignBitOnly,
3590  unsigned Depth) {
3591  // TODO: This function does not do the right thing when SignBitOnly is true
3592  // and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
3593  // which flips the sign bits of NaNs. See
3594  // https://llvm.org/bugs/show_bug.cgi?id=31702.
3595 
3596  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
3597  return !CFP->getValueAPF().isNegative() ||
3598  (!SignBitOnly && CFP->getValueAPF().isZero());
3599  }
3600 
3601  // Handle vector of constants.
3602  if (auto *CV = dyn_cast<Constant>(V)) {
3603  if (auto *CVFVTy = dyn_cast<FixedVectorType>(CV->getType())) {
3604  unsigned NumElts = CVFVTy->getNumElements();
3605  for (unsigned i = 0; i != NumElts; ++i) {
3606  auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
3607  if (!CFP)
3608  return false;
3609  if (CFP->getValueAPF().isNegative() &&
3610  (SignBitOnly || !CFP->getValueAPF().isZero()))
3611  return false;
3612  }
3613 
3614  // All non-negative ConstantFPs.
3615  return true;
3616  }
3617  }
3618 
3620  return false;
3621 
3622  const Operator *I = dyn_cast<Operator>(V);
3623  if (!I)
3624  return false;
3625 
3626  switch (I->getOpcode()) {
3627  default:
3628  break;
3629  // Unsigned integers are always nonnegative.
3630  case Instruction::UIToFP:
3631  return true;
3632  case Instruction::FMul:
3633  case Instruction::FDiv:
3634  // X * X is always non-negative or a NaN.
3635  // X / X is always exactly 1.0 or a NaN.
3636  if (I->getOperand(0) == I->getOperand(1) &&
3637  (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()))
3638  return true;
3639 
3640  [[fallthrough]];
3641  case Instruction::FAdd:
3642  case Instruction::FRem:
3643  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3644  Depth + 1) &&
3645  cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3646  Depth + 1);
3647  case Instruction::Select:
3648  return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3649  Depth + 1) &&
3650  cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
3651  Depth + 1);
3652  case Instruction::FPExt:
3653  case Instruction::FPTrunc:
3654  // Widening/narrowing never change sign.
3655  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3656  Depth + 1);
3657  case Instruction::ExtractElement:
3658  // Look through extract element. At the moment we keep this simple and skip
3659  // tracking the specific element. But at least we might find information
3660  // valid for all elements of the vector.
3661  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3662  Depth + 1);
3663  case Instruction::Call:
3664  const auto *CI = cast<CallInst>(I);
3665  Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI);
3666  switch (IID) {
3667  default:
3668  break;
3669  case Intrinsic::maxnum: {
3670  Value *V0 = I->getOperand(0), *V1 = I->getOperand(1);
3671  auto isPositiveNum = [&](Value *V) {
3672  if (SignBitOnly) {
3673  // With SignBitOnly, this is tricky because the result of
3674  // maxnum(+0.0, -0.0) is unspecified. Just check if the operand is
3675  // a constant strictly greater than 0.0.
3676  const APFloat *C;
3677  return match(V, m_APFloat(C)) &&
3678  *C > APFloat::getZero(C->getSemantics());
3679  }
3680 
3681  // -0.0 compares equal to 0.0, so if this operand is at least -0.0,
3682  // maxnum can't be ordered-less-than-zero.
3683  return isKnownNeverNaN(V, TLI) &&
3684  cannotBeOrderedLessThanZeroImpl(V, TLI, false, Depth + 1);
3685  };
3686 
3687  // TODO: This could be improved. We could also check that neither operand
3688  // has its sign bit set (and at least 1 is not-NAN?).
3689  return isPositiveNum(V0) || isPositiveNum(V1);
3690  }
3691 
3692  case Intrinsic::maximum:
3693  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3694  Depth + 1) ||
3695  cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3696  Depth + 1);
3697  case Intrinsic::minnum:
3698  case Intrinsic::minimum:
3699  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3700  Depth + 1) &&
3701  cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
3702  Depth + 1);
3703  case Intrinsic::exp:
3704  case Intrinsic::exp2:
3705  case Intrinsic::fabs:
3706  return true;
3707 
3708  case Intrinsic::sqrt:
3709  // sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0.
3710  if (!SignBitOnly)
3711  return true;
3712  return CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
3713  CannotBeNegativeZero(CI->getOperand(0), TLI));
3714 
3715  case Intrinsic::powi:
3716  if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
3717  // powi(x,n) is non-negative if n is even.
3718  if (Exponent->getBitWidth() <= 64 && Exponent->getSExtValue() % 2u == 0)
3719  return true;
3720  }
3721  // TODO: This is not correct. Given that exp is an integer, here are the
3722  // ways that pow can return a negative value:
3723  //
3724  // pow(x, exp) --> negative if exp is odd and x is negative.
3725  // pow(-0, exp) --> -inf if exp is negative odd.
3726  // pow(-0, exp) --> -0 if exp is positive odd.
3727  // pow(-inf, exp) --> -0 if exp is negative odd.
3728  // pow(-inf, exp) --> -inf if exp is positive odd.
3729  //
3730  // Therefore, if !SignBitOnly, we can return true if x >= +0 or x is NaN,
3731  // but we must return false if x == -0. Unfortunately we do not currently
3732  // have a way of expressing this constraint. See details in
3733  // https://llvm.org/bugs/show_bug.cgi?id=31702.
3734  return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
3735  Depth + 1);
3736 
3737  case Intrinsic::fma:
3738  case Intrinsic::fmuladd:
3739  // x*x+y is non-negative if y is non-negative.
3740  return I->getOperand(0) == I->getOperand(1) &&
3741  (!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
3742  cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
3743  Depth + 1);
3744  }
3745  break;
3746  }
3747  return false;
3748 }
3749 
3751  const TargetLibraryInfo *TLI) {
3752  return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
3753 }
3754 
3756  return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
3757 }
3758 
3760  unsigned Depth) {
3761  assert(V->getType()->isFPOrFPVectorTy() && "Querying for Inf on non-FP type");
3762 
3763  // If we're told that infinities won't happen, assume they won't.
3764  if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
3765  if (FPMathOp->hasNoInfs())
3766  return true;
3767 
3768  // Handle scalar constants.
3769  if (auto *CFP = dyn_cast<ConstantFP>(V))
3770  return !CFP->isInfinity();
3771 
3773  return false;
3774 
3775  if (auto *Inst = dyn_cast<Instruction>(V)) {
3776  switch (Inst->getOpcode()) {
3777  case Instruction::Select: {
3778  return isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1) &&
3779  isKnownNeverInfinity(Inst->getOperand(2), TLI, Depth + 1);
3780  }
3781  case Instruction::SIToFP:
3782  case Instruction::UIToFP: {
3783  // Get width of largest magnitude integer (remove a bit if signed).
3784  // This still works for a signed minimum value because the largest FP
3785  // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
3786  int IntSize = Inst->getOperand(0)->getType()->getScalarSizeInBits();
3787  if (Inst->getOpcode() == Instruction::SIToFP)
3788  --IntSize;
3789 
3790  // If the exponent of the largest finite FP value can hold the largest
3791  // integer, the result of the cast must be finite.
3792  Type *FPTy = Inst->getType()->getScalarType();
3793  return ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize;
3794  }
3795  default:
3796  break;
3797  }
3798  }
3799 
3800  // try to handle fixed width vector constants
3801  auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
3802  if (VFVTy && isa<Constant>(V)) {
3803  // For vectors, verify that each element is not infinity.
3804  unsigned NumElts = VFVTy->getNumElements();
3805  for (unsigned i = 0; i != NumElts; ++i) {
3806  Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
3807  if (!Elt)
3808  return false;
3809  if (isa<UndefValue>(Elt))
3810  continue;
3811  auto *CElt = dyn_cast<ConstantFP>(Elt);
3812  if (!CElt || CElt->isInfinity())
3813  return false;
3814  }
3815  // All elements were confirmed non-infinity or undefined.
3816  return true;
3817  }
3818 
3819  // was not able to prove that V never contains infinity
3820  return false;
3821 }
3822 
3824  unsigned Depth) {
3825  assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");
3826 
3827  // If we're told that NaNs won't happen, assume they won't.
3828  if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
3829  if (FPMathOp->hasNoNaNs())
3830  return true;
3831 
3832  // Handle scalar constants.
3833  if (auto *CFP = dyn_cast<ConstantFP>(V))
3834  return !CFP->isNaN();
3835 
3837  return false;
3838 
3839  if (auto *Inst = dyn_cast<Instruction>(V)) {
3840  switch (Inst->getOpcode()) {
3841  case Instruction::FAdd:
3842  case Instruction::FSub:
3843  // Adding positive and negative infinity produces NaN.
3844  return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) &&
3845  isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
3846  (isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) ||
3847  isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1));
3848 
3849  case Instruction::FMul:
3850  // Zero multiplied with infinity produces NaN.
3851  // FIXME: If neither side can be zero fmul never produces NaN.
3852  return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) &&
3853  isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) &&
3854  isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
3855  isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1);
3856 
3857  case Instruction::FDiv:
3858  case Instruction::FRem:
3859  // FIXME: Only 0/0, Inf/Inf, Inf REM x and x REM 0 produce NaN.
3860  return false;
3861 
3862  case Instruction::Select: {
3863  return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
3864  isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1);
3865  }
3866  case Instruction::SIToFP:
3867  case Instruction::UIToFP:
3868  return true;
3869  case Instruction::FPTrunc:
3870  case Instruction::FPExt:
3871  return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1);
3872  default:
3873  break;
3874  }
3875  }
3876 
3877  if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
3878  switch (II->getIntrinsicID()) {
3879  case Intrinsic::canonicalize:
3880  case Intrinsic::fabs:
3881  case Intrinsic::copysign:
3882  case Intrinsic::exp:
3883  case Intrinsic::exp2:
3884  case Intrinsic::floor:
3885  case Intrinsic::ceil:
3886  case Intrinsic::trunc:
3887  case Intrinsic::rint:
3888  case Intrinsic::nearbyint:
3889  case Intrinsic::round:
3890  case Intrinsic::roundeven:
3891  return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1);
3892  case Intrinsic::sqrt:
3893  return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
3894  CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
3895  case Intrinsic::minnum:
3896  case Intrinsic::maxnum:
3897  // If either operand is not NaN, the result is not NaN.
3898  return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) ||
3899  isKnownNeverNaN(II->getArgOperand(1), TLI, Depth + 1);
3900  default:
3901  return false;
3902  }
3903  }
3904 
3905  // Try to handle fixed width vector constants
3906  auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
3907  if (VFVTy && isa<Constant>(V)) {
3908  // For vectors, verify that each element is not NaN.
3909  unsigned NumElts = VFVTy->getNumElements();
3910  for (unsigned i = 0; i != NumElts; ++i) {
3911  Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
3912  if (!Elt)
3913  return false;
3914  if (isa<UndefValue>(Elt))
3915  continue;
3916  auto *CElt = dyn_cast<ConstantFP>(Elt);
3917  if (!CElt || CElt->isNaN())
3918  return false;
3919  }
3920  // All elements were confirmed not-NaN or undefined.
3921  return true;
3922  }
3923 
3924  // Was not able to prove that V never contains NaN
3925  return false;
3926 }
3927 
3929 
3930  // All byte-wide stores are splatable, even of arbitrary variables.
3931  if (V->getType()->isIntegerTy(8))
3932  return V;
3933 
3934  LLVMContext &Ctx = V->getContext();
3935 
3936  // Undef don't care.
3937  auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx));
3938  if (isa<UndefValue>(V))
3939  return UndefInt8;
3940 
3941  // Return Undef for zero-sized type.
3942  if (!DL.getTypeStoreSize(V->getType()).isNonZero())
3943  return UndefInt8;
3944 
3945  Constant *C = dyn_cast<Constant>(V);
3946  if (!C) {
3947  // Conceptually, we could handle things like:
3948  // %a = zext i8 %X to i16
3949  // %b = shl i16 %a, 8
3950  // %c = or i16 %a, %b
3951  // but until there is an example that actually needs this, it doesn't seem
3952  // worth worrying about.
3953  return nullptr;
3954  }
3955 
3956  // Handle 'null' ConstantArrayZero etc.
3957  if (C->isNullValue())
3959 
3960  // Constant floating-point values can be handled as integer values if the
3961  // corresponding integer value is "byteable". An important case is 0.0.
3962  if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
3963  Type *Ty = nullptr;
3964  if (CFP->getType()->isHalfTy())
3965  Ty = Type::getInt16Ty(Ctx);
3966  else if (CFP->getType()->isFloatTy())
3967  Ty = Type::getInt32Ty(Ctx);
3968  else if (CFP->getType()->isDoubleTy())
3969  Ty = Type::getInt64Ty(Ctx);
3970  // Don't handle long double formats, which have strange constraints.
3971  return Ty ? isBytewiseValue(ConstantExpr::getBitCast(CFP, Ty), DL)
3972  : nullptr;
3973  }
3974 
3975  // We can handle constant integers that are multiple of 8 bits.
3976  if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
3977  if (CI->getBitWidth() % 8 == 0) {
3978  assert(CI->getBitWidth() > 8 && "8 bits should be handled above!");
3979  if (!CI->getValue().isSplat(8))
3980  return nullptr;
3981  return ConstantInt::get(Ctx, CI->getValue().trunc(8));
3982  }
3983  }
3984 
3985  if (auto *CE = dyn_cast<ConstantExpr>(C)) {
3986  if (CE->getOpcode() == Instruction::IntToPtr) {
3987  if (auto *PtrTy = dyn_cast<PointerType>(CE->getType())) {
3988  unsigned BitWidth = DL.getPointerSizeInBits(PtrTy->getAddressSpace());
3989  return isBytewiseValue(
3990  ConstantExpr::getIntegerCast(CE->getOperand(0),
3991  Type::getIntNTy(Ctx, BitWidth), false),
3992  DL);
3993  }
3994  }
3995  }
3996 
3997  auto Merge = [&](Value *LHS, Value *RHS) -> Value * {
3998  if (LHS == RHS)
3999  return LHS;
4000  if (!LHS || !RHS)
4001  return nullptr;
4002  if (LHS == UndefInt8)
4003  return RHS;
4004  if (RHS == UndefInt8)
4005  return LHS;
4006  return nullptr;
4007  };
4008 
4009  if (ConstantDataSequential *CA = dyn_cast<ConstantDataSequential>(C)) {
4010  Value *Val = UndefInt8;
4011  for (unsigned I = 0, E = CA->getNumElements(); I != E; ++I)
4012  if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL))))
4013  return nullptr;
4014  return Val;
4015  }
4016 
4017  if (isa<ConstantAggregate>(C)) {
4018  Value *Val = UndefInt8;
4019  for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I)
4020  if (!(Val = Merge(Val, isBytewiseValue(C->getOperand(I), DL))))
4021  return nullptr;
4022  return Val;
4023  }
4024 
4025  // Don't try to handle the handful of other constants.
4026  return nullptr;
4027 }
4028 
4029 // This is the recursive version of BuildSubAggregate. It takes a few different
4030 // arguments. Idxs is the index within the nested struct From that we are
4031 // looking at now (which is of type IndexedType). IdxSkip is the number of
4032 // indices from Idxs that should be left out when inserting into the resulting
4033 // struct. To is the result struct built so far, new insertvalue instructions
4034 // build on that.
4035 static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType,
4037  unsigned IdxSkip,
4038  Instruction *InsertBefore) {
4039  StructType *STy = dyn_cast<StructType>(IndexedType);
4040  if (STy) {
4041  // Save the original To argument so we can modify it
4042  Value *OrigTo = To;
4043  // General case, the type indexed by Idxs is a struct
4044  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
4045  // Process each struct element recursively
4046  Idxs.push_back(i);
4047  Value *PrevTo = To;
4048  To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
4049  InsertBefore);
4050  Idxs.pop_back();
4051  if (!To) {
4052  // Couldn't find any inserted value for this index? Cleanup
4053  while (PrevTo != OrigTo) {
4054  InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
4055  PrevTo = Del->getAggregateOperand();
4056  Del->eraseFromParent();
4057  }
4058  // Stop processing elements
4059  break;
4060  }
4061  }
4062  // If we successfully found a value for each of our subaggregates
4063  if (To)
4064  return To;
4065  }
4066  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
4067  // the struct's elements had a value that was inserted directly. In the latter
4068  // case, perhaps we can't determine each of the subelements individually, but
4069  // we might be able to find the complete struct somewhere.
4070 
4071  // Find the value that is at that particular spot
4072  Value *V = FindInsertedValue(From, Idxs);
4073 
4074  if (!V)
4075  return nullptr;
4076 
4077  // Insert the value in the new (sub) aggregate
4078  return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip),
4079  "tmp", InsertBefore);
4080 }
4081 
4082 // This helper takes a nested struct and extracts a part of it (which is again a
4083 // struct) into a new value. For example, given the struct:
4084 // { a, { b, { c, d }, e } }
4085 // and the indices "1, 1" this returns
4086 // { c, d }.
4087 //
4088 // It does this by inserting an insertvalue for each element in the resulting
4089 // struct, as opposed to just inserting a single struct. This will only work if
4090 // each of the elements of the substruct are known (ie, inserted into From by an
4091 // insertvalue instruction somewhere).
4092 //
4093 // All inserted insertvalue instructions are inserted before InsertBefore
4095  Instruction *InsertBefore) {
4096  assert(InsertBefore && "Must have someplace to insert!");
4097  Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
4098  idx_range);
4099  Value *To = UndefValue::get(IndexedType);
4100  SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
4101  unsigned IdxSkip = Idxs.size();
4102 
4103  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
4104 }
4105 
4106 /// Given an aggregate and a sequence of indices, see if the scalar value
4107 /// indexed is already around as a register, for example if it was inserted
4108 /// directly into the aggregate.
4109 ///
4110 /// If InsertBefore is not null, this function will duplicate (modified)
4111 /// insertvalues when a part of a nested struct is extracted.
4113  Instruction *InsertBefore) {
4114  // Nothing to index? Just return V then (this is useful at the end of our
4115  // recursion).
4116  if (idx_range.empty())
4117  return V;
4118  // We have indices, so V should have an indexable type.
4119  assert((V->getType()->isStructTy() || V->getType()->isArrayTy()) &&
4120  "Not looking at a struct or array?");
4121  assert(ExtractValueInst::getIndexedType(V->getType(), idx_range) &&
4122  "Invalid indices for type?");
4123 
4124  if (Constant *C = dyn_cast<Constant>(V)) {
4125  C = C->getAggregateElement(idx_range[0]);
4126  if (!C) return nullptr;
4127  return FindInsertedValue(C, idx_range.slice(1), InsertBefore);
4128  }
4129 
4130  if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
4131  // Loop the indices for the insertvalue instruction in parallel with the
4132  // requested indices
4133  const unsigned *req_idx = idx_range.begin();
4134  for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
4135  i != e; ++i, ++req_idx) {
4136  if (req_idx == idx_range.end()) {
4137  // We can't handle this without inserting insertvalues
4138  if (!InsertBefore)
4139  return nullptr;
4140 
4141  // The requested index identifies a part of a nested aggregate. Handle
4142  // this specially. For example,
4143  // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
4144  // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
4145  // %C = extractvalue {i32, { i32, i32 } } %B, 1
4146  // This can be changed into
4147  // %A = insertvalue {i32, i32 } undef, i32 10, 0
4148  // %C = insertvalue {i32, i32 } %A, i32 11, 1
4149  // which allows the unused 0,0 element from the nested struct to be
4150  // removed.
4151  return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx),
4152  InsertBefore);
4153  }
4154 
4155  // This insert value inserts something else than what we are looking for.
4156  // See if the (aggregate) value inserted into has the value we are
4157  // looking for, then.
4158  if (*req_idx != *i)
4159  return FindInsertedValue(I->getAggregateOperand(), idx_range,
4160  InsertBefore);
4161  }
4162  // If we end up here, the indices of the insertvalue match with those
4163  // requested (though possibly only partially). Now we recursively look at
4164  // the inserted value, passing any remaining indices.
4165  return FindInsertedValue(I->getInsertedValueOperand(),
4166  makeArrayRef(req_idx, idx_range.end()),
4167  InsertBefore);
4168  }
4169 
4170  if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
4171  // If we're extracting a value from an aggregate that was extracted from
4172  // something else, we can extract from that something else directly instead.
4173  // However, we will need to chain I's indices with the requested indices.
4174 
4175  // Calculate the number of indices required
4176  unsigned size = I->getNumIndices() + idx_range.size();
4177  // Allocate some space to put the new indices in
4179  Idxs.reserve(size);
4180  // Add indices from the extract value instruction
4181  Idxs.append(I->idx_begin(), I->idx_end());
4182 
4183  // Add requested indices
4184  Idxs.append(idx_range.begin(), idx_range.end());
4185 
4186  assert(Idxs.size() == size
4187  && "Number of indices added not correct?");
4188 
4189  return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
4190  }
4191  // Otherwise, we don't know (such as, extracting from a function return value
4192  // or load instruction)
4193  return nullptr;
4194 }
4195 
4197  unsigned CharSize) {
4198  // Make sure the GEP has exactly three arguments.
4199  if (GEP->getNumOperands() != 3)
4200  return false;
4201 
4202  // Make sure the index-ee is a pointer to array of \p CharSize integers.
4203  // CharSize.
4204  ArrayType *AT = dyn_cast<ArrayType>(GEP->getSourceElementType());
4205  if (!AT || !AT->getElementType()->isIntegerTy(CharSize))
4206  return false;
4207 
4208  // Check to make sure that the first operand of the GEP is an integer and
4209  // has value 0 so that we are sure we're indexing into the initializer.
4210  const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
4211  if (!FirstIdx || !FirstIdx->isZero())
4212  return false;
4213 
4214  return true;
4215 }
4216 
4217 // If V refers to an initialized global constant, set Slice either to
4218 // its initializer if the size of its elements equals ElementSize, or,
4219 // for ElementSize == 8, to its representation as an array of unsiged
4220 // char. Return true on success.
4222  ConstantDataArraySlice &Slice,
4223  unsigned ElementSize, uint64_t Offset) {
4224  assert(V);
4225 
4226  // Drill down into the pointer expression V, ignoring any intervening
4227  // casts, and determine the identity of the object it references along
4228  // with the cumulative byte offset into it.
4229  const GlobalVariable *GV =
4230  dyn_cast<GlobalVariable>(getUnderlyingObject(V));
4231  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
4232  // Fail if V is not based on constant global object.
4233  return false;
4234 
4235  const DataLayout &DL = GV->getParent()->getDataLayout();
4236  APInt Off(DL.getIndexTypeSizeInBits(V->getType()), 0);
4237 
4238  if (GV != V->stripAndAccumulateConstantOffsets(DL, Off,
4239  /*AllowNonInbounds*/ true))
4240  // Fail if a constant offset could not be determined.
4241  return false;
4242 
4243  uint64_t StartIdx = Off.getLimitedValue();
4244  if (StartIdx == UINT64_MAX)
4245  // Fail if the constant offset is excessive.
4246  return false;
4247 
4248  Offset += StartIdx;
4249 
4250  ConstantDataArray *Array = nullptr;
4251  ArrayType *ArrayTy = nullptr;
4252 
4253  if (GV->getInitializer()->isNullValue()) {
4254  Type *GVTy = GV->getValueType();
4255  uint64_t SizeInBytes = DL.getTypeStoreSize(GVTy).getFixedSize();
4256  uint64_t Length = SizeInBytes / (ElementSize / 8);
4257 
4258  Slice.Array = nullptr;
4259  Slice.Offset = 0;
4260  // Return an empty Slice for undersized constants to let callers
4261  // transform even undefined library calls into simpler, well-defined
4262  // expressions. This is preferable to making the calls although it
4263  // prevents sanitizers from detecting such calls.
4264  Slice.Length = Length < Offset ? 0 : Length - Offset;
4265  return true;
4266  }
4267 
4268  auto *Init = const_cast<Constant *>(GV->getInitializer());
4269  if (auto *ArrayInit = dyn_cast<ConstantDataArray>(Init)) {
4270  Type *InitElTy = ArrayInit->getElementType();
4271  if (InitElTy->isIntegerTy(ElementSize)) {
4272  // If Init is an initializer for an array of the expected type
4273  // and size, use it as is.
4274  Array = ArrayInit;
4275  ArrayTy = ArrayInit->getType();
4276  }
4277  }
4278 
4279  if (!Array) {
4280  if (ElementSize != 8)
4281  // TODO: Handle conversions to larger integral types.
4282  return false;
4283 
4284  // Otherwise extract the portion of the initializer starting
4285  // at Offset as an array of bytes, and reset Offset.
4286  Init = ReadByteArrayFromGlobal(GV, Offset);
4287  if (!Init)
4288  return false;
4289 
4290  Offset = 0;
4291  Array = dyn_cast<ConstantDataArray>(Init);
4292  ArrayTy = dyn_cast<ArrayType>(Init->getType());
4293  }
4294 
4295  uint64_t NumElts = ArrayTy->getArrayNumElements();
4296  if (Offset > NumElts)
4297  return false;
4298 
4299  Slice.Array = Array;
4300  Slice.Offset = Offset;
4301  Slice.Length = NumElts - Offset;
4302  return true;
4303 }
4304 
4305 /// Extract bytes from the initializer of the constant array V, which need
4306 /// not be a nul-terminated string. On success, store the bytes in Str and
4307 /// return true. When TrimAtNul is set, Str will contain only the bytes up
4308 /// to but not including the first nul. Return false on failure.
4310  uint64_t Offset, bool TrimAtNul) {
4311  ConstantDataArraySlice Slice;
4312  if (!getConstantDataArrayInfo(V, Slice, 8, Offset))
4313  return false;
4314 
4315  if (Slice.Array == nullptr) {
4316  if (TrimAtNul) {
4317  // Return a nul-terminated string even for an empty Slice. This is
4318  // safe because all existing SimplifyLibcalls callers require string
4319  // arguments and the behavior of the functions they fold is undefined
4320  // otherwise. Folding the calls this way is preferable to making
4321  // the undefined library calls, even though it prevents sanitizers
4322  // from reporting such calls.
4323  Str = StringRef();
4324  return true;
4325  }
4326  if (Slice.Length == 1) {
4327  Str = StringRef("", 1);
4328  return true;
4329  }
4330  // We cannot instantiate a StringRef as we do not have an appropriate string
4331  // of 0s at hand.
4332  return false;
4333  }
4334 
4335  // Start out with the entire array in the StringRef.
4336  Str = Slice.Array->getAsString();
4337  // Skip over 'offset' bytes.
4338  Str = Str.substr(Slice.Offset);
4339 
4340  if (TrimAtNul) {
4341  // Trim off the \0 and anything after it. If the array is not nul
4342  // terminated, we just return the whole end of string. The client may know
4343  // some other way that the string is length-bound.
4344  Str = Str.substr(0, Str.find('\0'));
4345  }
4346  return true;
4347 }
4348 
4349 // These next two are very similar to the above, but also look through PHI
4350 // nodes.
4351 // TODO: See if we can integrate these two together.
4352 
4353 /// If we can compute the length of the string pointed to by
4354 /// the specified pointer, return 'len+1'. If we can't, return 0.
4357  unsigned CharSize) {
4358  // Look through noop bitcast instructions.
4359  V = V->stripPointerCasts();
4360 
4361  // If this is a PHI node, there are two cases: either we have already seen it
4362  // or we haven't.
4363  if (const PHINode *PN = dyn_cast<PHINode>(V)) {
4364  if (!PHIs.insert(PN).second)
4365  return ~0ULL; // already in the set.
4366 
4367  // If it was new, see if all the input strings are the same length.
4368  uint64_t LenSoFar = ~0ULL;
4369  for (Value *IncValue : PN->incoming_values()) {
4370  uint64_t Len = GetStringLengthH(IncValue, PHIs, CharSize);
4371  if (Len == 0) return 0; // Unknown length -> unknown.
4372 
4373  if (Len == ~0ULL) continue;
4374 
4375  if (Len != LenSoFar && LenSoFar != ~0ULL)
4376  return 0; // Disagree -> unknown.
4377  LenSoFar = Len;
4378  }
4379 
4380  // Success, all agree.
4381  return LenSoFar;
4382  }
4383 
4384  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
4385  if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
4386  uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs, CharSize);
4387  if (Len1 == 0) return 0;
4388  uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs, CharSize);
4389  if (Len2 == 0) return 0;
4390  if (Len1 == ~0ULL) return Len2;
4391  if (Len2 == ~0ULL) return Len1;
4392  if (Len1 != Len2) return 0;
4393  return Len1;
4394  }
4395 
4396  // Otherwise, see if we can read the string.
4397  ConstantDataArraySlice Slice;
4398  if (!getConstantDataArrayInfo(V, Slice, CharSize))
4399  return 0;
4400 
4401  if (Slice.Array == nullptr)
4402  // Zeroinitializer (including an empty one).
4403  return 1;
4404 
4405  // Search for the first nul character. Return a conservative result even
4406  // when there is no nul. This is safe since otherwise the string function
4407  // being folded such as strlen is undefined, and can be preferable to
4408  // making the undefined library call.
4409  unsigned NullIndex = 0;
4410  for (unsigned E = Slice.Length; NullIndex < E; ++NullIndex) {
4411  if (Slice.Array->getElementAsInteger(Slice.Offset + NullIndex) == 0)
4412  break;
4413  }
4414 
4415  return NullIndex + 1;
4416 }
4417 
4418 /// If we can compute the length of the string pointed to by
4419 /// the specified pointer, return 'len+1'. If we can't, return 0.
4420 uint64_t llvm::GetStringLength(const Value *V, unsigned CharSize) {
4421  if (!V->getType()->isPointerTy())
4422  return 0;
4423 
4425  uint64_t Len = GetStringLengthH(V, PHIs, CharSize);
4426  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
4427  // an empty string as a length.
4428  return Len == ~0ULL ? 1 : Len;
4429 }
4430 
4431 const Value *
4433  bool MustPreserveNullness) {
4434  assert(Call &&
4435  "getArgumentAliasingToReturnedPointer only works on nonnull calls");
4436  if (const Value *RV = Call->getReturnedArgOperand())
4437  return RV;
4438  // This can be used only as a aliasing property.
4440  Call, MustPreserveNullness))
4441  return Call->getArgOperand(0);
4442  return nullptr;
4443 }
4444 
4446  const CallBase *Call, bool MustPreserveNullness) {
4447  switch (Call->getIntrinsicID()) {
4448  case Intrinsic::launder_invariant_group:
4449  case Intrinsic::strip_invariant_group:
4450  case Intrinsic::aarch64_irg:
4451  case Intrinsic::aarch64_tagp:
4452  return true;
4453  case Intrinsic::ptrmask:
4454  return !MustPreserveNullness;
4455  default:
4456  return false;
4457  }
4458 }
4459 
4460 /// \p PN defines a loop-variant pointer to an object. Check if the
4461 /// previous iteration of the loop was referring to the same object as \p PN.
4463  const LoopInfo *LI) {
4464  // Find the loop-defined value.
4465  Loop *L = LI->getLoopFor(PN->getParent());
4466  if (PN->getNumIncomingValues() != 2)
4467  return true;
4468 
4469  // Find the value from previous iteration.
4470  auto *PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(0));
4471  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
4472  PrevValue = dyn_cast<Instruction>(PN->getIncomingValue(1));
4473  if (!PrevValue || LI->getLoopFor(PrevValue->getParent()) != L)
4474  return true;
4475 
4476  // If a new pointer is loaded in the loop, the pointer references a different
4477  // object in every iteration. E.g.:
4478  // for (i)
4479  // int *p = a[i];
4480  // ...
4481  if (auto *Load = dyn_cast<LoadInst>(PrevValue))
4482  if (!L->isLoopInvariant(Load->getPointerOperand()))
4483  return false;
4484  return true;
4485 }
4486 
4487 const Value *llvm::getUnderlyingObject(const Value *V, unsigned MaxLookup) {
4488  if (!V->getType()->isPointerTy())
4489  return V;
4490  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
4491  if (auto *GEP = dyn_cast<GEPOperator>(V)) {
4492  V = GEP->getPointerOperand();
4493  } else if (Operator::getOpcode(V) == Instruction::BitCast ||
4494  Operator::getOpcode(V) == Instruction::AddrSpaceCast) {
4495  V = cast<Operator>(V)->getOperand(0);
4496  if (!V->getType()->isPointerTy())
4497  return V;
4498  } else if (auto *GA = dyn_cast<GlobalAlias>(V)) {
4499  if (GA->isInterposable())
4500  return V;
4501  V = GA->getAliasee();
4502  } else {
4503  if (auto *PHI = dyn_cast<PHINode>(V)) {
4504  // Look through single-arg phi nodes created by LCSSA.
4505  if (PHI->getNumIncomingValues() == 1) {
4506  V = PHI->getIncomingValue(0);
4507  continue;
4508  }
4509  } else if (auto *Call = dyn_cast<CallBase>(V)) {
4510  // CaptureTracking can know about special capturing properties of some
4511  // intrinsics like launder.invariant.group, that can't be expressed with
4512  // the attributes, but have properties like returning aliasing pointer.
4513  // Because some analysis may assume that nocaptured pointer is not
4514  // returned from some special intrinsic (because function would have to
4515  // be marked with returns attribute), it is crucial to use this function
4516  // because it should be in sync with CaptureTracking. Not using it may
4517  // cause weird miscompilations where 2 aliasing pointers are assumed to
4518  // noalias.
4519  if (auto *RP = getArgumentAliasingToReturnedPointer(Call, false)) {
4520  V = RP;
4521  continue;
4522  }
4523  }
4524 
4525  return V;
4526  }
4527  assert(V->getType()->isPointerTy() && "Unexpected operand type!");
4528  }
4529  return V;
4530 }
4531 
4534  LoopInfo *LI, unsigned MaxLookup) {
4537  Worklist.push_back(V);
4538  do {
4539  const Value *P = Worklist.pop_back_val();
4540  P = getUnderlyingObject(P, MaxLookup);
4541 
4542  if (!Visited.insert(P).second)
4543  continue;
4544 
4545  if (auto *SI = dyn_cast<SelectInst>(P)) {
4546  Worklist.push_back(SI->getTrueValue());
4547  Worklist.push_back(SI->getFalseValue());
4548  continue;
4549  }
4550 
4551  if (auto *PN = dyn_cast<PHINode>(P)) {
4552  // If this PHI changes the underlying object in every iteration of the
4553  // loop, don't look through it. Consider:
4554  // int **A;
4555  // for (i) {
4556  // Prev = Curr; // Prev = PHI (Prev_0, Curr)
4557  // Curr = A[i];
4558  // *Prev, *Curr;
4559  //
4560  // Prev is tracking Curr one iteration behind so they refer to different
4561  // underlying objects.
4562  if (!LI || !LI->isLoopHeader(PN->getParent()) ||
4564  append_range(Worklist, PN->incoming_values());
4565  continue;
4566  }
4567 
4568  Objects.push_back(P);
4569  } while (!Worklist.empty());
4570 }
4571 
4572 /// This is the function that does the work of looking through basic
4573 /// ptrtoint+arithmetic+inttoptr sequences.
4574 static const Value *getUnderlyingObjectFromInt(const Value *V) {
4575  do {
4576  if (const Operator *U = dyn_cast<Operator>(V)) {
4577  // If we find a ptrtoint, we can transfer control back to the
4578  // regular getUnderlyingObjectFromInt.
4579  if (U->getOpcode() == Instruction::PtrToInt)
4580  return U->getOperand(0);
4581  // If we find an add of a constant, a multiplied value, or a phi, it's
4582  // likely that the other operand will lead us to the base
4583  // object. We don't have to worry about the case where the
4584  // object address is somehow being computed by the multiply,
4585  // because our callers only care when the result is an
4586  // identifiable object.
4587  if (U->getOpcode() != Instruction::Add ||
4588  (!isa<ConstantInt>(U->getOperand(1)) &&
4589  Operator::getOpcode(U->getOperand(1)) != Instruction::Mul &&
4590  !isa<PHINode>(U->getOperand(1))))
4591  return V;
4592  V = U->getOperand(0);
4593  } else {
4594  return V;
4595  }
4596  assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
4597  } while (true);
4598 }
4599 
4600 /// This is a wrapper around getUnderlyingObjects and adds support for basic
4601 /// ptrtoint+arithmetic+inttoptr sequences.
4602 /// It returns false if unidentified object is found in getUnderlyingObjects.
4604  SmallVectorImpl<Value *> &Objects) {
4606  SmallVector<const Value *, 4> Working(1, V);
4607  do {
4608  V = Working.pop_back_val();
4609 
4611  getUnderlyingObjects(V, Objs);
4612 
4613  for (const Value *V : Objs) {
4614  if (!Visited.insert(V).second)
4615  continue;
4616  if (Operator::getOpcode(V) == Instruction::IntToPtr) {
4617  const Value *O =
4618  getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
4619  if (O->getType()->isPointerTy()) {
4620  Working.push_back(O);
4621  continue;
4622  }
4623  }
4624  // If getUnderlyingObjects fails to find an identifiable object,
4625  // getUnderlyingObjectsForCodeGen also fails for safety.
4626  if (!isIdentifiedObject(V)) {
4627  Objects.clear();
4628  return false;
4629  }
4630  Objects.push_back(const_cast<Value *>(V));
4631  }
4632  } while (!Working.empty());
4633  return true;
4634 }
4635 
4637  AllocaInst *Result = nullptr;
4638  SmallPtrSet<Value *, 4> Visited;
4639  SmallVector<Value *, 4> Worklist;
4640 
4641  auto AddWork = [&](Value *V) {
4642  if (Visited.insert(V).second)
4643  Worklist.push_back(V);
4644  };
4645 
4646  AddWork(V);
4647  do {
4648  V = Worklist.pop_back_val();
4649  assert(Visited.count(V));
4650 
4651  if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
4652  if (Result && Result != AI)
4653  return nullptr;
4654  Result = AI;
4655  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
4656  AddWork(CI->getOperand(0));
4657  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
4658  for (Value *IncValue : PN->incoming_values())
4659  AddWork(IncValue);
4660  } else if (auto *SI = dyn_cast<SelectInst>(V)) {
4661  AddWork(SI->getTrueValue());
4662  AddWork(SI->getFalseValue());
4663  } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
4664  if (OffsetZero && !GEP->hasAllZeroIndices())
4665  return nullptr;
4666  AddWork(GEP->getPointerOperand());
4667  } else if (CallBase *CB = dyn_cast<CallBase>(V)) {
4668  Value *Returned = CB->getReturnedArgOperand();
4669  if (Returned)
4670  AddWork(Returned);
4671  else
4672  return nullptr;
4673  } else {
4674  return nullptr;
4675  }
4676  } while (!Worklist.empty());
4677 
4678  return Result;
4679 }
4680 
4682  const Value *V, bool AllowLifetime, bool AllowDroppable) {
4683  for (const User *U : V->users()) {
4684  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
4685  if (!II)
4686  return false;
4687 
4688  if (AllowLifetime && II->isLifetimeStartOrEnd())
4689  continue;
4690 
4691  if (AllowDroppable && II->isDroppable())
4692  continue;
4693 
4694  return false;
4695  }
4696  return true;
4697 }
4698 
4701  V, /* AllowLifetime */ true, /* AllowDroppable */ false);
4702 }
4705  V, /* AllowLifetime */ true, /* AllowDroppable */ true);
4706 }
4707 
4709  if (!LI.isUnordered())
4710  return true;
4711  const Function &F = *LI.getFunction();
4712  // Speculative load may create a race that did not exist in the source.
4713  return F.hasFnAttribute(Attribute::SanitizeThread) ||
4714  // Speculative load may load data from dirty regions.
4715  F.hasFnAttribute(Attribute::SanitizeAddress) ||
4716  F.hasFnAttribute(Attribute::SanitizeHWAddress);
4717 }
4718 
4720  const Instruction *CtxI,
4721  AssumptionCache *AC,
4722  const DominatorTree *DT,
4723  const TargetLibraryInfo *TLI) {
4724  return isSafeToSpeculativelyExecuteWithOpcode(Inst->getOpcode(), Inst, CtxI,
4725  AC, DT, TLI);
4726 }
4727 
4729  unsigned Opcode, const Instruction *Inst, const Instruction *CtxI,
4730  AssumptionCache *AC, const DominatorTree *DT,
4731  const TargetLibraryInfo *TLI) {
4732 #ifndef NDEBUG
4733  if (Inst->getOpcode() != Opcode) {
4734  // Check that the operands are actually compatible with the Opcode override.
4735  auto hasEqualReturnAndLeadingOperandTypes =
4736  [](const Instruction *Inst, unsigned NumLeadingOperands) {
4737  if (Inst->getNumOperands() < NumLeadingOperands)
4738  return false;
4739  const Type *ExpectedType = Inst->getType();
4740  for (unsigned ItOp = 0; ItOp < NumLeadingOperands; ++ItOp)
4741  if (Inst->getOperand(ItOp)->getType() != ExpectedType)
4742  return false;
4743  return true;
4744  };
4745  assert(!Instruction::isBinaryOp(Opcode) ||
4746  hasEqualReturnAndLeadingOperandTypes(Inst, 2));
4747  assert(!Instruction::isUnaryOp(Opcode) ||
4748  hasEqualReturnAndLeadingOperandTypes(Inst, 1));
4749  }
4750 #endif
4751 
4752  switch (Opcode) {
4753  default:
4754  return true;
4755  case Instruction::UDiv:
4756  case Instruction::URem: {
4757  // x / y is undefined if y == 0.
4758  const APInt *V;
4759  if (match(Inst->getOperand(1), m_APInt(V)))
4760  return *V != 0;
4761  return false;
4762  }
4763  case Instruction::SDiv:
4764  case Instruction::SRem: {
4765  // x / y is undefined if y == 0 or x == INT_MIN and y == -1
4766  const APInt *Numerator, *Denominator;
4767  if (!match(Inst->getOperand(1), m_APInt(Denominator)))
4768  return false;
4769  // We cannot hoist this division if the denominator is 0.
4770  if (*Denominator == 0)
4771  return false;
4772  // It's safe to hoist if the denominator is not 0 or -1.
4773  if (!Denominator->isAllOnes())
4774  return true;
4775  // At this point we know that the denominator is -1. It is safe to hoist as
4776  // long we know that the numerator is not INT_MIN.
4777  if (match(Inst->getOperand(0), m_APInt(Numerator)))
4778  return !Numerator->isMinSignedValue();
4779  // The numerator *might* be MinSignedValue.
4780  return false;
4781  }
4782  case Instruction::Load: {
4783  const LoadInst *LI = dyn_cast<LoadInst>(Inst);
4784  if (!LI)
4785  return false;
4786  if (mustSuppressSpeculation(*LI))
4787  return false;
4788  const DataLayout &DL = LI->getModule()->getDataLayout();
4790  LI->getType(), LI->getAlign(), DL,
4791  CtxI, AC, DT, TLI);
4792  }
4793  case Instruction::Call: {
4794  auto *CI = dyn_cast<const CallInst>(Inst);
4795  if (!CI)
4796  return false;
4797  const Function *Callee = CI->getCalledFunction();
4798 
4799  // The called function could have undefined behavior or side-effects, even
4800  // if marked readnone nounwind.
4801  return Callee && Callee->isSpeculatable();
4802  }
4803  case Instruction::VAArg:
4804  case Instruction::Alloca:
4805  case Instruction::Invoke:
4806  case Instruction::CallBr:
4807  case Instruction::PHI:
4808  case Instruction::Store:
4809  case Instruction::Ret:
4810  case Instruction::Br:
4811  case Instruction::IndirectBr:
4812  case Instruction::Switch:
4813  case Instruction::Unreachable:
4814  case Instruction::Fence:
4815  case Instruction::AtomicRMW:
4816  case Instruction::AtomicCmpXchg:
4817  case Instruction::LandingPad:
4818  case Instruction::Resume:
4819  case Instruction::CatchSwitch:
4820  case Instruction::CatchPad:
4821  case Instruction::CatchRet:
4822  case Instruction::CleanupPad:
4823  case Instruction::CleanupRet:
4824  return false; // Misc instructions which have effects
4825  }
4826 }
4827 
4829  if (I.mayReadOrWriteMemory())
4830  // Memory dependency possible
4831  return true;
4833  // Can't move above a maythrow call or infinite loop. Or if an
4834  // inalloca alloca, above a stacksave call.
4835  return true;
4837  // 1) Can't reorder two inf-loop calls, even if readonly
4838  // 2) Also can't reorder an inf-loop call below a instruction which isn't
4839  // safe to speculative execute. (Inverse of above)
4840  return true;
4841  return false;
4842 }
4843 
4844 /// Convert ConstantRange OverflowResult into ValueTracking OverflowResult.
4846  switch (OR) {
4855  }
4856  llvm_unreachable("Unknown OverflowResult");
4857 }
4858 
4859 /// Combine constant ranges from computeConstantRange() and computeKnownBits().
4861  const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
4862  AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
4863  OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
4864  KnownBits Known = computeKnownBits(
4865  V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
4866  ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
4867  ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
4870  return CR1.intersectWith(CR2, RangeType);
4871 }
4872 
4874  const Value *LHS, const Value *RHS, const DataLayout &DL,
4875  AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
4876  bool UseInstrInfo) {
4877  KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
4878  nullptr, UseInstrInfo);
4879  KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
4880  nullptr, UseInstrInfo);
4881  ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
4882  ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
4883  return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
4884 }
4885 
4888  const DataLayout &DL, AssumptionCache *AC,
4889  const Instruction *CxtI,
4890  const DominatorTree *DT, bool UseInstrInfo) {
4891  // Multiplying n * m significant bits yields a result of n + m significant
4892  // bits. If the total number of significant bits does not exceed the
4893  // result bit width (minus 1), there is no overflow.
4894  // This means if we have enough leading sign bits in the operands
4895  // we can guarantee that the result does not overflow.
4896  // Ref: "Hacker's Delight" by Henry Warren
4897  unsigned BitWidth = LHS->getType()->getScalarSizeInBits();
4898 
4899  // Note that underestimating the number of sign bits gives a more
4900  // conservative answer.
4901  unsigned SignBits = ComputeNumSignBits(LHS, DL, 0, AC, CxtI, DT) +
4902  ComputeNumSignBits(RHS, DL, 0, AC, CxtI, DT);
4903 
4904  // First handle the easy case: if we have enough sign bits there's
4905  // definitely no overflow.
4906  if (SignBits > BitWidth + 1)
4908 
4909  // There are two ambiguous cases where the