//===- InstCombineCompares.cpp --------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visitICmp and visitFCmp functions.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

// How many times is a select replaced by one of its operands?
STATISTIC(NumSel, "Number of select opts");


/// Compute Result = In1+In2, returning true if the result overflowed for this
/// type.
static bool addWithOverflow(APInt &Result, const APInt &In1,
                            const APInt &In2, bool IsSigned = false) {
  bool Overflow;
  if (IsSigned)
    Result = In1.sadd_ov(In2, Overflow);
  else
    Result = In1.uadd_ov(In2, Overflow);

  return Overflow;
}

/// Compute Result = In1-In2, returning true if the result overflowed for this
/// type.
static bool subWithOverflow(APInt &Result, const APInt &In1,
                            const APInt &In2, bool IsSigned = false) {
  bool Overflow;
  if (IsSigned)
    Result = In1.ssub_ov(In2, Overflow);
  else
    Result = In1.usub_ov(In2, Overflow);

  return Overflow;
}

/// Given an icmp instruction, return true if any use of this comparison is a
/// branch on sign bit comparison.
static bool hasBranchUse(ICmpInst &I) {
  for (auto *U : I.users())
    if (isa<BranchInst>(U))
      return true;
  return false;
}

/// Given an exploded icmp instruction, return true if the comparison only
/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the
/// result of the comparison is true when the input value is signed.
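/// For example, with i8 operands, (icmp ugt X, 127) is a sign bit test:
/// it is true exactly when the high bit of X is set.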
static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
                           bool &TrueIfSigned) {
  switch (Pred) {
  case ICmpInst::ICMP_SLT:   // True if LHS s< 0
    TrueIfSigned = true;
    return RHS.isNullValue();
  case ICmpInst::ICMP_SLE:   // True if LHS s<= RHS and RHS == -1
    TrueIfSigned = true;
    return RHS.isAllOnesValue();
  case ICmpInst::ICMP_SGT:   // True if LHS s> -1
    TrueIfSigned = false;
    return RHS.isAllOnesValue();
  case ICmpInst::ICMP_UGT:
    // True if LHS u> RHS and RHS == high-bit-mask - 1
    TrueIfSigned = true;
    return RHS.isMaxSignedValue();
  case ICmpInst::ICMP_UGE:
    // True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
    TrueIfSigned = true;
    return RHS.isSignMask();
  default:
    return false;
  }
}

/// Returns true if the exploded icmp can be expressed as a signed comparison
/// to zero and updates the predicate accordingly.
/// The signedness of the comparison is preserved.
/// TODO: Refactor with decomposeBitTestICmp()?
static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
  if (!ICmpInst::isSigned(Pred))
    return false;

  if (C.isNullValue())
    return ICmpInst::isRelational(Pred);

  if (C.isOneValue()) {
    if (Pred == ICmpInst::ICMP_SLT) {
      Pred = ICmpInst::ICMP_SLE;
      return true;
    }
  } else if (C.isAllOnesValue()) {
    if (Pred == ICmpInst::ICMP_SGT) {
      Pred = ICmpInst::ICMP_SGE;
      return true;
    }
  }

  return false;
}

/// Given a signed integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
/// TODO: Move to method on KnownBits struct?
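/// For example, for an i4 value with Known.Zero = 0100 and Known.One = 0001,
/// bits 1 and 3 are unknown; because the sign bit is unknown, Min = 1001 (-7)
/// and Max = 0011 (3).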
static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,
                                                   APInt &Min, APInt &Max) {
  assert(Known.getBitWidth() == Min.getBitWidth() &&
         Known.getBitWidth() == Max.getBitWidth() &&
         "KnownZero, KnownOne and Min, Max must have equal bitwidth.");
  APInt UnknownBits = ~(Known.Zero|Known.One);

  // The minimum value is when all unknown bits are zeros, EXCEPT for the sign
  // bit if it is unknown.
  Min = Known.One;
  Max = Known.One|UnknownBits;

  if (UnknownBits.isNegative()) { // Sign bit is unknown
    Min.setSignBit();
    Max.clearSignBit();
  }
}

/// Given an unsigned integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
/// TODO: Move to method on KnownBits struct?
static void computeUnsignedMinMaxValuesFromKnownBits(const KnownBits &Known,
                                                     APInt &Min, APInt &Max) {
  assert(Known.getBitWidth() == Min.getBitWidth() &&
         Known.getBitWidth() == Max.getBitWidth() &&
         "Ty, KnownZero, KnownOne and Min, Max must have equal bitwidth.");
  APInt UnknownBits = ~(Known.Zero|Known.One);

  // The minimum value is when the unknown bits are all zeros.
  Min = Known.One;
  // The maximum value is when the unknown bits are all ones.
  Max = Known.One|UnknownBits;
}

/// This is called when we see this pattern:
///   cmp pred (load (gep GV, ...)), cmpcst
/// where GV is a global variable with a constant initializer. Try to simplify
/// this into some simple computation that does not need the load. For example
/// we can optimize "icmp eq (load (gep "foo", 0, i)), 0" into "icmp eq i, 3".
///
/// If AndCst is non-null, then the loaded value is masked with that constant
/// before doing the comparison. This handles cases like "A[i]&4 == 0".
Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
                                                        GlobalVariable *GV,
                                                        CmpInst &ICI,
                                                        ConstantInt *AndCst) {
  Constant *Init = GV->getInitializer();
  if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
    return nullptr;

  uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
  // Don't blow up on huge arrays.
  if (ArrayElementCount > MaxArraySizeForCombine)
    return nullptr;

  // There are many forms of this optimization we can handle, for now, just do
  // the simple index into a single-dimensional array.
  //
  // Require: GEP GV, 0, i {{, constant indices}}
  if (GEP->getNumOperands() < 3 ||
      !isa<ConstantInt>(GEP->getOperand(1)) ||
      !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
      isa<Constant>(GEP->getOperand(2)))
    return nullptr;

  // Check that indices after the variable are constants and in-range for the
  // type they index. Collect the indices. This is typically for arrays of
  // structs.
  SmallVector<unsigned, 4> LaterIndices;

  Type *EltTy = Init->getType()->getArrayElementType();
  for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
    ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (!Idx) return nullptr;  // Variable index.

    uint64_t IdxVal = Idx->getZExtValue();
    if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.

    if (StructType *STy = dyn_cast<StructType>(EltTy))
      EltTy = STy->getElementType(IdxVal);
    else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
      if (IdxVal >= ATy->getNumElements()) return nullptr;
      EltTy = ATy->getElementType();
    } else {
      return nullptr; // Unknown type.
    }

    LaterIndices.push_back(IdxVal);
  }

  enum { Overdefined = -3, Undefined = -2 };

  // Variables for our state machines.

  // FirstTrueElement/SecondTrueElement - Used to emit a comparison of the form
  // "i == 47 | i == 87", where 47 is the first index the condition is true for,
  // and 87 is the second (and last) index. FirstTrueElement is -2 when
  // undefined, otherwise set to the first true element. SecondTrueElement is
  // -2 when undefined, -3 when overdefined and >= 0 when that index is true.
  int FirstTrueElement = Undefined, SecondTrueElement = Undefined;

  // FirstFalseElement/SecondFalseElement - Used to emit a comparison of the
  // form "i != 47 & i != 87". Same state transitions as for true elements.
  int FirstFalseElement = Undefined, SecondFalseElement = Undefined;

  /// TrueRangeEnd/FalseRangeEnd - In conjunction with First*Element, these
  /// define a state machine that triggers for ranges of values that the index
  /// is true or false for. This triggers on things like "abbbbc"[i] == 'b'.
  /// This is -2 when undefined, -3 when overdefined, and otherwise the last
  /// index in the range (inclusive). We use -2 for undefined here because we
  /// use relative comparisons and don't want 0-1 to match -1.
  int TrueRangeEnd = Undefined, FalseRangeEnd = Undefined;

  // MagicBitvector - This is a magic bitvector where we set a bit if the
  // comparison is true for element 'i'. If there are 64 elements or less in
  // the array, this will fully represent all the comparison results.
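  // For example, scanning "abbc"[i] == 'b' sets bits 1 and 2, giving
  // MagicBitvector = 0b0110.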
  uint64_t MagicBitvector = 0;

  // Scan the array and see if one of our patterns matches.
  Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
  for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
    Constant *Elt = Init->getAggregateElement(i);
    if (!Elt) return nullptr;

    // If this is indexing an array of structures, get the structure element.
    if (!LaterIndices.empty())
      Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);

    // If the element is masked, handle it.
    if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);

    // Find out if the comparison would be true or false for the i'th element.
    Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
                                                  CompareRHS, DL, &TLI);
    // If the result is undef for this element, ignore it.
    if (isa<UndefValue>(C)) {
      // Extend range state machines to cover this element in case there is an
      // undef in the middle of the range.
      if (TrueRangeEnd == (int)i-1)
        TrueRangeEnd = i;
      if (FalseRangeEnd == (int)i-1)
        FalseRangeEnd = i;
      continue;
    }

    // If we can't compute the result for any of the elements, we have to give
    // up evaluating the entire conditional.
    if (!isa<ConstantInt>(C)) return nullptr;

    // Otherwise, we know if the comparison is true or false for this element,
    // update our state machines.
    bool IsTrueForElt = !cast<ConstantInt>(C)->isZero();

    // State machine for single/double/range index comparison.
    if (IsTrueForElt) {
      // Update the TrueElement state machine.
      if (FirstTrueElement == Undefined)
        FirstTrueElement = TrueRangeEnd = i;  // First true element.
      else {
        // Update double-compare state machine.
        if (SecondTrueElement == Undefined)
          SecondTrueElement = i;
        else
          SecondTrueElement = Overdefined;

        // Update range state machine.
        if (TrueRangeEnd == (int)i-1)
          TrueRangeEnd = i;
        else
          TrueRangeEnd = Overdefined;
      }
    } else {
      // Update the FalseElement state machine.
      if (FirstFalseElement == Undefined)
        FirstFalseElement = FalseRangeEnd = i; // First false element.
      else {
        // Update double-compare state machine.
        if (SecondFalseElement == Undefined)
          SecondFalseElement = i;
        else
          SecondFalseElement = Overdefined;

        // Update range state machine.
        if (FalseRangeEnd == (int)i-1)
          FalseRangeEnd = i;
        else
          FalseRangeEnd = Overdefined;
      }
    }

    // If this element is in range, update our magic bitvector.
    if (i < 64 && IsTrueForElt)
      MagicBitvector |= 1ULL << i;

    // If all of our states become overdefined, bail out early. Since the
    // predicate is expensive, only check it every 8 elements. This is only
    // really useful for really huge arrays.
    if ((i & 8) == 0 && i >= 64 && SecondTrueElement == Overdefined &&
        SecondFalseElement == Overdefined && TrueRangeEnd == Overdefined &&
        FalseRangeEnd == Overdefined)
      return nullptr;
  }

  // Now that we've scanned the entire array, emit our new comparison(s). We
  // order the state machines in complexity of the generated code.
  Value *Idx = GEP->getOperand(2);

  // If the index is larger than the pointer size of the target, truncate the
  // index down like the GEP would do implicitly. We don't have to do this for
  // an inbounds GEP because the index can't be out of range.
  if (!GEP->isInBounds()) {
    Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
    unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
    if (Idx->getType()->getPrimitiveSizeInBits() > PtrSize)
      Idx = Builder.CreateTrunc(Idx, IntPtrTy);
  }

  // If the comparison is only true for one or two elements, emit direct
  // comparisons.
  if (SecondTrueElement != Overdefined) {
    // None true -> false.
    if (FirstTrueElement == Undefined)
      return replaceInstUsesWith(ICI, Builder.getFalse());

    Value *FirstTrueIdx = ConstantInt::get(Idx->getType(), FirstTrueElement);

    // True for one element -> 'i == 47'.
    if (SecondTrueElement == Undefined)
      return new ICmpInst(ICmpInst::ICMP_EQ, Idx, FirstTrueIdx);

    // True for two elements -> 'i == 47 | i == 72'.
    Value *C1 = Builder.CreateICmpEQ(Idx, FirstTrueIdx);
    Value *SecondTrueIdx = ConstantInt::get(Idx->getType(), SecondTrueElement);
    Value *C2 = Builder.CreateICmpEQ(Idx, SecondTrueIdx);
    return BinaryOperator::CreateOr(C1, C2);
  }

  // If the comparison is only false for one or two elements, emit direct
  // comparisons.
  if (SecondFalseElement != Overdefined) {
    // None false -> true.
    if (FirstFalseElement == Undefined)
      return replaceInstUsesWith(ICI, Builder.getTrue());

    Value *FirstFalseIdx = ConstantInt::get(Idx->getType(), FirstFalseElement);

    // False for one element -> 'i != 47'.
    if (SecondFalseElement == Undefined)
      return new ICmpInst(ICmpInst::ICMP_NE, Idx, FirstFalseIdx);

    // False for two elements -> 'i != 47 & i != 72'.
    Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
    Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),
                                             SecondFalseElement);
    Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
    return BinaryOperator::CreateAnd(C1, C2);
  }

  // If the comparison can be replaced with a range comparison for the elements
  // where it is true, emit the range check.
  if (TrueRangeEnd != Overdefined) {
    assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");

    // Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
    if (FirstTrueElement) {
      Value *Offs = ConstantInt::get(Idx->getType(), -FirstTrueElement);
      Idx = Builder.CreateAdd(Idx, Offs);
    }

    Value *End = ConstantInt::get(Idx->getType(),
                                  TrueRangeEnd-FirstTrueElement+1);
    return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
  }

  // False range check.
  if (FalseRangeEnd != Overdefined) {
    assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
    // Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
    if (FirstFalseElement) {
      Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
      Idx = Builder.CreateAdd(Idx, Offs);
    }

    Value *End = ConstantInt::get(Idx->getType(),
                                  FalseRangeEnd-FirstFalseElement);
    return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
  }

  // If a magic bitvector captures the entire comparison state
  // of this load, replace it with computation that does:
  //   ((magic_cst >> i) & 1) != 0
  {
    Type *Ty = nullptr;

    // Look for an appropriate type:
    // - The type of Idx if the magic fits
    // - The smallest fitting legal type
    if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
      Ty = Idx->getType();
    else
      Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);

    if (Ty) {
      Value *V = Builder.CreateIntCast(Idx, Ty, false);
      V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
      V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
      return new ICmpInst(ICmpInst::ICMP_NE, V, ConstantInt::get(Ty, 0));
    }
  }

  return nullptr;
}

/// Return a value that can be used to compare the *offset* implied by a GEP to
/// zero. For example, if we have &A[i], we want to return 'i' for
/// "icmp ne i, 0". Note that, in general, indices can be complex, and scales
/// are involved. The above expression would also be legal to codegen as
/// "icmp ne (i*4), 0" (assuming A is a pointer to i32).
/// This latter form is less amenable to optimization though, and we are allowed
/// to generate the first by knowing that pointer arithmetic doesn't overflow.
///
/// If we can't emit an optimized form for this expression, this returns null.
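/// For example, "&S->f[i]" where field 'f' is an array of i32 at byte offset
/// 12 implies the offset "12 + i*4"; since 12 is a multiple of the scale 4,
/// we return "i + 3", which crosses zero at the same point.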
///
static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
                                          const DataLayout &DL) {
  gep_type_iterator GTI = gep_type_begin(GEP);

  // Check to see if this gep only has a single variable index. If so, and if
  // any constant indices are a multiple of its scale, then we can compute this
  // in terms of the scale of the variable index. For example, if the GEP
  // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
  // because the expression will cross zero at the same point.
  unsigned i, e = GEP->getNumOperands();
  int64_t Offset = 0;
  for (i = 1; i != e; ++i, ++GTI) {
    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
      // Compute the aggregate offset of constant indices.
      if (CI->isZero()) continue;

      // Handle a struct index, which adds its field offset to the pointer.
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
      } else {
        uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
        Offset += Size*CI->getSExtValue();
      }
    } else {
      // Found our variable index.
      break;
    }
  }

  // If there are no variable indices, we must have a constant offset, just
  // evaluate it the general way.
  if (i == e) return nullptr;

  Value *VariableIdx = GEP->getOperand(i);
  // Determine the scale factor of the variable element. For example, this is
  // 4 if the variable index is into an array of i32.
  uint64_t VariableScale = DL.getTypeAllocSize(GTI.getIndexedType());

  // Verify that there are no other variable indices. If so, emit the hard way.
  for (++i, ++GTI; i != e; ++i, ++GTI) {
    ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (!CI) return nullptr;

    // Compute the aggregate offset of constant indices.
    if (CI->isZero()) continue;

    // Handle a struct index, which adds its field offset to the pointer.
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
    } else {
      uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
      Offset += Size*CI->getSExtValue();
    }
  }

  // Okay, we know we have a single variable index, which must be a
  // pointer/array/vector index. If there is no offset, life is simple, return
  // the index.
  Type *IntPtrTy = DL.getIntPtrType(GEP->getOperand(0)->getType());
  unsigned IntPtrWidth = IntPtrTy->getIntegerBitWidth();
  if (Offset == 0) {
    // Cast to intptrty in case a truncation occurs. If an extension is needed,
    // we don't need to bother extending: the extension won't affect where the
    // computation crosses zero.
    if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth) {
      VariableIdx = IC.Builder.CreateTrunc(VariableIdx, IntPtrTy);
    }
    return VariableIdx;
  }

  // Otherwise, there is an index. The computation we will do will be modulo
  // the pointer size, so get it.
  uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);

  Offset &= PtrSizeMask;
  VariableScale &= PtrSizeMask;

  // To do this transformation, any constant index must be a multiple of the
  // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
  // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
  // multiple of the variable scale.
  int64_t NewOffs = Offset / (int64_t)VariableScale;
  if (Offset != NewOffs*(int64_t)VariableScale)
    return nullptr;

  // Okay, we can do this evaluation. Start by converting the index to intptr.
  if (VariableIdx->getType() != IntPtrTy)
    VariableIdx = IC.Builder.CreateIntCast(VariableIdx, IntPtrTy,
                                           true /*Signed*/);
  Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
  return IC.Builder.CreateAdd(VariableIdx, OffsetVal, "offset");
}

/// Returns true if we can rewrite Start as a GEP with pointer Base
/// and some integer offset. The nodes that need to be re-written
/// for this transformation will be added to Explored.
static bool canRewriteGEPAsOffset(Value *Start, Value *Base,
                                  const DataLayout &DL,
                                  SetVector<Value *> &Explored) {
  SmallVector<Value *, 16> WorkList(1, Start);
  Explored.insert(Base);

  // The following traversal gives us an order which can be used
  // when doing the final transformation. Since in the final
  // transformation we create the PHI replacement instructions first,
  // we don't have to get them in any particular order.
  //
  // However, for other instructions we will have to traverse the
  // operands of an instruction first, which means that we have to
  // do a post-order traversal.
  while (!WorkList.empty()) {
    SetVector<PHINode *> PHIs;

    while (!WorkList.empty()) {
      if (Explored.size() >= 100)
        return false;

      Value *V = WorkList.back();

      if (Explored.count(V) != 0) {
        WorkList.pop_back();
        continue;
      }

      if (!isa<IntToPtrInst>(V) && !isa<PtrToIntInst>(V) &&
          !isa<GetElementPtrInst>(V) && !isa<PHINode>(V))
        // We've found some value that we can't explore which is different from
        // the base. Therefore we can't do this transformation.
        return false;

      if (isa<IntToPtrInst>(V) || isa<PtrToIntInst>(V)) {
        auto *CI = dyn_cast<CastInst>(V);
        if (!CI->isNoopCast(DL))
          return false;

        if (Explored.count(CI->getOperand(0)) == 0)
          WorkList.push_back(CI->getOperand(0));
      }

      if (auto *GEP = dyn_cast<GEPOperator>(V)) {
        // We're limiting the GEP to having one index. This will preserve
        // the original pointer type. We could handle more cases in the
        // future.
        if (GEP->getNumIndices() != 1 || !GEP->isInBounds() ||
            GEP->getType() != Start->getType())
          return false;

        if (Explored.count(GEP->getOperand(0)) == 0)
          WorkList.push_back(GEP->getOperand(0));
      }

      if (WorkList.back() == V) {
        WorkList.pop_back();
        // We've finished visiting this node, mark it as such.
        Explored.insert(V);
      }

      if (auto *PN = dyn_cast<PHINode>(V)) {
        // We cannot transform PHIs on unsplittable basic blocks.
        if (isa<CatchSwitchInst>(PN->getParent()->getTerminator()))
          return false;
        Explored.insert(PN);
        PHIs.insert(PN);
      }
    }

    // Explore the PHI nodes further.
    for (auto *PN : PHIs)
      for (Value *Op : PN->incoming_values())
        if (Explored.count(Op) == 0)
          WorkList.push_back(Op);
  }

  // Make sure that we can do this. Since we can't insert GEPs in a basic
  // block before a PHI node, we can't easily do this transformation if
  // we have PHI node users of transformed instructions.
  for (Value *Val : Explored) {
    for (Value *Use : Val->uses()) {

      auto *PHI = dyn_cast<PHINode>(Use);
      auto *Inst = dyn_cast<Instruction>(Val);

      if (Inst == Base || Inst == PHI || !Inst || !PHI ||
          Explored.count(PHI) == 0)
        continue;

      if (PHI->getParent() == Inst->getParent())
        return false;
    }
  }
  return true;
}

// Sets the appropriate insert point on Builder where we can add
// a replacement Instruction for V (if that is possible).
static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
                              bool Before = true) {
  if (auto *PHI = dyn_cast<PHINode>(V)) {
    Builder.SetInsertPoint(&*PHI->getParent()->getFirstInsertionPt());
    return;
  }
  if (auto *I = dyn_cast<Instruction>(V)) {
    if (!Before)
      I = &*std::next(I->getIterator());
    Builder.SetInsertPoint(I);
    return;
  }
  if (auto *A = dyn_cast<Argument>(V)) {
    // Set the insertion point in the entry block.
    BasicBlock &Entry = A->getParent()->getEntryBlock();
    Builder.SetInsertPoint(&*Entry.getFirstInsertionPt());
    return;
  }
  // Otherwise, this is a constant and we don't need to set a new
  // insertion point.
  assert(isa<Constant>(V) && "Setting insertion point for unknown value!");
}

/// Returns a re-written value of Start as an indexed GEP using Base as a
/// pointer.
static Value *rewriteGEPAsOffset(Value *Start, Value *Base,
                                 const DataLayout &DL,
                                 SetVector<Value *> &Explored) {
  // Perform all the substitutions. This is a bit tricky because we can
  // have cycles in our use-def chains.
  // 1. Create the PHI nodes without any incoming values.
  // 2. Create all the other values.
  // 3. Add the edges for the PHI nodes.
  // 4. Emit GEPs to get the original pointers.
  // 5. Remove the original instructions.
  Type *IndexType = IntegerType::get(
      Base->getContext(), DL.getIndexTypeSizeInBits(Start->getType()));

  DenseMap<Value *, Value *> NewInsts;
  NewInsts[Base] = ConstantInt::getNullValue(IndexType);

  // Create the new PHI nodes, without adding any incoming values.
  for (Value *Val : Explored) {
    if (Val == Base)
      continue;
    // Create empty phi nodes. This avoids cyclic dependencies when creating
    // the remaining instructions.
    if (auto *PHI = dyn_cast<PHINode>(Val))
      NewInsts[PHI] = PHINode::Create(IndexType, PHI->getNumIncomingValues(),
                                      PHI->getName() + ".idx", PHI);
  }
  IRBuilder<> Builder(Base->getContext());

  // Create all the other instructions.
  for (Value *Val : Explored) {

    if (NewInsts.find(Val) != NewInsts.end())
      continue;

    if (auto *CI = dyn_cast<CastInst>(Val)) {
      NewInsts[CI] = NewInsts[CI->getOperand(0)];
      continue;
    }
    if (auto *GEP = dyn_cast<GEPOperator>(Val)) {
      Value *Index = NewInsts[GEP->getOperand(1)] ? NewInsts[GEP->getOperand(1)]
                                                  : GEP->getOperand(1);
      setInsertionPoint(Builder, GEP);
      // Indices might need to be sign extended. GEPs will magically do
      // this, but we need to do it ourselves here.
      if (Index->getType()->getScalarSizeInBits() !=
          NewInsts[GEP->getOperand(0)]->getType()->getScalarSizeInBits()) {
        Index = Builder.CreateSExtOrTrunc(
            Index, NewInsts[GEP->getOperand(0)]->getType(),
            GEP->getOperand(0)->getName() + ".sext");
      }

      auto *Op = NewInsts[GEP->getOperand(0)];
      if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
        NewInsts[GEP] = Index;
      else
        NewInsts[GEP] = Builder.CreateNSWAdd(
            Op, Index, GEP->getOperand(0)->getName() + ".add");
      continue;
    }
    if (isa<PHINode>(Val))
      continue;

    llvm_unreachable("Unexpected instruction type");
  }

  // Add the incoming values to the PHI nodes.
  for (Value *Val : Explored) {
    if (Val == Base)
      continue;
    // All the instructions have been created, we can now add edges to the
    // phi nodes.
    if (auto *PHI = dyn_cast<PHINode>(Val)) {
      PHINode *NewPhi = static_cast<PHINode *>(NewInsts[PHI]);
      for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
        Value *NewIncoming = PHI->getIncomingValue(I);

        if (NewInsts.find(NewIncoming) != NewInsts.end())
          NewIncoming = NewInsts[NewIncoming];

        NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
      }
    }
  }

  for (Value *Val : Explored) {
    if (Val == Base)
      continue;

    // Depending on the type, for external users we have to emit
    // a GEP or a GEP + ptrtoint.
    setInsertionPoint(Builder, Val, false);

    // If required, create an inttoptr instruction for Base.
    Value *NewBase = Base;
    if (!Base->getType()->isPointerTy())
      NewBase = Builder.CreateBitOrPointerCast(Base, Start->getType(),
                                               Start->getName() + "to.ptr");

    Value *GEP = Builder.CreateInBoundsGEP(
        Start->getType()->getPointerElementType(), NewBase,
        makeArrayRef(NewInsts[Val]), Val->getName() + ".ptr");

    if (!Val->getType()->isPointerTy()) {
      Value *Cast = Builder.CreatePointerCast(GEP, Val->getType(),
                                              Val->getName() + ".conv");
      GEP = Cast;
    }
    Val->replaceAllUsesWith(GEP);
  }

  return NewInsts[Start];
}

/// Looks through GEPs, IntToPtrInsts and PtrToIntInsts in order to express
/// the input Value as a constant indexed GEP. Returns a pair containing
/// the GEP's Pointer and Index.
static std::pair<Value *, Value *>
getAsConstantIndexedAddress(Value *V, const DataLayout &DL) {
  Type *IndexType = IntegerType::get(V->getContext(),
                                     DL.getIndexTypeSizeInBits(V->getType()));

  Constant *Index = ConstantInt::getNullValue(IndexType);
  while (true) {
    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
      // We accept only inbounds GEPs here to exclude the possibility of
      // overflow.
      if (!GEP->isInBounds())
        break;
      if (GEP->hasAllConstantIndices() && GEP->getNumIndices() == 1 &&
          GEP->getType() == V->getType()) {
        V = GEP->getOperand(0);
        Constant *GEPIndex = static_cast<Constant *>(GEP->getOperand(1));
        Index = ConstantExpr::getAdd(
            Index, ConstantExpr::getSExtOrBitCast(GEPIndex, IndexType));
        continue;
      }
      break;
    }
    if (auto *CI = dyn_cast<IntToPtrInst>(V)) {
      if (!CI->isNoopCast(DL))
        break;
      V = CI->getOperand(0);
      continue;
    }
    if (auto *CI = dyn_cast<PtrToIntInst>(V)) {
      if (!CI->isNoopCast(DL))
        break;
      V = CI->getOperand(0);
      continue;
    }
    break;
  }
  return {V, Index};
}

/// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
/// We can look through PHIs, GEPs and casts in order to determine a common base
/// between GEPLHS and RHS.
static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
                                              ICmpInst::Predicate Cond,
                                              const DataLayout &DL) {
  if (!GEPLHS->hasAllConstantIndices())
    return nullptr;

  // Make sure the pointers have the same type.
  if (GEPLHS->getType() != RHS->getType())
    return nullptr;

  Value *PtrBase, *Index;
  std::tie(PtrBase, Index) = getAsConstantIndexedAddress(GEPLHS, DL);

  // The set of nodes that will take part in this transformation.
  SetVector<Value *> Nodes;

  if (!canRewriteGEPAsOffset(RHS, PtrBase, DL, Nodes))
    return nullptr;

  // We know we can re-write this as
  //   ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)
  // Since we've only looked through inbounds GEPs we know that we
  // can't have overflow on either side. We can therefore re-write
  // this as:
  //   OFFSET1 cmp OFFSET2
  Value *NewRHS = rewriteGEPAsOffset(RHS, PtrBase, DL, Nodes);

  // RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
  // GEP having PtrBase as the pointer base, and has returned in NewRHS the
  // offset. Since Index is the offset of LHS to the base pointer, we will now
  // compare the offsets instead of comparing the pointers.
  return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Index, NewRHS);
}

/// Fold comparisons between a GEP instruction and something else. At this point
/// we know that the GEP is on the LHS of the comparison.
Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
                                       ICmpInst::Predicate Cond,
                                       Instruction &I) {
  // Don't transform signed compares of GEPs into index compares. Even if the
  // GEP is inbounds, the final add of the base pointer can have signed overflow
  // and would change the result of the icmp.
  // e.g. "&foo[0] <s &foo[1]" can't be folded to "true" because "foo" could be
  // the maximum signed value for the pointer type.
  if (ICmpInst::isSigned(Cond))
    return nullptr;

  // Look through bitcasts and addrspacecasts. We do not however want to remove
  // 0 GEPs.
  if (!isa<GetElementPtrInst>(RHS))
    RHS = RHS->stripPointerCasts();

  Value *PtrBase = GEPLHS->getOperand(0);
  if (PtrBase == RHS && GEPLHS->isInBounds()) {
    // ((gep Ptr, OFFSET) cmp Ptr)   ---> (OFFSET cmp 0).
    // This transformation (ignoring the base and scales) is valid because we
    // know pointers can't overflow since the gep is inbounds. See if we can
    // output an optimized form.
    Value *Offset = evaluateGEPOffsetExpression(GEPLHS, *this, DL);

    // If not, synthesize the offset the hard way.
    if (!Offset)
      Offset = EmitGEPOffset(GEPLHS);
    return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
                        Constant::getNullValue(Offset->getType()));
  } else if (GEPOperator *GEPRHS = dyn_cast<GEPOperator>(RHS)) {
    // If the base pointers are different, but the indices are the same, just
    // compare the base pointer.
    if (PtrBase != GEPRHS->getOperand(0)) {
      bool IndicesTheSame = GEPLHS->getNumOperands()==GEPRHS->getNumOperands();
      IndicesTheSame &= GEPLHS->getOperand(0)->getType() ==
                        GEPRHS->getOperand(0)->getType();
      if (IndicesTheSame)
        for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i)
          if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
            IndicesTheSame = false;
            break;
          }

      // If all indices are the same, just compare the base pointers.
      Type *BaseType = GEPLHS->getOperand(0)->getType();
      if (IndicesTheSame && CmpInst::makeCmpResultType(BaseType) == I.getType())
        return new ICmpInst(Cond, GEPLHS->getOperand(0), GEPRHS->getOperand(0));

      // If we're comparing GEPs with two base pointers that only differ in type
      // and both GEPs have only constant indices or just one use, then fold
      // the compare with the adjusted indices.
      if (GEPLHS->isInBounds() && GEPRHS->isInBounds() &&
          (GEPLHS->hasAllConstantIndices() || GEPLHS->hasOneUse()) &&
          (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
          PtrBase->stripPointerCasts() ==
              GEPRHS->getOperand(0)->stripPointerCasts()) {
        Value *LOffset = EmitGEPOffset(GEPLHS);
        Value *ROffset = EmitGEPOffset(GEPRHS);

        // If we looked through an addrspacecast between different sized address
        // spaces, the LHS and RHS pointers are different sized
        // integers. Truncate to the smaller one.
        Type *LHSIndexTy = LOffset->getType();
        Type *RHSIndexTy = ROffset->getType();
        if (LHSIndexTy != RHSIndexTy) {
          if (LHSIndexTy->getPrimitiveSizeInBits() <
              RHSIndexTy->getPrimitiveSizeInBits()) {
            ROffset = Builder.CreateTrunc(ROffset, LHSIndexTy);
          } else
            LOffset = Builder.CreateTrunc(LOffset, RHSIndexTy);
        }

        Value *Cmp = Builder.CreateICmp(ICmpInst::getSignedPredicate(Cond),
                                        LOffset, ROffset);
        return replaceInstUsesWith(I, Cmp);
      }

      // Otherwise, the base pointers are different and the indices are
      // different. Try to convert this to an indexed compare by looking through
      // PHIs/casts.
      return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
    }

    // If one of the GEPs has all zero indices, recurse.
    if (GEPLHS->hasAllZeroIndices())
      return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
                         ICmpInst::getSwappedPredicate(Cond), I);

    // If the other GEP has all zero indices, recurse.
    if (GEPRHS->hasAllZeroIndices())
      return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);

    bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
    if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
      // If the GEPs only differ by one index, compare it.
      unsigned NumDifferences = 0;  // Keep track of # differences.
      unsigned DiffOperand = 0;     // The operand that differs.
      for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i)
        if (GEPLHS->getOperand(i) != GEPRHS->getOperand(i)) {
          if (GEPLHS->getOperand(i)->getType()->getPrimitiveSizeInBits() !=
              GEPRHS->getOperand(i)->getType()->getPrimitiveSizeInBits()) {
            // Irreconcilable differences.
            NumDifferences = 2;
            break;
          } else {
            if (NumDifferences++) break;
            DiffOperand = i;
          }
        }

      if (NumDifferences == 0)   // SAME GEP?
        return replaceInstUsesWith(I, // No comparison is needed here.
          ConstantInt::get(I.getType(), ICmpInst::isTrueWhenEqual(Cond)));

      else if (NumDifferences == 1 && GEPsInBounds) {
        Value *LHSV = GEPLHS->getOperand(DiffOperand);
        Value *RHSV = GEPRHS->getOperand(DiffOperand);
        // Make sure we do a signed comparison here.
        return new ICmpInst(ICmpInst::getSignedPredicate(Cond), LHSV, RHSV);
      }
    }

    // Only lower this if the icmp is the only user of the GEP or if we expect
    // the result to fold to a constant!
    if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
        (isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
      // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2)  --->  (OFFSET1 cmp OFFSET2)
      Value *L = EmitGEPOffset(GEPLHS);
      Value *R = EmitGEPOffset(GEPRHS);
      return new ICmpInst(ICmpInst::getSignedPredicate(Cond), L, R);
    }
  }

  // Try to convert this to an indexed compare by looking through PHIs/casts as
  // a last resort.
  return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
}

Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
                                         const AllocaInst *Alloca,
                                         const Value *Other) {
  assert(ICI.isEquality() && "Cannot fold non-equality comparison.");

  // It would be tempting to fold away comparisons between allocas and any
  // pointer not based on that alloca (e.g. an argument). However, even
  // though such pointers cannot alias, they can still compare equal.
  //
  // But LLVM doesn't specify where allocas get their memory, so if the alloca
  // doesn't escape we can argue that it's impossible to guess its value, and we
  // can therefore act as if any such guesses are wrong.
  //
  // The code below checks that the alloca doesn't escape, and that it's only
  // used in a comparison once (the current instruction). The
  // single-comparison-use condition ensures that we're trivially folding all
  // comparisons against the alloca consistently, and avoids the risk of
  // erroneously folding a comparison of the pointer with itself.

  unsigned MaxIter = 32; // Break cycles and bound to constant-time.

  SmallVector<const Use *, 32> Worklist;
  for (const Use &U : Alloca->uses()) {
    if (Worklist.size() >= MaxIter)
      return nullptr;
    Worklist.push_back(&U);
  }

  unsigned NumCmps = 0;
  while (!Worklist.empty()) {
    assert(Worklist.size() <= MaxIter);
    const Use *U = Worklist.pop_back_val();
    const Value *V = U->getUser();
    --MaxIter;

    if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
        isa<SelectInst>(V)) {
      // Track the uses.
    } else if (isa<LoadInst>(V)) {
      // Loading from the pointer doesn't escape it.
      continue;
    } else if (const auto *SI = dyn_cast<StoreInst>(V)) {
      // Storing *to* the pointer is fine, but storing the pointer escapes it.
      if (SI->getValueOperand() == U->get())
        return nullptr;
      continue;
    } else if (isa<ICmpInst>(V)) {
      if (NumCmps++)
        return nullptr; // Found more than one cmp.
      continue;
    } else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
      switch (Intrin->getIntrinsicID()) {
        // These intrinsics don't escape or compare the pointer. Memset is safe
        // because we don't allow ptrtoint. Memcpy and memmove are safe because
        // we don't allow stores, so src cannot point to V.
        case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
        case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
          continue;
        default:
          return nullptr;
      }
    } else {
      return nullptr;
    }
    for (const Use &U : V->uses()) {
      if (Worklist.size() >= MaxIter)
        return nullptr;
      Worklist.push_back(&U);
    }
  }

  Type *CmpTy = CmpInst::makeCmpResultType(Other->getType());
  return replaceInstUsesWith(
      ICI,
      ConstantInt::get(CmpTy, !CmpInst::isTrueWhenEqual(ICI.getPredicate())));
}

/// Fold "icmp pred (X+C), X".
Instruction *InstCombiner::foldICmpAddOpConst(Value *X, const APInt &C,
                                              ICmpInst::Predicate Pred) {
  // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
  // so the values can never be equal. Similarly for all other "or equals"
  // operators.
  assert(!!C && "C should not be zero!");

  // (X+1) <u X        --> X >u (MAXUINT-1)        --> X == 255
  // (X+2) <u X        --> X >u (MAXUINT-2)        --> X > 253
  // (X+MAXUINT) <u X  --> X >u (MAXUINT-MAXUINT)  --> X != 0
  if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE) {
    Constant *R = ConstantInt::get(X->getType(),
                                   APInt::getMaxValue(C.getBitWidth()) - C);
    return new ICmpInst(ICmpInst::ICMP_UGT, X, R);
  }

  // (X+1) >u X        --> X <u (0-1)        --> X != 255
  // (X+2) >u X        --> X <u (0-2)        --> X <u 254
  // (X+MAXUINT) >u X  --> X <u (0-MAXUINT)  --> X <u 1  --> X == 0
  if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE)
    return new ICmpInst(ICmpInst::ICMP_ULT, X,
                        ConstantInt::get(X->getType(), -C));

  APInt SMax = APInt::getSignedMaxValue(C.getBitWidth());

  // (X+ 1) <s X       --> X >s (MAXSINT-1)        --> X == 127
  // (X+ 2) <s X       --> X >s (MAXSINT-2)        --> X >s 125
  // (X+MAXSINT) <s X  --> X >s (MAXSINT-MAXSINT)  --> X >s 0
  // (X+MINSINT) <s X  --> X >s (MAXSINT-MINSINT)  --> X >s -1
  // (X+ -2) <s X      --> X >s (MAXSINT- -2)      --> X >s 126
  // (X+ -1) <s X      --> X >s (MAXSINT- -1)      --> X != 127
  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
    return new ICmpInst(ICmpInst::ICMP_SGT, X,
                        ConstantInt::get(X->getType(), SMax - C));

  // (X+ 1) >s X       --> X <s (MAXSINT-(1-1))       --> X != 127
  // (X+ 2) >s X       --> X <s (MAXSINT-(2-1))       --> X <s 126
  // (X+MAXSINT) >s X  --> X <s (MAXSINT-(MAXSINT-1)) --> X <s 1
  // (X+MINSINT) >s X  --> X <s (MAXSINT-(MINSINT-1)) --> X <s -2
  // (X+ -2) >s X      --> X <s (MAXSINT-(-2-1))      --> X <s -126
  // (X+ -1) >s X      --> X <s (MAXSINT-(-1-1))      --> X == -128

  assert(Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE);
  return new ICmpInst(ICmpInst::ICMP_SLT, X,
                      ConstantInt::get(X->getType(), SMax - (C - 1)));
}

/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
/// (icmp eq/ne A, Log2(AP2/AP1)) ->
/// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
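/// For example, (icmp eq (lshr i32 8, %A), 2) becomes (icmp eq %A, 2)
/// because 8 >> 2 == 2.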
Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A,
                                                 const APInt &AP1,
                                                 const APInt &AP2) {
  assert(I.isEquality() && "Cannot fold icmp gt/lt");

  auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
    if (I.getPredicate() == I.ICMP_NE)
      Pred = CmpInst::getInversePredicate(Pred);
    return new ICmpInst(Pred, LHS, RHS);
  };

  // Don't bother doing any work for cases which InstSimplify handles.
  if (AP2.isNullValue())
    return nullptr;

  bool IsAShr = isa<AShrOperator>(I.getOperand(0));
  if (IsAShr) {
    if (AP2.isAllOnesValue())
      return nullptr;
    if (AP2.isNegative() != AP1.isNegative())
      return nullptr;
    if (AP2.sgt(AP1))
      return nullptr;
  }

  if (!AP1)
    // 'A' must be large enough to shift out the highest set bit.
    return getICmp(I.ICMP_UGT, A,
                   ConstantInt::get(A->getType(), AP2.logBase2()));

  if (AP1 == AP2)
    return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));

  int Shift;
  if (IsAShr && AP1.isNegative())
    Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
  else
    Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();

  if (Shift > 0) {
    if (IsAShr && AP1 == AP2.ashr(Shift)) {
      // There are multiple solutions if we are comparing against -1 and the LHS
      // of the ashr is not a power of two.
      if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
        return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
      return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
    } else if (AP1 == AP2.lshr(Shift)) {
      return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
    }
  }

  // Shifting const2 will never be equal to const1.
  // FIXME: This should always be handled by InstSimplify?
  auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
  return replaceInstUsesWith(I, TorF);
}

/// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
/// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
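/// For example, (icmp eq (shl i32 4, %A), 16) becomes (icmp eq %A, 2)
/// because 4 << 2 == 16.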
Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A,
                                                 const APInt &AP1,
                                                 const APInt &AP2) {
  assert(I.isEquality() && "Cannot fold icmp gt/lt");

  auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
    if (I.getPredicate() == I.ICMP_NE)
      Pred = CmpInst::getInversePredicate(Pred);
    return new ICmpInst(Pred, LHS, RHS);
  };

  // Don't bother doing any work for cases which InstSimplify handles.
  if (AP2.isNullValue())
    return nullptr;

  unsigned AP2TrailingZeros = AP2.countTrailingZeros();

  if (!AP1 && AP2TrailingZeros != 0)
    return getICmp(
        I.ICMP_UGE, A,
        ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));

  if (AP1 == AP2)
    return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));

  // Get the distance between the lowest bits that are set.
  int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;

  if (Shift > 0 && AP2.shl(Shift) == AP1)
    return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));

  // Shifting const2 will never be equal to const1.
  // FIXME: This should always be handled by InstSimplify?
  auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
  return replaceInstUsesWith(I, TorF);
}

/// The caller has matched a pattern of the form:
///   I = icmp ugt (add (add A, B), CI2), CI1
/// If this is of the form:
///   sum = a + b
///   if (sum+128 >u 255)
/// Then replace it with llvm.sadd.with.overflow.i8.
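///
/// For example (an illustrative IR sketch, assuming %a and %b were
/// sign-extended from i8):
///   %sum = add i32 %a, %b
///   %off = add i32 %sum, 128
///   %cmp = icmp ugt i32 %off, 255
/// becomes a call to @llvm.sadd.with.overflow.i8 on the truncated operands,
/// with %cmp replaced by the intrinsic's overflow flag.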
///
static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
                                          ConstantInt *CI2, ConstantInt *CI1,
                                          InstCombiner &IC) {
  // The transformation we're trying to do here is to transform this into an
  // llvm.sadd.with.overflow. To do this, we have to replace the original add
  // with a narrower add, and discard the add-with-constant that is part of the
  // range check (if we can't eliminate it, this isn't profitable).

  // In order to eliminate the add-with-constant, the compare can be its only
  // use.
  Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
  if (!AddWithCst->hasOneUse())
    return nullptr;

  // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
  if (!CI2->getValue().isPowerOf2())
    return nullptr;
  unsigned NewWidth = CI2->getValue().countTrailingZeros();
  if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
    return nullptr;

  // The width of the new add formed is 1 more than the bias.
  ++NewWidth;

  // Check to see that CI1 is an all-ones value with NewWidth bits.
  if (CI1->getBitWidth() == NewWidth ||
      CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
    return nullptr;

  // This is only really a signed overflow check if the inputs have been
  // sign-extended; check for that condition. For example, if CI2 is 2^31 and
  // the operands of the add are 64 bits wide, we need at least 33 sign bits.
  unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
  if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
      IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
    return nullptr;

  // In order to replace the original add with a narrower
  // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
  // and truncates that discard the high bits of the add. Verify that this is
  // the case.
  Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
  for (User *U : OrigAdd->users()) {
    if (U == AddWithCst)
      continue;

    // Only accept truncates for now. We would really like a nice recursive
    // predicate like SimplifyDemandedBits, but which goes downwards the use-def
    // chain to see which bits of a value are actually demanded. If the
    // original add had another add which was then immediately truncated, we
    // could still do the transformation.
    TruncInst *TI = dyn_cast<TruncInst>(U);
    if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
      return nullptr;
  }

  // If the pattern matches, truncate the inputs to the narrower type and
  // use the sadd_with_overflow intrinsic to efficiently compute both the
  // result and the overflow bit.
  Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
  Value *F = Intrinsic::getDeclaration(I.getModule(),
                                       Intrinsic::sadd_with_overflow, NewType);

  InstCombiner::BuilderTy &Builder = IC.Builder;

  // Put the new code above the original add, in case there are any uses of the
  // add between the add and the compare.
  Builder.SetInsertPoint(OrigAdd);

  Value *TruncA = Builder.CreateTrunc(A, NewType, A->getName() + ".trunc");
  Value *TruncB = Builder.CreateTrunc(B, NewType, B->getName() + ".trunc");
  CallInst *Call = Builder.CreateCall(F, {TruncA, TruncB}, "sadd");
  Value *Add = Builder.CreateExtractValue(Call, 0, "sadd.result");
  Value *ZExt = Builder.CreateZExt(Add, OrigAdd->getType());

  // The inner add was the result of the narrow add, zero extended to the
  // wider type. Replace it with the result computed by the intrinsic.
  IC.replaceInstUsesWith(*OrigAdd, ZExt);

  // The original icmp gets replaced with the overflow value.
  return ExtractValueInst::Create(Call, 1, "sadd.overflow");
}

// Handle (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
Instruction *InstCombiner::foldICmpWithZero(ICmpInst &Cmp) {
  CmpInst::Predicate Pred = Cmp.getPredicate();
  Value *X = Cmp.getOperand(0);

  if (match(Cmp.getOperand(1), m_Zero()) && Pred == ICmpInst::ICMP_SGT) {
    Value *A, *B;
    SelectPatternResult SPR = matchSelectPattern(X, A, B);
    if (SPR.Flavor == SPF_SMIN) {
      if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT))
        return new ICmpInst(Pred, B, Cmp.getOperand(1));
      if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT))
        return new ICmpInst(Pred, A, Cmp.getOperand(1));
    }
  }
  return nullptr;
}

// Fold icmp Pred X, C.
Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
  CmpInst::Predicate Pred = Cmp.getPredicate();
  Value *X = Cmp.getOperand(0);

  const APInt *C;
  if (!match(Cmp.getOperand(1), m_APInt(C)))
    return nullptr;

  Value *A = nullptr, *B = nullptr;

  // Match the following pattern, which is a common idiom when writing
  // overflow-safe integer arithmetic functions. The source performs an
  // addition in a wider type and explicitly checks for overflow using
  // comparisons against INT_MIN and INT_MAX. Simplify by using the
  // sadd_with_overflow intrinsic.
  //
  // TODO: This could probably be generalized to handle other overflow-safe
  // operations if we worked out the formulas to compute the appropriate magic
  // constants.
  //
  // sum = a + b
  // if (sum+128 >u 255)  ...  -> llvm.sadd.with.overflow.i8
  {
    ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
    if (Pred == ICmpInst::ICMP_UGT &&
        match(X, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
      if (Instruction *Res = processUGT_ADDCST_ADD(
              Cmp, A, B, CI2, cast<ConstantInt>(Cmp.getOperand(1)), *this))
        return Res;
  }

  // FIXME: Use m_APInt to allow folds for splat constants.
  ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1));
  if (!CI)
    return nullptr;

  // Canonicalize icmp instructions based on dominating conditions.
  BasicBlock *Parent = Cmp.getParent();
  BasicBlock *Dom = Parent->getSinglePredecessor();
  auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
  ICmpInst::Predicate Pred2;
  BasicBlock *TrueBB, *FalseBB;
  ConstantInt *CI2;
  if (BI && match(BI, m_Br(m_ICmp(Pred2, m_Specific(X), m_ConstantInt(CI2)),
                           TrueBB, FalseBB)) &&
      TrueBB != FalseBB) {
    ConstantRange CR = ConstantRange::makeAllowedICmpRegion(Pred,
                                                            CI->getValue());
    ConstantRange DominatingCR =
        (Parent == TrueBB) ? ConstantRange::makeExactICmpRegion(
                                 Pred2, CI2->getValue())
                           : ConstantRange::makeExactICmpRegion(
                                 CmpInst::getInversePredicate(Pred2),
                                 CI2->getValue());
    ConstantRange Intersection = DominatingCR.intersectWith(CR);
    ConstantRange Difference = DominatingCR.difference(CR);
    if (Intersection.isEmptySet())
      return replaceInstUsesWith(Cmp, Builder.getFalse());
    if (Difference.isEmptySet())
      return replaceInstUsesWith(Cmp, Builder.getTrue());

    // If this is a normal comparison, it demands all bits. If it is a sign
    // bit comparison, it only demands the sign bit.
    bool UnusedBit;
    bool IsSignBit = isSignBitCheck(Pred, CI->getValue(), UnusedBit);

    // Canonicalizing a sign bit comparison that gets used in a branch
    // pessimizes codegen by generating a branch-on-zero instruction instead
    // of a test and branch. So we avoid canonicalizing in such situations,
    // because a test-and-branch instruction has better branch displacement
    // than a compare-and-branch instruction.
    if (Cmp.isEquality() || (IsSignBit && hasBranchUse(Cmp)))
      return nullptr;

    if (auto *AI = Intersection.getSingleElement())
      return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder.getInt(*AI));
    if (auto *AD = Difference.getSingleElement())
      return new ICmpInst(ICmpInst::ICMP_NE, X, Builder.getInt(*AD));
  }

  return nullptr;
}

/// Fold icmp (trunc X, Y), C.
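/// For example, if the top 24 bits of a 32-bit X are known zero,
/// (icmp eq (trunc X to i8), 42) can become (icmp eq X, 42).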
Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
                                                 TruncInst *Trunc,
                                                 const APInt &C) {
  ICmpInst::Predicate Pred = Cmp.getPredicate();
  Value *X = Trunc->getOperand(0);
  if (C.isOneValue() && C.getBitWidth() > 1) {
    // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
    Value *V = nullptr;
    if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
      return new ICmpInst(ICmpInst::ICMP_SLT, V,
                          ConstantInt::get(V->getType(), 1));
  }

  if (Cmp.isEquality() && Trunc->hasOneUse()) {
    // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
    // of the high bits truncated out of x are known.
    unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
             SrcBits = X->getType()->getScalarSizeInBits();
    KnownBits Known = computeKnownBits(X, 0, &Cmp);

    // If all the high bits are known, we can do this xform.
    if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
      // Pull in the high bits from the known-ones set.
      APInt NewRHS = C.zext(SrcBits);
      NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
      return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS));
    }
  }

  return nullptr;
}

/// Fold icmp (xor X, Y), C.
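/// For example, (icmp ult (xor X, 0x80), 0x80) on i8 flips the sign bit of X,
/// so it can be rewritten as the signed compare (icmp slt X, 0).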
1454 Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
1455  BinaryOperator *Xor,
1456  const APInt &C) {
1457  Value *X = Xor->getOperand(0);
1458  Value *Y = Xor->getOperand(1);
1459  const APInt *XorC;
1460  if (!match(Y, m_APInt(XorC)))
1461  return nullptr;
1462 
1463  // If this is a comparison that tests the sign bit (X < 0) or (X > -1),
1464  // fold the xor.
1465  ICmpInst::Predicate Pred = Cmp.getPredicate();
1466  bool TrueIfSigned = false;
1467  if (isSignBitCheck(Cmp.getPredicate(), C, TrueIfSigned)) {
1468 
1469  // If the sign bit of the XorCst is not set, there is no change to
1470  // the operation, just stop using the Xor.
1471  if (!XorC->isNegative()) {
1472  Cmp.setOperand(0, X);
1473  Worklist.Add(Xor);
1474  return &Cmp;
1475  }
1476 
1477  // Emit the opposite comparison.
1478  if (TrueIfSigned)
1479  return new ICmpInst(ICmpInst::ICMP_SGT, X,
1480  ConstantInt::getAllOnesValue(X->getType()));
1481  else
1482  return new ICmpInst(ICmpInst::ICMP_SLT, X,
1483  Constant::getNullValue(X->getType()));
1484  }
1485 
1486  if (Xor->hasOneUse()) {
1487  // (icmp u/s (xor X SignMask), C) -> (icmp s/u X, (xor C SignMask))
1488  if (!Cmp.isEquality() && XorC->isSignMask()) {
1489  Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
1490  : Cmp.getSignedPredicate();
1491  return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1492  }
1493 
1494  // (icmp u/s (xor X ~SignMask), C) -> (icmp s/u X, (xor C ~SignMask))
1495  if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
1496  Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
1497  : Cmp.getSignedPredicate();
1498  Pred = Cmp.getSwappedPredicate(Pred);
1499  return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), C ^ *XorC));
1500  }
1501  }
1502 
1503  // Mask constant magic can eliminate an 'xor' with unsigned compares.
1504  if (Pred == ICmpInst::ICMP_UGT) {
1505  // (xor X, ~C) >u C --> X <u ~C (when C+1 is a power of 2)
1506  if (*XorC == ~C && (C + 1).isPowerOf2())
1507  return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
1508  // (xor X, C) >u C --> X >u C (when C+1 is a power of 2)
1509  if (*XorC == C && (C + 1).isPowerOf2())
1510  return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
1511  }
1512  if (Pred == ICmpInst::ICMP_ULT) {
1513  // (xor X, -C) <u C --> X >u ~C (when C is a power of 2)
1514  if (*XorC == -C && C.isPowerOf2())
1515  return new ICmpInst(ICmpInst::ICMP_UGT, X,
1516  ConstantInt::get(X->getType(), ~C));
1517  // (xor X, C) <u C --> X >u ~C (when -C is a power of 2)
1518  if (*XorC == C && (-C).isPowerOf2())
1519  return new ICmpInst(ICmpInst::ICMP_UGT, X,
1520  ConstantInt::get(X->getType(), ~C));
1521  }
1522  return nullptr;
1523 }
1524 
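// [Illustrative aside, not part of the original file.] The sign-mask xor trick
// used above, checked exhaustively on 8 bits: flipping the sign bit converts
// an unsigned order into the corresponding signed order, so
// (x ^ 0x80) <u c  <=>  (int8_t)x <s (int8_t)(c ^ 0x80). Helper name is
// hypothetical.
#include <cassert>
#include <cstdint>
static void xorSignMaskSketch() {
  for (unsigned Xi = 0; Xi < 256; ++Xi)
    for (unsigned Ci = 0; Ci < 256; ++Ci) {
      uint8_t X = (uint8_t)Xi, C = (uint8_t)Ci;
      assert(((uint8_t)(X ^ 0x80) < C) == ((int8_t)X < (int8_t)(C ^ 0x80)));
    }
}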
1525 /// Fold icmp (and (sh X, Y), C2), C1.
1526 Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
1527  const APInt &C1, const APInt &C2) {
1528  BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
1529  if (!Shift || !Shift->isShift())
1530  return nullptr;
1531 
1532  // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could
1533  // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in
1534  // code produced by the clang front-end, for bitfield access.
1535  // This seemingly simple opportunity to fold away a shift turns out to be
1536  // rather complicated. See PR17827 for details.
1537  unsigned ShiftOpcode = Shift->getOpcode();
1538  bool IsShl = ShiftOpcode == Instruction::Shl;
1539  const APInt *C3;
1540  if (match(Shift->getOperand(1), m_APInt(C3))) {
1541  bool CanFold = false;
1542  if (ShiftOpcode == Instruction::Shl) {
1543  // For a left shift, we can fold if the comparison is not signed. We can
1544  // also fold a signed comparison if the mask value and comparison value
1545  // are not negative. These constraints may not be obvious, but we can
1546  // prove that they are correct using an SMT solver.
1547  if (!Cmp.isSigned() || (!C2.isNegative() && !C1.isNegative()))
1548  CanFold = true;
1549  } else {
1550  bool IsAshr = ShiftOpcode == Instruction::AShr;
1551  // For a logical right shift, we can fold if the comparison is not signed.
1552  // We can also fold a signed comparison if the shifted mask value and the
1553  // shifted comparison value are not negative. These constraints may not be
1554  // obvious, but we can prove that they are correct using an SMT solver.
1555  // For an arithmetic shift right we can do the same, if we ensure
1556  // the And doesn't use any bits being shifted in. Normally these would
1557  // be turned into lshr by SimplifyDemandedBits, but not if there is an
1558  // additional user.
1559  if (!IsAshr || (C2.shl(*C3).lshr(*C3) == C2)) {
1560  if (!Cmp.isSigned() ||
1561  (!C2.shl(*C3).isNegative() && !C1.shl(*C3).isNegative()))
1562  CanFold = true;
1563  }
1564  }
1565 
1566  if (CanFold) {
1567  APInt NewCst = IsShl ? C1.lshr(*C3) : C1.shl(*C3);
1568  APInt SameAsC1 = IsShl ? NewCst.shl(*C3) : NewCst.lshr(*C3);
1569  // Check to see if we are shifting out any of the bits being compared.
1570  if (SameAsC1 != C1) {
1571  // If we shifted bits out, the fold is not going to work out. As a
1572  // special case, check to see if this means that the result is always
1573  // true or false now.
1574  if (Cmp.getPredicate() == ICmpInst::ICMP_EQ)
1575  return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType()));
1576  if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
1577  return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType()));
1578  } else {
1579  Cmp.setOperand(1, ConstantInt::get(And->getType(), NewCst));
1580  APInt NewAndCst = IsShl ? C2.lshr(*C3) : C2.shl(*C3);
1581  And->setOperand(1, ConstantInt::get(And->getType(), NewAndCst));
1582  And->setOperand(0, Shift->getOperand(0));
1583  Worklist.Add(Shift); // Shift is dead.
1584  return &Cmp;
1585  }
1586  }
1587  }
1588 
1589  // Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is
1590  // preferable because it allows the C2 << Y expression to be hoisted out of a
1591  // loop if Y is invariant and X is not.
1592  if (Shift->hasOneUse() && C1.isNullValue() && Cmp.isEquality() &&
1593  !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
1594  // Compute C2 << Y.
1595  Value *NewShift =
1596  IsShl ? Builder.CreateLShr(And->getOperand(1), Shift->getOperand(1))
1597  : Builder.CreateShl(And->getOperand(1), Shift->getOperand(1));
1598 
1599  // Compute X & (C2 << Y).
1600  Value *NewAnd = Builder.CreateAnd(Shift->getOperand(0), NewShift);
1601  Cmp.setOperand(0, NewAnd);
1602  return &Cmp;
1603  }
1604 
1605  return nullptr;
1606 }
1607 
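// [Illustrative aside, not part of the original file.] The shift-through-mask
// rewrite above for one concrete lshr case on 8 bits:
// ((x >> 3) & 0x1C) == 0x14  <=>  (x & (0x1C << 3)) == (0x14 << 3).
// Constants are chosen so nothing is shifted out; helper name is hypothetical.
#include <cassert>
#include <cstdint>
static void andShiftSketch() {
  for (unsigned Xi = 0; Xi < 256; ++Xi) {
    uint8_t X = (uint8_t)Xi;
    bool Before = ((uint8_t)(X >> 3) & 0x1C) == 0x14;
    bool After = (X & (uint8_t)(0x1C << 3)) == (uint8_t)(0x14 << 3);
    assert(Before == After);
  }
}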
1608 /// Fold icmp (and X, C2), C1.
1609 Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
1610  BinaryOperator *And,
1611  const APInt &C1) {
1612  // For vectors: icmp ne (and X, 1), 0 --> trunc X to N x i1
1613  // TODO: We canonicalize to the longer form for scalars because we have
1614  // better analysis/folds for icmp, and codegen may be better with icmp.
1615  if (Cmp.getPredicate() == CmpInst::ICMP_NE && Cmp.getType()->isVectorTy() &&
1616  C1.isNullValue() && match(And->getOperand(1), m_One()))
1617  return new TruncInst(And->getOperand(0), Cmp.getType());
1618 
1619  const APInt *C2;
1620  if (!match(And->getOperand(1), m_APInt(C2)))
1621  return nullptr;
1622 
1623  if (!And->hasOneUse())
1624  return nullptr;
1625 
1626  // If the LHS is an 'and' of a truncate and we can widen the and/compare to
1627  // the input width without changing the value produced, eliminate the cast:
1628  //
1629  // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1'
1630  //
1631  // We can do this transformation if the constants do not have their sign bits
1632  // set or if it is an equality comparison. Extending a relational comparison
1633  // when we're checking the sign bit would not work.
1634  Value *W;
1635  if (match(And->getOperand(0), m_OneUse(m_Trunc(m_Value(W)))) &&
1636  (Cmp.isEquality() || (!C1.isNegative() && !C2->isNegative()))) {
1637  // TODO: Is this a good transform for vectors? Wider types may reduce
1638  // throughput. Should this transform be limited (even for scalars) by using
1639  // shouldChangeType()?
1640  if (!Cmp.getType()->isVectorTy()) {
1641  Type *WideType = W->getType();
1642  unsigned WideScalarBits = WideType->getScalarSizeInBits();
1643  Constant *ZextC1 = ConstantInt::get(WideType, C1.zext(WideScalarBits));
1644  Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
1645  Value *NewAnd = Builder.CreateAnd(W, ZextC2, And->getName());
1646  return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
1647  }
1648  }
1649 
1650  if (Instruction *I = foldICmpAndShift(Cmp, And, C1, *C2))
1651  return I;
1652 
1653  // (icmp pred (and (or (lshr A, B), A), 1), 0) -->
1654  // (icmp pred (and A, (or (shl 1, B), 1)), 0)
1655  //
1656  // iff pred isn't signed
1657  if (!Cmp.isSigned() && C1.isNullValue() && And->getOperand(0)->hasOneUse() &&
1658  match(And->getOperand(1), m_One())) {
1659  Constant *One = cast<Constant>(And->getOperand(1));
1660  Value *Or = And->getOperand(0);
1661  Value *A, *B, *LShr;
1662  if (match(Or, m_Or(m_Value(LShr), m_Value(A))) &&
1663  match(LShr, m_LShr(m_Specific(A), m_Value(B)))) {
1664  unsigned UsesRemoved = 0;
1665  if (And->hasOneUse())
1666  ++UsesRemoved;
1667  if (Or->hasOneUse())
1668  ++UsesRemoved;
1669  if (LShr->hasOneUse())
1670  ++UsesRemoved;
1671 
1672  // Compute A & ((1 << B) | 1)
1673  Value *NewOr = nullptr;
1674  if (auto *C = dyn_cast<Constant>(B)) {
1675  if (UsesRemoved >= 1)
1676  NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
1677  } else {
1678  if (UsesRemoved >= 3)
1679  NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
1680  /*HasNUW=*/true),
1681  One, Or->getName());
1682  }
1683  if (NewOr) {
1684  Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
1685  Cmp.setOperand(0, NewAnd);
1686  return &Cmp;
1687  }
1688  }
1689  }
1690 
1691  return nullptr;
1692 }
1693 
1694 /// Fold icmp (and X, Y), C.
1695 Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
1696  BinaryOperator *And,
1697  const APInt &C) {
1698  if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
1699  return I;
1700 
1701  // TODO: These all require that Y is constant too, so refactor with the above.
1702 
1703  // Try to optimize things like "A[i] & 42 == 0" to index computations.
1704  Value *X = And->getOperand(0);
1705  Value *Y = And->getOperand(1);
1706  if (auto *LI = dyn_cast<LoadInst>(X))
1707  if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
1708  if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
1709  if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
1710  !LI->isVolatile() && isa<ConstantInt>(Y)) {
1711  ConstantInt *C2 = cast<ConstantInt>(Y);
1712  if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
1713  return Res;
1714  }
1715 
1716  if (!Cmp.isEquality())
1717  return nullptr;
1718 
1719  // X & -C == -C -> X >u ~C
1720  // X & -C != -C -> X <=u ~C
1721  // iff C is a power of 2
1722  if (Cmp.getOperand(1) == Y && (-C).isPowerOf2()) {
1723  auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT
1724  : CmpInst::ICMP_ULE;
1725  return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
1726  }
1727 
1728  // (X & C2) == 0 -> (trunc X) >= 0
1729  // (X & C2) != 0 -> (trunc X) < 0
1730  // iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
1731  const APInt *C2;
1732  if (And->hasOneUse() && C.isNullValue() && match(Y, m_APInt(C2))) {
1733  int32_t ExactLogBase2 = C2->exactLogBase2();
1734  if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
1735  Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
1736  if (And->getType()->isVectorTy())
1737  NTy = VectorType::get(NTy, And->getType()->getVectorNumElements());
1738  Value *Trunc = Builder.CreateTrunc(X, NTy);
1739  auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
1740  : CmpInst::ICMP_SLT;
1741  return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
1742  }
1743  }
1744 
1745  return nullptr;
1746 }
1747 
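// [Illustrative aside, not part of the original file.] The "X & -C == -C"
// rewrite above, brute-forced on 8 bits with C = 8 (a power of 2):
// (x & 0xF8) == 0xF8  <=>  x >u 0xF7, i.e. x >u ~C. Helper name is
// hypothetical.
#include <cassert>
#include <cstdint>
static void andNegPow2Sketch() {
  const uint8_t C = 8, NegC = (uint8_t)-C; // NegC == 0xF8
  for (unsigned Xi = 0; Xi < 256; ++Xi) {
    uint8_t X = (uint8_t)Xi;
    assert(((X & NegC) == NegC) == (X > (uint8_t)~C));
  }
}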
1748 /// Fold icmp (or X, Y), C.
1749 Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
1750  const APInt &C) {
1751  ICmpInst::Predicate Pred = Cmp.getPredicate();
1752  if (C.isOneValue()) {
1753  // icmp slt signum(V) 1 --> icmp slt V, 1
1754  Value *V = nullptr;
1755  if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
1756  return new ICmpInst(ICmpInst::ICMP_SLT, V,
1757  ConstantInt::get(V->getType(), 1));
1758  }
1759 
1760  // X | C == C --> X <=u C
1761  // X | C != C --> X >u C
1762  // iff C+1 is a power of 2 (C is a bitmask of the low bits)
1763  if (Cmp.isEquality() && Cmp.getOperand(1) == Or->getOperand(1) &&
1764  (C + 1).isPowerOf2()) {
1765  Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
1766  return new ICmpInst(Pred, Or->getOperand(0), Or->getOperand(1));
1767  }
1768 
1769  if (!Cmp.isEquality() || !C.isNullValue() || !Or->hasOneUse())
1770  return nullptr;
1771 
1772  Value *P, *Q;
1773  if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
1774  // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
1775  // -> and (icmp eq P, null), (icmp eq Q, null).
1776  Value *CmpP =
1777  Builder.CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
1778  Value *CmpQ =
1779  Builder.CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
1780  auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1781  return BinaryOperator::Create(BOpc, CmpP, CmpQ);
1782  }
1783 
1784  // Are we using xors to bitwise check for a pair of (in)equalities? Convert to
1785  // a shorter form that has more potential to be folded even further.
1786  Value *X1, *X2, *X3, *X4;
1787  if (match(Or->getOperand(0), m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
1788  match(Or->getOperand(1), m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
1789  // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
1790  // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
1791  Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
1792  Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4);
1793  auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
1794  return BinaryOperator::Create(BOpc, Cmp12, Cmp34);
1795  }
1796 
1797  return nullptr;
1798 }
1799 
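// [Illustrative aside, not part of the original file.] The low-bit-mask 'or'
// fold above, brute-forced on 8 bits with C = 7 (so C+1 is a power of 2):
// (x | 7) == 7  <=>  x <=u 7. Helper name is hypothetical.
#include <cassert>
#include <cstdint>
static void orLowMaskSketch() {
  const uint8_t C = 7;
  for (unsigned Xi = 0; Xi < 256; ++Xi) {
    uint8_t X = (uint8_t)Xi;
    assert(((X | C) == C) == (X <= C));
  }
}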
1800 /// Fold icmp (mul X, Y), C.
1801 Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp,
1802  BinaryOperator *Mul,
1803  const APInt &C) {
1804  const APInt *MulC;
1805  if (!match(Mul->getOperand(1), m_APInt(MulC)))
1806  return nullptr;
1807 
1808  // If this is a test of the sign bit and the multiply is sign-preserving with
1809  // a constant operand, use the multiply LHS operand instead.
1810  ICmpInst::Predicate Pred = Cmp.getPredicate();
1811  if (isSignTest(Pred, C) && Mul->hasNoSignedWrap()) {
1812  if (MulC->isNegative())
1813  Pred = ICmpInst::getSwappedPredicate(Pred);
1814  return new ICmpInst(Pred, Mul->getOperand(0),
1815  Constant::getNullValue(Mul->getType()));
1816  }
1817 
1818  return nullptr;
1819 }
1820 
1821 /// Fold icmp (shl 1, Y), C.
1822 static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
1823  const APInt &C) {
1824  Value *Y;
1825  if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
1826  return nullptr;
1827 
1828  Type *ShiftType = Shl->getType();
1829  unsigned TypeBits = C.getBitWidth();
1830  bool CIsPowerOf2 = C.isPowerOf2();
1831  ICmpInst::Predicate Pred = Cmp.getPredicate();
1832  if (Cmp.isUnsigned()) {
1833  // (1 << Y) pred C -> Y pred Log2(C)
1834  if (!CIsPowerOf2) {
1835  // (1 << Y) < 30 -> Y <= 4
1836  // (1 << Y) <= 30 -> Y <= 4
1837  // (1 << Y) >= 30 -> Y > 4
1838  // (1 << Y) > 30 -> Y > 4
1839  if (Pred == ICmpInst::ICMP_ULT)
1840  Pred = ICmpInst::ICMP_ULE;
1841  else if (Pred == ICmpInst::ICMP_UGE)
1842  Pred = ICmpInst::ICMP_UGT;
1843  }
1844 
1845  // (1 << Y) >= 2147483648 -> Y >= 31 -> Y == 31
1846  // (1 << Y) < 2147483648 -> Y < 31 -> Y != 31
1847  unsigned CLog2 = C.logBase2();
1848  if (CLog2 == TypeBits - 1) {
1849  if (Pred == ICmpInst::ICMP_UGE)
1850  Pred = ICmpInst::ICMP_EQ;
1851  else if (Pred == ICmpInst::ICMP_ULT)
1852  Pred = ICmpInst::ICMP_NE;
1853  }
1854  return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
1855  } else if (Cmp.isSigned()) {
1856  Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
1857  if (C.isAllOnesValue()) {
1858  // (1 << Y) <= -1 -> Y == 31
1859  if (Pred == ICmpInst::ICMP_SLE)
1860  return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
1861 
1862  // (1 << Y) > -1 -> Y != 31
1863  if (Pred == ICmpInst::ICMP_SGT)
1864  return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
1865  } else if (!C) {
1866  // (1 << Y) < 0 -> Y == 31
1867  // (1 << Y) <= 0 -> Y == 31
1868  if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
1869  return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
1870 
1871  // (1 << Y) >= 0 -> Y != 31
1872  // (1 << Y) > 0 -> Y != 31
1873  if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
1874  return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
1875  }
1876  } else if (Cmp.isEquality() && CIsPowerOf2) {
1877  return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C.logBase2()));
1878  }
1879 
1880  return nullptr;
1881 }
1882 
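// [Illustrative aside, not part of the original file.] The (1 << Y) rewrite
// above for an unsigned compare against a non-power-of-2 constant:
// (1u << y) <u 30  <=>  y <=u 4, since 2^4 = 16 < 30 <= 32 = 2^5.
// Helper name is hypothetical.
#include <cassert>
static void shlOneSketch() {
  for (unsigned Y = 0; Y < 32; ++Y)
    assert(((1u << Y) < 30u) == (Y <= 4u));
}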
1883 /// Fold icmp (shl X, Y), C.
1884 Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
1885  BinaryOperator *Shl,
1886  const APInt &C) {
1887  const APInt *ShiftVal;
1888  if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
1889  return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
1890 
1891  const APInt *ShiftAmt;
1892  if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
1893  return foldICmpShlOne(Cmp, Shl, C);
1894 
1895  // Check that the shift amount is in range. If not, don't perform undefined
1896  // shifts. When the shift is visited, it will be simplified.
1897  unsigned TypeBits = C.getBitWidth();
1898  if (ShiftAmt->uge(TypeBits))
1899  return nullptr;
1900 
1901  ICmpInst::Predicate Pred = Cmp.getPredicate();
1902  Value *X = Shl->getOperand(0);
1903  Type *ShType = Shl->getType();
1904 
1905  // NSW guarantees that we are only shifting out sign bits from the high bits,
1906  // so we can ASHR the compare constant without needing a mask and eliminate
1907  // the shift.
1908  if (Shl->hasNoSignedWrap()) {
1909  if (Pred == ICmpInst::ICMP_SGT) {
1910  // icmp Pred (shl nsw X, ShiftAmt), C --> icmp Pred X, (C >>s ShiftAmt)
1911  APInt ShiftedC = C.ashr(*ShiftAmt);
1912  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1913  }
1914  if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
1915  C.ashr(*ShiftAmt).shl(*ShiftAmt) == C) {
1916  APInt ShiftedC = C.ashr(*ShiftAmt);
1917  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1918  }
1919  if (Pred == ICmpInst::ICMP_SLT) {
1920  // SLE is the same as above, but SLE is canonicalized to SLT, so convert:
1921  // (X << S) <=s C is equiv to X <=s (C >> S) for all C
1922  // (X << S) <s (C + 1) is equiv to X <s (C >> S) + 1 if C <s SMAX
1923  // (X << S) <s C is equiv to X <s ((C - 1) >> S) + 1 if C >s SMIN
1924  assert(!C.isMinSignedValue() && "Unexpected icmp slt");
1925  APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1;
1926  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1927  }
1928  // If this is a signed comparison to 0 and the shift is sign preserving,
1929  // use the shift LHS operand instead; isSignTest may change 'Pred', so only
1930  // do that if we're sure to not continue on in this function.
1931  if (isSignTest(Pred, C))
1932  return new ICmpInst(Pred, X, Constant::getNullValue(ShType));
1933  }
1934 
1935  // NUW guarantees that we are only shifting out zero bits from the high bits,
1936  // so we can LSHR the compare constant without needing a mask and eliminate
1937  // the shift.
1938  if (Shl->hasNoUnsignedWrap()) {
1939  if (Pred == ICmpInst::ICMP_UGT) {
1940  // icmp Pred (shl nuw X, ShiftAmt), C --> icmp Pred X, (C >>u ShiftAmt)
1941  APInt ShiftedC = C.lshr(*ShiftAmt);
1942  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1943  }
1944  if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
1945  C.lshr(*ShiftAmt).shl(*ShiftAmt) == C) {
1946  APInt ShiftedC = C.lshr(*ShiftAmt);
1947  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1948  }
1949  if (Pred == ICmpInst::ICMP_ULT) {
1950  // ULE is the same as above, but ULE is canonicalized to ULT, so convert:
1951  // (X << S) <=u C is equiv to X <=u (C >> S) for all C
1952  // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
1953  // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
1954  assert(C.ugt(0) && "ult 0 should have been eliminated");
1955  APInt ShiftedC = (C - 1).lshr(*ShiftAmt) + 1;
1956  return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
1957  }
1958  }
1959 
1960  if (Cmp.isEquality() && Shl->hasOneUse()) {
1961  // Strength-reduce the shift into an 'and'.
1962  Constant *Mask = ConstantInt::get(
1963  ShType,
1964  APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
1965  Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
1966  Constant *LShrC = ConstantInt::get(ShType, C.lshr(*ShiftAmt));
1967  return new ICmpInst(Pred, And, LShrC);
1968  }
1969 
1970  // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
1971  bool TrueIfSigned = false;
1972  if (Shl->hasOneUse() && isSignBitCheck(Pred, C, TrueIfSigned)) {
1973  // (X << 31) <s 0 --> (X & 1) != 0
1974  Constant *Mask = ConstantInt::get(
1975  ShType,
1976  APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
1977  Value *And = Builder.CreateAnd(X, Mask, Shl->getName() + ".mask");
1978  return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
1979  And, Constant::getNullValue(ShType));
1980  }
1981 
1982  // Transform (icmp pred iM (shl iM %v, N), C)
1983  // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N))
1984  // Transform the shl to a trunc if (trunc (C>>N)) has no loss and M-N is a legal integer width.
1985  // This enables us to get rid of the shift in favor of a trunc that may be
1986  // free on the target. It has the additional benefit of comparing to a
1987  // smaller constant that may be more target-friendly.
1988  unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
1989  if (Shl->hasOneUse() && Amt != 0 && C.countTrailingZeros() >= Amt &&
1990  DL.isLegalInteger(TypeBits - Amt)) {
1991  Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
1992  if (ShType->isVectorTy())
1993  TruncTy = VectorType::get(TruncTy, ShType->getVectorNumElements());
1994  Constant *NewC =
1995  ConstantInt::get(TruncTy, C.ashr(*ShiftAmt).trunc(TypeBits - Amt));
1996  return new ICmpInst(Pred, Builder.CreateTrunc(X, TruncTy), NewC);
1997  }
1998 
1999  return nullptr;
2000 }
2001 
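// [Illustrative aside, not part of the original file.] The nsw-shl fold above,
// checked on 8-bit values where the shift provably does not wrap:
// (x << 2) >s 13  <=>  x >s (13 >>s 2) = 3. Helper name is hypothetical; the
// multiply stands in for "shl nsw" to sidestep signed-shift UB in C++.
#include <cassert>
static void shlNswSketch() {
  for (int X = -128; X < 128; ++X) {
    int Shifted = X * 4; // x << 2, computed widely
    if (Shifted < -128 || Shifted > 127)
      continue; // the i8 shift would wrap; nsw does not hold, fold untested
    assert((Shifted > 13) == (X > 3));
  }
}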
2002 /// Fold icmp ({al}shr X, Y), C.
2003 Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
2004  BinaryOperator *Shr,
2005  const APInt &C) {
2006  // An exact shr only shifts out zero bits, so:
2007  // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
2008  Value *X = Shr->getOperand(0);
2009  CmpInst::Predicate Pred = Cmp.getPredicate();
2010  if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() &&
2011  C.isNullValue())
2012  return new ICmpInst(Pred, X, Cmp.getOperand(1));
2013 
2014  const APInt *ShiftVal;
2015  if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal)))
2016  return foldICmpShrConstConst(Cmp, Shr->getOperand(1), C, *ShiftVal);
2017 
2018  const APInt *ShiftAmt;
2019  if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
2020  return nullptr;
2021 
2022  // Check that the shift amount is in range. If not, don't perform undefined
2023  // shifts. When the shift is visited it will be simplified.
2024  unsigned TypeBits = C.getBitWidth();
2025  unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
2026  if (ShAmtVal >= TypeBits || ShAmtVal == 0)
2027  return nullptr;
2028 
2029  bool IsAShr = Shr->getOpcode() == Instruction::AShr;
2030  bool IsExact = Shr->isExact();
2031  Type *ShrTy = Shr->getType();
2032  // TODO: If we could guarantee that InstSimplify would handle all of the
2033  // constant-value-based preconditions in the folds below, then we could assert
2034  // those conditions rather than checking them. This is difficult because of
2035  // undef/poison (PR34838).
2036  if (IsAShr) {
2037  if (Pred == CmpInst::ICMP_SLT || (Pred == CmpInst::ICMP_SGT && IsExact)) {
2038  // icmp slt (ashr X, ShAmtC), C --> icmp slt X, (C << ShAmtC)
2039  // icmp sgt (ashr exact X, ShAmtC), C --> icmp sgt X, (C << ShAmtC)
2040  APInt ShiftedC = C.shl(ShAmtVal);
2041  if (ShiftedC.ashr(ShAmtVal) == C)
2042  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2043  }
2044  if (Pred == CmpInst::ICMP_SGT) {
2045  // icmp sgt (ashr X, ShAmtC), C --> icmp sgt X, ((C + 1) << ShAmtC) - 1
2046  APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2047  if (!C.isMaxSignedValue() && !(C + 1).shl(ShAmtVal).isMinSignedValue() &&
2048  (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
2049  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2050  }
2051  } else {
2052  if (Pred == CmpInst::ICMP_ULT || (Pred == CmpInst::ICMP_UGT && IsExact)) {
2053  // icmp ult (lshr X, ShAmtC), C --> icmp ult X, (C << ShAmtC)
2054  // icmp ugt (lshr exact X, ShAmtC), C --> icmp ugt X, (C << ShAmtC)
2055  APInt ShiftedC = C.shl(ShAmtVal);
2056  if (ShiftedC.lshr(ShAmtVal) == C)
2057  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2058  }
2059  if (Pred == CmpInst::ICMP_UGT) {
2060  // icmp ugt (lshr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
2061  APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
2062  if ((ShiftedC + 1).lshr(ShAmtVal) == (C + 1))
2063  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
2064  }
2065  }
2066 
2067  if (!Cmp.isEquality())
2068  return nullptr;
2069 
2070  // Handle equality comparisons of shift-by-constant.
2071 
2072  // If the comparison constant changes with the shift, the comparison cannot
2073  // succeed (bits of the comparison constant cannot match the shifted value).
2074  // This should be known by InstSimplify and already be folded to true/false.
2075  assert(((IsAShr && C.shl(ShAmtVal).ashr(ShAmtVal) == C) ||
2076  (!IsAShr && C.shl(ShAmtVal).lshr(ShAmtVal) == C)) &&
2077  "Expected icmp+shr simplify did not occur.");
2078 
2079  // If the bits shifted out are known zero, compare the unshifted value:
2080  // (X & 4) >> 1 == 2 --> (X & 4) == 4.
2081  if (Shr->isExact())
2082  return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, C << ShAmtVal));
2083 
2084  if (Shr->hasOneUse()) {
2085  // Canonicalize the shift into an 'and':
2086  // icmp eq/ne (shr X, ShAmt), C --> icmp eq/ne (and X, HiMask), (C << ShAmt)
2087  APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
2088  Constant *Mask = ConstantInt::get(ShrTy, Val);
2089  Value *And = Builder.CreateAnd(X, Mask, Shr->getName() + ".mask");
2090  return new ICmpInst(Pred, And, ConstantInt::get(ShrTy, C << ShAmtVal));
2091  }
2092 
2093  return nullptr;
2094 }
2095 
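// [Illustrative aside, not part of the original file.] The lshr fold above on
// 8 bits: (x >>u 3) <u 5  <=>  x <u (5 << 3), valid because (5 << 3) >> 3 == 5
// (nothing is lost re-shifting the bound). Helper name is hypothetical.
#include <cassert>
#include <cstdint>
static void lshrUltSketch() {
  for (unsigned Xi = 0; Xi < 256; ++Xi) {
    uint8_t X = (uint8_t)Xi;
    assert(((uint8_t)(X >> 3) < 5) == (X < (5 << 3)));
  }
}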
2096 /// Fold icmp (udiv X, Y), C.
2097 Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
2098  BinaryOperator *UDiv,
2099  const APInt &C) {
2100  const APInt *C2;
2101  if (!match(UDiv->getOperand(0), m_APInt(C2)))
2102  return nullptr;
2103 
2104  assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
2105 
2106  // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
2107  Value *Y = UDiv->getOperand(1);
2108  if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) {
2109  assert(!C.isMaxValue() &&
2110  "icmp ugt X, UINT_MAX should have been simplified already.");
2111  return new ICmpInst(ICmpInst::ICMP_ULE, Y,
2112  ConstantInt::get(Y->getType(), C2->udiv(C + 1)));
2113  }
2114 
2115  // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
2116  if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
2117  assert(C != 0 && "icmp ult X, 0 should have been simplified already.");
2118  return new ICmpInst(ICmpInst::ICMP_UGT, Y,
2119  ConstantInt::get(Y->getType(), C2->udiv(C)));
2120  }
2121 
2122  return nullptr;
2123 }
2124 
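// [Illustrative aside, not part of the original file.] The constant-numerator
// udiv fold above, brute-forced on 8 bits with C2 = 200 and C = 6:
// (200 /u y) >u 6  <=>  y <=u 200 / (6 + 1) = 28. Helper name is hypothetical.
#include <cassert>
#include <cstdint>
static void udivConstNumSketch() {
  const uint8_t C2 = 200, C = 6;
  for (unsigned Yi = 1; Yi < 256; ++Yi) { // y == 0 excluded (division by zero)
    uint8_t Y = (uint8_t)Yi;
    assert(((uint8_t)(C2 / Y) > C) == (Y <= C2 / (C + 1)));
  }
}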
2125 /// Fold icmp ({su}div X, Y), C.
2126 Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
2127  BinaryOperator *Div,
2128  const APInt &C) {
2129  // Fold: icmp pred ([us]div X, C2), C -> range test
2130  // Fold this div into the comparison, producing a range check.
2131  // Determine, based on the divide type, which range is being
2132  // checked. If there is an overflow on the low or high side, remember
2133  // it, otherwise compute the range [low, hi) bounding the new value.
2134  // See: InsertRangeTest above for the kinds of replacements possible.
2135  const APInt *C2;
2136  if (!match(Div->getOperand(1), m_APInt(C2)))
2137  return nullptr;
2138 
2139  // FIXME: If the operand types don't match the type of the divide
2140  // then don't attempt this transform. The code below doesn't have the
2141  // logic to deal with a signed divide and an unsigned compare (and
2142  // vice versa). This is because (x /s C2) <s C produces different
2143  // results than (x /s C2) <u C or (x /u C2) <s C or even
2144  // (x /u C2) <u C. Simply casting the operands and result won't
2145  // work. :( The if statement below tests that condition and bails
2146  // if it finds it.
2147  bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
2148  if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
2149  return nullptr;
2150 
2151  // The ProdOV computation fails on divide by 0 and divide by -1. Cases with
2152  // INT_MIN will also fail if the divisor is 1. Although folds of all these
2153  // division-by-constant cases should be present, we can not assert that they
2154  // have happened before we reach this icmp instruction.
2155  if (C2->isNullValue() || C2->isOneValue() ||
2156  (DivIsSigned && C2->isAllOnesValue()))
2157  return nullptr;
2158 
2159  // Compute Prod = C * C2. We are essentially solving an equation of
2160  // form X / C2 = C. We solve for X by multiplying C2 and C.
2161  // By solving for X, we can turn this into a range check instead of computing
2162  // a divide.
2163  APInt Prod = C * *C2;
2164 
2165  // Determine whether the product overflowed by dividing it back by C2 and
2166  // checking that the result is not C. Make sure we do the same kind of
2167  // divide as in the LHS instruction that we're folding.
2168  bool ProdOV = (DivIsSigned ? Prod.sdiv(*C2) : Prod.udiv(*C2)) != C;
2169 
2170  ICmpInst::Predicate Pred = Cmp.getPredicate();
2171 
2172  // If the division is known to be exact, then there is no remainder from the
2173  // divide, so the covered range has size one; otherwise its size is the divisor.
2174  APInt RangeSize = Div->isExact() ? APInt(C2->getBitWidth(), 1) : *C2;
2175 
2176  // Figure out the interval that is being checked. For example, a comparison
2177  // like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
2178  // Compute this interval based on the constants involved and the signedness of
2179  // the compare/divide. This computes a half-open interval, keeping track of
2180  // whether either value in the interval overflows. After analysis, each
2181  // overflow variable is set to 0 if its corresponding bound variable is valid,
2182  // -1 if it overflowed off the bottom end, or +1 if it overflowed off the top end.
2183  int LoOverflow = 0, HiOverflow = 0;
2184  APInt LoBound, HiBound;
2185 
2186  if (!DivIsSigned) { // udiv
2187  // e.g. X/5 op 3 --> [15, 20)
2188  LoBound = Prod;
2189  HiOverflow = LoOverflow = ProdOV;
2190  if (!HiOverflow) {
2191  // If this is not an exact divide, then many values in the range collapse
2192  // to the same result value.
2193  HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
2194  }
2195  } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
2196  if (C.isNullValue()) { // (X / pos) op 0
2197  // Can't overflow. e.g. X/2 op 0 --> [-1, 2)
2198  LoBound = -(RangeSize - 1);
2199  HiBound = RangeSize;
2200  } else if (C.isStrictlyPositive()) { // (X / pos) op pos
2201  LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
2202  HiOverflow = LoOverflow = ProdOV;
2203  if (!HiOverflow)
2204  HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
2205  } else { // (X / pos) op neg
2206  // e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
2207  HiBound = Prod + 1;
2208  LoOverflow = HiOverflow = ProdOV ? -1 : 0;
2209  if (!LoOverflow) {
2210  APInt DivNeg = -RangeSize;
2211  LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
2212  }
2213  }
2214  } else if (C2->isNegative()) { // Divisor is < 0.
2215  if (Div->isExact())
2216  RangeSize.negate();
2217  if (C.isNullValue()) { // (X / neg) op 0
2218  // e.g. X/-5 op 0 --> [-4, 5)
2219  LoBound = RangeSize + 1;
2220  HiBound = -RangeSize;
2221  if (HiBound == *C2) { // -INTMIN = INTMIN
2222  HiOverflow = 1; // [INTMIN+1, overflow)
2223  HiBound = APInt(); // e.g. X/INTMIN = 0 --> X > INTMIN
2224  }
2225  } else if (C.isStrictlyPositive()) { // (X / neg) op pos
2226  // e.g. X/-5 op 3 --> [-19, -14)
2227  HiBound = Prod + 1;
2228  HiOverflow = LoOverflow = ProdOV ? -1 : 0;
2229  if (!LoOverflow)
2230  LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
2231  } else { // (X / neg) op neg
2232  LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
2233  LoOverflow = HiOverflow = ProdOV;
2234  if (!HiOverflow)
2235  HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
2236  }
2237 
2238  // Dividing by a negative swaps the condition. LT <-> GT
2239  Pred = ICmpInst::getSwappedPredicate(Pred);
2240  }
2241 
2242  Value *X = Div->getOperand(0);
2243  switch (Pred) {
2244  default: llvm_unreachable("Unhandled icmp opcode!");
2245  case ICmpInst::ICMP_EQ:
2246  if (LoOverflow && HiOverflow)
2247  return replaceInstUsesWith(Cmp, Builder.getFalse());
2248  if (HiOverflow)
2249  return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
2250  ICmpInst::ICMP_UGE, X,
2251  ConstantInt::get(Div->getType(), LoBound));
2252  if (LoOverflow)
2253  return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
2254  ICmpInst::ICMP_ULT, X,
2255  ConstantInt::get(Div->getType(), HiBound));
2256  return replaceInstUsesWith(
2257  Cmp, insertRangeTest(X, LoBound, HiBound, DivIsSigned, true));
2258  case ICmpInst::ICMP_NE:
2259  if (LoOverflow && HiOverflow)
2260  return replaceInstUsesWith(Cmp, Builder.getTrue());
2261  if (HiOverflow)
2262  return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
2263  ICmpInst::ICMP_ULT, X,
2264  ConstantInt::get(Div->getType(), LoBound));
2265  if (LoOverflow)
2266  return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
2267  ICmpInst::ICMP_UGE, X,
2268  ConstantInt::get(Div->getType(), HiBound));
2269  return replaceInstUsesWith(Cmp,
2270  insertRangeTest(X, LoBound, HiBound,
2271  DivIsSigned, false));
2272  case ICmpInst::ICMP_ULT:
2273  case ICmpInst::ICMP_SLT:
2274  if (LoOverflow == +1) // Low bound is greater than input range.
2275  return replaceInstUsesWith(Cmp, Builder.getTrue());
2276  if (LoOverflow == -1) // Low bound is less than input range.
2277  return replaceInstUsesWith(Cmp, Builder.getFalse());
2278  return new ICmpInst(Pred, X, ConstantInt::get(Div->getType(), LoBound));
2279  case ICmpInst::ICMP_UGT:
2280  case ICmpInst::ICMP_SGT:
2281  if (HiOverflow == +1) // High bound greater than input range.
2282  return replaceInstUsesWith(Cmp, Builder.getFalse());
2283  if (HiOverflow == -1) // High bound less than input range.
2284  return replaceInstUsesWith(Cmp, Builder.getTrue());
2285  if (Pred == ICmpInst::ICMP_UGT)
2286  return new ICmpInst(ICmpInst::ICMP_UGE, X,
2287  ConstantInt::get(Div->getType(), HiBound));
2288  return new ICmpInst(ICmpInst::ICMP_SGE, X,
2289  ConstantInt::get(Div->getType(), HiBound));
2290  }
2291 
2292  return nullptr;
2293 }
2294 
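// [Illustrative aside, not part of the original file.] The div-to-range-check
// idea above with concrete numbers on 8 bits: x /u 5 == 3 exactly when x lies
// in the half-open interval [15, 20). Helper name is hypothetical.
#include <cassert>
#include <cstdint>
static void udivRangeSketch() {
  for (unsigned Xi = 0; Xi < 256; ++Xi) {
    uint8_t X = (uint8_t)Xi;
    assert((X / 5 == 3) == (X >= 15 && X < 20));
  }
}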
2295 /// Fold icmp (sub X, Y), C.
2296 Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
2297  BinaryOperator *Sub,
2298  const APInt &C) {
2299  Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
2300  ICmpInst::Predicate Pred = Cmp.getPredicate();
2301 
2302  // The following transforms are only worth it if the only user of the subtract
2303  // is the icmp.
2304  if (!Sub->hasOneUse())
2305  return nullptr;
2306 
2307  if (Sub->hasNoSignedWrap()) {
2308  // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
2309  if (Pred == ICmpInst::ICMP_SGT && C.isAllOnesValue())
2310  return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
2311 
2312  // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
2313  if (Pred == ICmpInst::ICMP_SGT && C.isNullValue())
2314  return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
2315 
2316  // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
2317  if (Pred == ICmpInst::ICMP_SLT && C.isNullValue())
2318  return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
2319 
2320  // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
2321  if (Pred == ICmpInst::ICMP_SLT && C.isOneValue())
2322  return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
2323  }
2324 
2325  const APInt *C2;
2326  if (!match(X, m_APInt(C2)))
2327  return nullptr;
2328 
2329  // C2 - Y <u C -> (Y | (C - 1)) == C2
2330  // iff (C2 & (C - 1)) == C - 1 and C is a power of 2
2331  if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() &&
2332  (*C2 & (C - 1)) == (C - 1))
2333  return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateOr(Y, C - 1), X);
2334 
2335  // C2 - Y >u C -> (Y | C) != C2
2336  // iff C2 & C == C and C + 1 is a power of 2
2337  if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == C)
2338  return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateOr(Y, C), X);
2339 
2340  return nullptr;
2341 }
2342 
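// [Illustrative aside, not part of the original file.] The subtract-from-
// constant fold above, brute-forced on 8 bits with C = 8 (a power of 2) and
// C2 = 0x57 (whose low three bits are all set, satisfying C2 & (C-1) == C-1):
// (0x57 - y) <u 8  <=>  (y | 7) == 0x57. Helper name is hypothetical.
#include <cassert>
#include <cstdint>
static void subUltSketch() {
  const uint8_t C2 = 0x57, C = 8;
  for (unsigned Yi = 0; Yi < 256; ++Yi) {
    uint8_t Y = (uint8_t)Yi;
    assert(((uint8_t)(C2 - Y) < C) == ((Y | (C - 1)) == C2));
  }
}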
2343 /// Fold icmp (add X, Y), C.
2344 Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
2345  BinaryOperator *Add,
2346  const APInt &C) {
2347  Value *Y = Add->getOperand(1);
2348  const APInt *C2;
2349  if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
2350  return nullptr;
2351 
2352  // Fold icmp pred (add X, C2), C.
2353  Value *X = Add->getOperand(0);
2354  Type *Ty = Add->getType();
2355  CmpInst::Predicate Pred = Cmp.getPredicate();
2356 
2357  if (!Add->hasOneUse())
2358  return nullptr;
2359 
2360  // If the add does not wrap, we can always adjust the compare by subtracting
2361  // the constants. Equality comparisons are handled elsewhere. SGE/SLE/UGE/ULE
2362  // are canonicalized to SGT/SLT/UGT/ULT.
2363  if ((Add->hasNoSignedWrap() &&
2364  (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT)) ||
2365  (Add->hasNoUnsignedWrap() &&
2366  (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT))) {
2367  bool Overflow;
2368  APInt NewC =
2369  Cmp.isSigned() ? C.ssub_ov(*C2, Overflow) : C.usub_ov(*C2, Overflow);
2370  // If there is overflow, the result must be true or false.
2371  // TODO: Can we assert there is no overflow because InstSimplify always
2372  // handles those cases?
2373  if (!Overflow)
2374  // icmp Pred (add nsw X, C2), C --> icmp Pred X, (C - C2)
2375  return new ICmpInst(Pred, X, ConstantInt::get(Ty, NewC));
2376  }
2377 
2378  auto CR = ConstantRange::makeExactICmpRegion(Pred, C).subtract(*C2);
2379  const APInt &Upper = CR.getUpper();
2380  const APInt &Lower = CR.getLower();
2381  if (Cmp.isSigned()) {
2382  if (Lower.isSignMask())
2383  return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
2384  if (Upper.isSignMask())
2385  return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
2386  } else {
2387  if (Lower.isMinValue())
2388  return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper));
2389  if (Upper.isMinValue())
2390  return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
2391  }
2392 
2393  // X+C2 <u C -> (X & -C) == -C2
2394  // iff C2 & (C-1) == 0
2395  // C is a power of 2
2396  if (Pred == ICmpInst::ICMP_ULT && C.isPowerOf2() && (*C2 & (C - 1)) == 0)
2397  return new ICmpInst(ICmpInst::ICMP_EQ, Builder.CreateAnd(X, -C),
2398  ConstantExpr::getNeg(cast<Constant>(Y)));
2399 
2400  // X+C2 >u C -> (X & ~C) != -C2
2401  // iff C2 & C == 0
2402  // C+1 is a power of 2
2403  if (Pred == ICmpInst::ICMP_UGT && (C + 1).isPowerOf2() && (*C2 & C) == 0)
2404  return new ICmpInst(ICmpInst::ICMP_NE, Builder.CreateAnd(X, ~C),
2405  ConstantExpr::getNeg(cast<Constant>(Y)));
2406 
2407  return nullptr;
2408 }
2409 
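// [Illustrative aside, not part of the original file.] The masked-add fold at
// the end of the function above, brute-forced on 8 bits with C = 8 (a power
// of 2) and C2 = 0x30 (which has no bits below C):
// (x + 0x30) <u 8  <=>  (x & -8) == -0x30. Helper name is hypothetical.
#include <cassert>
#include <cstdint>
static void addUltSketch() {
  const uint8_t C = 8, C2 = 0x30;
  for (unsigned Xi = 0; Xi < 256; ++Xi) {
    uint8_t X = (uint8_t)Xi;
    assert(((uint8_t)(X + C2) < C) ==
           ((uint8_t)(X & (uint8_t)-C) == (uint8_t)-C2));
  }
}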
2410 bool InstCombiner::matchThreeWayIntCompare(SelectInst *SI, Value *&LHS,
2411  Value *&RHS, ConstantInt *&Less,
2412  ConstantInt *&Equal,
2413  ConstantInt *&Greater) {
2414  // TODO: Generalize this to work with other comparison idioms or ensure
2415  // they get canonicalized into this form.
2416 
2417  // select i1 (a == b), i32 Equal, i32 (select i1 (a < b), i32 Less, i32
2418  // Greater), where Equal, Less and Greater are placeholders for any three
2419  // constants.
2420  ICmpInst::Predicate PredA, PredB;
2421  if (match(SI->getTrueValue(), m_ConstantInt(Equal)) &&
2422  match(SI->getCondition(), m_ICmp(PredA, m_Value(LHS), m_Value(RHS))) &&
2423  PredA == ICmpInst::ICMP_EQ &&
2424  match(SI->getFalseValue(),
2425  m_Select(m_ICmp(PredB, m_Specific(LHS), m_Specific(RHS)),
2426  m_ConstantInt(Less), m_ConstantInt(Greater))) &&
2427  PredB == ICmpInst::ICMP_SLT) {
2428  return true;
2429  }
2430  return false;
2431 }
2432 
2433 Instruction *InstCombiner::foldICmpSelectConstant(ICmpInst &Cmp,
2434  SelectInst *Select,
2435  ConstantInt *C) {
2436 
2437  assert(C && "Cmp RHS should be a constant int!");
2438  // If we're testing a constant value against the result of a three way
2439  // comparison, the result can be expressed directly in terms of the
2440  // original values being compared. Note: We could possibly be more
2441  // aggressive here and remove the hasOneUse test. The original select is
2442  // really likely to simplify or sink when we remove a test of the result.
2443  Value *OrigLHS, *OrigRHS;
2444  ConstantInt *C1LessThan, *C2Equal, *C3GreaterThan;
2445  if (Cmp.hasOneUse() &&
2446  matchThreeWayIntCompare(Select, OrigLHS, OrigRHS, C1LessThan, C2Equal,
2447  C3GreaterThan)) {
2448  assert(C1LessThan && C2Equal && C3GreaterThan);
2449 
2450  bool TrueWhenLessThan =
2451  ConstantExpr::getCompare(Cmp.getPredicate(), C1LessThan, C)
2452  ->isAllOnesValue();
2453  bool TrueWhenEqual =
2454  ConstantExpr::getCompare(Cmp.getPredicate(), C2Equal, C)
2455  ->isAllOnesValue();
2456  bool TrueWhenGreaterThan =
2457  ConstantExpr::getCompare(Cmp.getPredicate(), C3GreaterThan, C)
2458  ->isAllOnesValue();
2459 
2460  // This generates the new instruction that will replace the original Cmp
2461  // Instruction. Instead of enumerating the various combinations when
2462  // TrueWhenLessThan, TrueWhenEqual and TrueWhenGreaterThan are true versus
2463  // false, we rely on chaining of ORs and future passes of InstCombine to
2464  // simplify the OR further (i.e. a s< b || a == b becomes a s<= b).
2465 
2466  // When none of the three constants satisfy the predicate for the RHS (C),
2467  // the entire original Cmp can be simplified to false.
2468  Value *Cond = Builder.getFalse();
2469  if (TrueWhenLessThan)
2470  Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SLT, OrigLHS, OrigRHS));
2471  if (TrueWhenEqual)
2472  Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_EQ, OrigLHS, OrigRHS));
2473  if (TrueWhenGreaterThan)
2474  Cond = Builder.CreateOr(Cond, Builder.CreateICmp(ICmpInst::ICMP_SGT, OrigLHS, OrigRHS));
2475 
2476  return replaceInstUsesWith(Cmp, Cond);
2477  }
2478  return nullptr;
2479 }
2480 
2481 Instruction *InstCombiner::foldICmpBitCastConstant(ICmpInst &Cmp,
2482  BitCastInst *Bitcast,
2483  const APInt &C) {
2484  // Folding: icmp <pred> iN X, C
2485  // where X = bitcast <M x iK> (shufflevector <M x iK> %vec, undef, SC)) to iN
2486  // and C is a splat of a K-bit pattern
2487  // and SC is a constant vector = <C', C', C', ..., C'>
2488  // Into:
2489  // %E = extractelement <M x iK> %vec, i32 C'
2490  // icmp <pred> iK %E, trunc(C)
2491  if (!Bitcast->getType()->isIntegerTy() ||
2492  !Bitcast->getSrcTy()->isIntOrIntVectorTy())
2493  return nullptr;
2494 
2495  Value *BCIOp = Bitcast->getOperand(0);
2496  Value *Vec = nullptr; // 1st vector arg of the shufflevector
2497  Constant *Mask = nullptr; // Mask arg of the shufflevector
2498  if (match(BCIOp,
2499  m_ShuffleVector(m_Value(Vec), m_Undef(), m_Constant(Mask)))) {
2500  // Check whether every element of Mask is the same constant
2501  if (auto *Elem = dyn_cast_or_null<ConstantInt>(Mask->getSplatValue())) {
2502  auto *VecTy = cast<VectorType>(BCIOp->getType());
2503  auto *EltTy = cast<IntegerType>(VecTy->getElementType());
2504  auto Pred = Cmp.getPredicate();
2505  if (C.isSplat(EltTy->getBitWidth())) {
2506  // Fold the icmp based on the value of C
2507  // If C is M copies of an iK sized bit pattern,
2508  // then:
2509  // => %E = extractelement <N x iK> %vec, i32 Elem
2510  // icmp <pred> iK %SplatVal, <pattern>
2511  Value *Extract = Builder.CreateExtractElement(Vec, Elem);
2512  Value *NewC = ConstantInt::get(EltTy, C.trunc(EltTy->getBitWidth()));
2513  return new ICmpInst(Pred, Extract, NewC);
2514  }
2515  }
2516  }
2517  return nullptr;
2518 }
2519 
2520 /// Try to fold integer comparisons with a constant operand: icmp Pred X, C
2521 /// where X is some kind of instruction.
2522 Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
2523  const APInt *C;
2524  if (!match(Cmp.getOperand(1), m_APInt(C)))
2525  return nullptr;
2526 
2527  if (auto *BO = dyn_cast<BinaryOperator>(Cmp.getOperand(0))) {
2528  switch (BO->getOpcode()) {
2529  case Instruction::Xor:
2530  if (Instruction *I = foldICmpXorConstant(Cmp, BO, *C))
2531  return I;
2532  break;
2533  case Instruction::And:
2534  if (Instruction *I = foldICmpAndConstant(Cmp, BO, *C))
2535  return I;
2536  break;
2537  case Instruction::Or:
2538  if (Instruction *I = foldICmpOrConstant(Cmp, BO, *C))
2539  return I;
2540  break;
2541  case Instruction::Mul:
2542  if (Instruction *I = foldICmpMulConstant(Cmp, BO, *C))
2543  return I;
2544  break;
2545  case Instruction::Shl:
2546  if (Instruction *I = foldICmpShlConstant(Cmp, BO, *C))
2547  return I;
2548  break;
2549  case Instruction::LShr:
2550  case Instruction::AShr:
2551  if (Instruction *I = foldICmpShrConstant(Cmp, BO, *C))
2552  return I;
2553  break;
2554  case Instruction::UDiv:
2555  if (Instruction *I = foldICmpUDivConstant(Cmp, BO, *C))
2556  return I;
2557  LLVM_FALLTHROUGH;
2558  case Instruction::SDiv:
2559  if (Instruction *I = foldICmpDivConstant(Cmp, BO, *C))
2560  return I;
2561  break;
2562  case Instruction::Sub:
2563  if (Instruction *I = foldICmpSubConstant(Cmp, BO, *C))
2564  return I;
2565  break;
2566  case Instruction::Add:
2567  if (Instruction *I = foldICmpAddConstant(Cmp, BO, *C))
2568  return I;
2569  break;
2570  default:
2571  break;
2572  }
2573  // TODO: These folds could be refactored to be part of the above calls.
2574  if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, *C))
2575  return I;
2576  }
2577 
2578  // Match against CmpInst LHS being instructions other than binary operators.
2579 
2580  if (auto *SI = dyn_cast<SelectInst>(Cmp.getOperand(0))) {
2581  // For now, we only support constant integers while folding the
2582  // ICMP(SELECT)) pattern. We can extend this to support vector of integers
2583  // similar to the cases handled by binary ops above.
2584  if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(Cmp.getOperand(1)))
2585  if (Instruction *I = foldICmpSelectConstant(Cmp, SI, ConstRHS))
2586  return I;
2587  }
2588 
2589  if (auto *TI = dyn_cast<TruncInst>(Cmp.getOperand(0))) {
2590  if (Instruction *I = foldICmpTruncConstant(Cmp, TI, *C))
2591  return I;
2592  }
2593 
2594  if (auto *BCI = dyn_cast<BitCastInst>(Cmp.getOperand(0))) {
2595  if (Instruction *I = foldICmpBitCastConstant(Cmp, BCI, *C))
2596  return I;
2597  }
2598 
2599  if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, *C))
2600  return I;
2601 
2602  return nullptr;
2603 }
2604 
2605 /// Fold an icmp equality instruction with binary operator LHS and constant RHS:
2606 /// icmp eq/ne BO, C.
2607 Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
2608  BinaryOperator *BO,
2609  const APInt &C) {
2610  // TODO: Some of these folds could work with arbitrary constants, but this
2611  // function is limited to scalar and vector splat constants.
2612  if (!Cmp.isEquality())
2613  return nullptr;
2614 
2615  ICmpInst::Predicate Pred = Cmp.getPredicate();
2616  bool isICMP_NE = Pred == ICmpInst::ICMP_NE;
2617  Constant *RHS = cast<Constant>(Cmp.getOperand(1));
2618  Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
2619 
2620  switch (BO->getOpcode()) {
2621  case Instruction::SRem:
2622  // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
2623  if (C.isNullValue() && BO->hasOneUse()) {
2624  const APInt *BOC;
2625  if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
2626  Value *NewRem = Builder.CreateURem(BOp0, BOp1, BO->getName());
2627  return new ICmpInst(Pred, NewRem,
2628  Constant::getNullValue(BO->getType()));
2629  }
2630  }
2631  break;
2632  case Instruction::Add: {
2633  // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
2634  const APInt *BOC;
2635  if (match(BOp1, m_APInt(BOC))) {
2636  if (BO->hasOneUse()) {
2637  Constant *SubC = ConstantExpr::getSub(RHS, cast<Constant>(BOp1));
2638  return new ICmpInst(Pred, BOp0, SubC);
2639  }
2640  } else if (C.isNullValue()) {
2641  // Replace ((add A, B) != 0) with (A != -B) if A or B is
2642  // efficiently invertible, or if the add has just this one use.
2643  if (Value *NegVal = dyn_castNegVal(BOp1))
2644  return new ICmpInst(Pred, BOp0, NegVal);
2645  if (Value *NegVal = dyn_castNegVal(BOp0))
2646  return new ICmpInst(Pred, NegVal, BOp1);
2647  if (BO->hasOneUse()) {
2648  Value *Neg = Builder.CreateNeg(BOp1);
2649  Neg->takeName(BO);
2650  return new ICmpInst(Pred, BOp0, Neg);
2651  }
2652  }
2653  break;
2654  }
2655  case Instruction::Xor:
2656  if (BO->hasOneUse()) {
2657  if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
2658  // For the xor case, we can xor two constants together, eliminating
2659  // the explicit xor.
2660  return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
2661  } else if (C.isNullValue()) {
2662  // Replace ((xor A, B) != 0) with (A != B)
2663  return new ICmpInst(Pred, BOp0, BOp1);
2664  }
2665  }
2666  break;
2667  case Instruction::Sub:
2668  if (BO->hasOneUse()) {
2669  const APInt *BOC;
2670  if (match(BOp0, m_APInt(BOC))) {
2671  // Replace ((sub BOC, B) != C) with (B != BOC-C).
2672  Constant *SubC = ConstantExpr::getSub(cast<Constant>(BOp0), RHS);
2673  return new ICmpInst(Pred, BOp1, SubC);
2674  } else if (C.isNullValue()) {
2675  // Replace ((sub A, B) != 0) with (A != B).
2676  return new ICmpInst(Pred, BOp0, BOp1);
2677  }
2678  }
2679  break;
2680  case Instruction::Or: {
2681  const APInt *BOC;
2682  if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
2683  // Comparing if all bits outside of a constant mask are set?
2684  // Replace (X | C) == -1 with (X & ~C) == ~C.
2685  // This removes the -1 constant.
2686  Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1));
2687  Value *And = Builder.CreateAnd(BOp0, NotBOC);
2688  return new ICmpInst(Pred, And, NotBOC);
2689  }
2690  break;
2691  }
2692  case Instruction::And: {
2693  const APInt *BOC;
2694  if (match(BOp1, m_APInt(BOC))) {
2695  // If we have ((X & C) == C), turn it into ((X & C) != 0).
2696  if (C == *BOC && C.isPowerOf2())
2697  return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
2698  BO, Constant::getNullValue(RHS->getType()));
2699 
2700  // Don't perform the following transforms if the AND has multiple uses
2701  if (!BO->hasOneUse())
2702  break;
2703 
2704  // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
2705  if (BOC->isSignMask()) {
2706  Constant *Zero = Constant::getNullValue(BOp0->getType());
2707  auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
2708  return new ICmpInst(NewPred, BOp0, Zero);
2709  }
2710 
2711  // ((X & ~7) == 0) --> X < 8
2712  if (C.isNullValue() && (~(*BOC) + 1).isPowerOf2()) {
2713  Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
2714  auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
2715  return new ICmpInst(NewPred, BOp0, NegBOC);
2716  }
2717  }
2718  break;
2719  }
2720  case Instruction::Mul:
2721  if (C.isNullValue() && BO->hasNoSignedWrap()) {
2722  const APInt *BOC;
2723  if (match(BOp1, m_APInt(BOC)) && !BOC->isNullValue()) {
2724  // The trivial case (mul X, 0) is handled by InstSimplify.
2725  // General case : (mul X, C) != 0 iff X != 0
2726  // (mul X, C) == 0 iff X == 0
2727  return new ICmpInst(Pred, BOp0, Constant::getNullValue(RHS->getType()));
2728  }
2729  }
2730  break;
2731  case Instruction::UDiv:
2732  if (C.isNullValue()) {
2733  // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
2734  auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
2735  return new ICmpInst(NewPred, BOp1, BOp0);
2736  }
2737  break;
2738  default:
2739  break;
2740  }
2741  return nullptr;
2742 }
2743 
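// [Illustrative aside, not part of the original file.] One equality fold from
// the switch above, brute-forced on 8 bits: (x & ~7) == 0  <=>  x <u 8,
// because ~7 keeps exactly the bits at or above 8. Helper name is
// hypothetical.
#include <cassert>
#include <cstdint>
static void andHighMaskSketch() {
  for (unsigned Xi = 0; Xi < 256; ++Xi) {
    uint8_t X = (uint8_t)Xi;
    assert(((X & (uint8_t)~7) == 0) == (X < 8));
  }
}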
2744 /// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
2745 Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
2746  const APInt &C) {
2747  IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
2748  if (!II || !Cmp.isEquality())
2749  return nullptr;
2750 
2751  // Handle icmp {eq|ne} <intrinsic>, Constant.
2752  Type *Ty = II->getType();
2753  switch (II->getIntrinsicID()) {
2754  case Intrinsic::bswap:
2755  Worklist.Add(II);
2756  Cmp.setOperand(0, II->getArgOperand(0));
2757  Cmp.setOperand(1, ConstantInt::get(Ty, C.byteSwap()));
2758  return &Cmp;
2759 
2760  case Intrinsic::ctlz:
2761  case Intrinsic::cttz:
2762  // ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
2763  if (C == C.getBitWidth()) {
2764  Worklist.Add(II);
2765  Cmp.setOperand(0, II->getArgOperand(0));
2766  Cmp.setOperand(1, ConstantInt::getNullValue(Ty));
2767  return &Cmp;
2768  }
2769  break;
2770 
2771  case Intrinsic::ctpop: {
2772  // popcount(A) == 0 -> A == 0 and likewise for !=
2773  // popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
2774  bool IsZero = C.isNullValue();
2775  if (IsZero || C == C.getBitWidth()) {
2776  Worklist.Add(II);
2777  Cmp.setOperand(0, II->getArgOperand(0));
2778  auto *NewOp =
2779  IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty);
2780  Cmp.setOperand(1, NewOp);
2781  return &Cmp;
2782  }
2783  break;
2784  }
2785  default:
2786  break;
2787  }
2788 
2789  return nullptr;
2790 }
2791 
2792 /// Handle icmp with constant (but not simple integer constant) RHS.
2793 Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
2794  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
2795  Constant *RHSC = dyn_cast<Constant>(Op1);
2796  Instruction *LHSI = dyn_cast<Instruction>(Op0);
2797  if (!RHSC || !LHSI)
2798  return nullptr;
2799 
2800  switch (LHSI->getOpcode()) {
2801  case Instruction::GetElementPtr:
2802  // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
2803  if (RHSC->isNullValue() &&
2804  cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
2805  return new ICmpInst(
2806  I.getPredicate(), LHSI->getOperand(0),
2807  Constant::getNullValue(LHSI->getOperand(0)->getType()));
2808  break;
2809  case Instruction::PHI:
2810  // Only fold icmp into the PHI if the phi and icmp are in the same
2811  // block. If in the same block, we're encouraging jump threading. If
2812  // not, we are just pessimizing the code by making an i1 phi.
2813  if (LHSI->getParent() == I.getParent())
2814  if (Instruction *NV = foldOpIntoPhi(I, cast<PHINode>(LHSI)))
2815  return NV;
2816  break;
2817  case Instruction::Select: {
2818  // If either operand of the select is a constant, we can fold the
2819  // comparison into the select arms, which will cause one to be
2820  // constant folded and the select turned into a bitwise or.
2821  Value *Op1 = nullptr, *Op2 = nullptr;
2822  ConstantInt *CI = nullptr;
2823  if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
2824  Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
2825  CI = dyn_cast<ConstantInt>(Op1);
2826  }
2827  if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
2828  Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
2829  CI = dyn_cast<ConstantInt>(Op2);
2830  }
2831 
2832  // We only want to perform this transformation if it will not lead to
2833  // additional code. This is true if either both sides of the select
2834  // fold to a constant (in which case the icmp is replaced with a select
2835  // which will usually simplify) or this is the only user of the
2836  // select (in which case we are trading a select+icmp for a simpler
2837  // select+icmp) or all uses of the select can be replaced based on
2838  // dominance information ("Global cases").
2839  bool Transform = false;
2840  if (Op1 && Op2)
2841  Transform = true;
2842  else if (Op1 || Op2) {
2843  // Local case
2844  if (LHSI->hasOneUse())
2845  Transform = true;
2846  // Global cases
2847  else if (CI && !CI->isZero())
2848  // When Op1 is constant, try replacing the select with its second
2849  // operand. Otherwise Op2 is constant, so try replacing the select
2850  // with its first operand.
2851  Transform =
2852  replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, Op1 ? 2 : 1);
2853  }
2854  if (Transform) {
2855  if (!Op1)
2856  Op1 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
2857  I.getName());
2858  if (!Op2)
2859  Op2 = Builder.CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
2860  I.getName());
2861  return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
2862  }
2863  break;
2864  }
2865  case Instruction::IntToPtr:
2866  // icmp pred inttoptr(X), null -> icmp pred X, 0
2867  if (RHSC->isNullValue() &&
2868  DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
2869  return new ICmpInst(
2870  I.getPredicate(), LHSI->getOperand(0),
2871  Constant::getNullValue(LHSI->getOperand(0)->getType()));
2872  break;
2873 
2874  case Instruction::Load:
2875  // Try to optimize things like "A[i] > 4" to index computations.
2876  if (GetElementPtrInst *GEP =
2877  dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
2878  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
2879  if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
2880  !cast<LoadInst>(LHSI)->isVolatile())
2881  if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
2882  return Res;
2883  }
2884  break;
2885  }
2886 
2887  return nullptr;
2888 }
2889 
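// A sketch of the GetElementPtr case above in IR form (%p and the array
// type are hypothetical):
//   %g = getelementptr [4 x i32], [4 x i32]* %p, i64 0, i64 0
//   %c = icmp eq i32* %g, null
// all indices are zero, so the GEP cannot change the null-ness of %p:
//   %c = icmp eq [4 x i32]* %p, null
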
2890 /// Some comparisons can be simplified.
2891 /// In this case, we are looking for comparisons that look like
2892 /// a check for a lossy truncation.
2893 /// Folds:
2894 /// icmp SrcPred (x & Mask), x to icmp DstPred x, Mask
2895 /// Where Mask is some pattern that produces all-ones in low bits:
2896 /// (-1 >> y)
2897 /// ((-1 << y) >> y) <- non-canonical, has extra uses
2898 /// ~(-1 << y)
2899 /// ((1 << y) + (-1)) <- non-canonical, has extra uses
2900 /// The Mask can be a constant, too.
2901 /// For some predicates, the operands are commutative.
2902 /// For others, x can only be on a specific side.
2903 static Value *foldICmpWithLowBitMaskedVal(ICmpInst &I,
2904  InstCombiner::BuilderTy &Builder) {
2905  ICmpInst::Predicate SrcPred;
2906  Value *X, *M, *Y;
2907  auto m_VariableMask = m_CombineOr(
2908  m_CombineOr(m_Not(m_Shl(m_AllOnes(), m_Value())),
2909  m_Add(m_Shl(m_One(), m_Value()), m_AllOnes())),
2910  m_CombineOr(m_LShr(m_AllOnes(), m_Value()),
2911  m_LShr(m_Shl(m_AllOnes(), m_Value(Y)), m_Deferred(Y))));
2912  auto m_Mask = m_CombineOr(m_VariableMask, m_LowBitMask());
2913  if (!match(&I, m_c_ICmp(SrcPred,
2914  m_c_And(m_CombineAnd(m_Mask, m_Value(M)), m_Value(X)),
2915  m_Deferred(X))))
2916  return nullptr;
2917 
2918  ICmpInst::Predicate DstPred;
2919  switch (SrcPred) {
2920  case ICmpInst::Predicate::ICMP_EQ:
2921  // x & (-1 >> y) == x -> x u<= (-1 >> y)
2922  DstPred = ICmpInst::Predicate::ICMP_ULE;
2923  break;
2924  case ICmpInst::Predicate::ICMP_NE:
2925  // x & (-1 >> y) != x -> x u> (-1 >> y)
2926  DstPred = ICmpInst::Predicate::ICMP_UGT;
2927  break;
2928  case ICmpInst::Predicate::ICMP_UGT:
2929  // x u> x & (-1 >> y) -> x u> (-1 >> y)
2930  assert(X == I.getOperand(0) && "instsimplify took care of commut. variant");
2931  DstPred = ICmpInst::Predicate::ICMP_UGT;
2932  break;
2933  case ICmpInst::Predicate::ICMP_UGE:
2934  // x & (-1 >> y) u>= x -> x u<= (-1 >> y)
2935  assert(X == I.getOperand(1) && "instsimplify took care of commut. variant");
2936  DstPred = ICmpInst::Predicate::ICMP_ULE;
2937  break;
2938  case ICmpInst::Predicate::ICMP_ULT:
2939  // x & (-1 >> y) u< x -> x u> (-1 >> y)
2940  assert(X == I.getOperand(1) && "instsimplify took care of commut. variant");
2941  DstPred = ICmpInst::Predicate::ICMP_UGT;
2942  break;
2943  case ICmpInst::Predicate::ICMP_ULE:
2944  // x u<= x & (-1 >> y) -> x u<= (-1 >> y)
2945  assert(X == I.getOperand(0) && "instsimplify took care of commut. variant");
2946  DstPred = ICmpInst::Predicate::ICMP_ULE;
2947  break;
2948  case ICmpInst::Predicate::ICMP_SGT:
2949  // x s> x & (-1 >> y) -> x s> (-1 >> y)
2950  if (X != I.getOperand(0)) // X must be on LHS of comparison!
2951  return nullptr; // Ignore the other case.
2952  DstPred = ICmpInst::Predicate::ICMP_SGT;
2953  break;
2954  case ICmpInst::Predicate::ICMP_SGE:
2955  // x & (-1 >> y) s>= x -> x s<= (-1 >> y)
2956  if (X != I.getOperand(1)) // X must be on RHS of comparison!
2957  return nullptr; // Ignore the other case.
2958  DstPred = ICmpInst::Predicate::ICMP_SLE;
2959  break;
2960  case ICmpInst::Predicate::ICMP_SLT:
2961  // x & (-1 >> y) s< x -> x s> (-1 >> y)
2962  if (X != I.getOperand(1)) // X must be on RHS of comparison!
2963  return nullptr; // Ignore the other case.
2964  DstPred = ICmpInst::Predicate::ICMP_SGT;
2965  break;
2966  case ICmpInst::Predicate::ICMP_SLE:
2967  // x s<= x & (-1 >> y) -> x s<= (-1 >> y)
2968  if (X != I.getOperand(0)) // X must be on LHS of comparison!
2969  return nullptr; // Ignore the other case.
2970  DstPred = ICmpInst::Predicate::ICMP_SLE;
2971  break;
2972  default:
2973  llvm_unreachable("All possible folds are handled.");
2974  }
2975 
2976  return Builder.CreateICmp(DstPred, X, M);
2977 }
2978 
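// A sketch of the unsigned-equality case of this fold in IR form (%x, %y
// and the i8 width are hypothetical):
//   %m = lshr i8 -1, %y            ; all-ones in the low bits
//   %a = and i8 %m, %x
//   %c = icmp eq i8 %a, %x         ; does x survive the masking unchanged?
// becomes:
//   %c = icmp ule i8 %x, %m
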
2979 /// Some comparisons can be simplified.
2980 /// In this case, we are looking for comparisons that look like
2981 /// a check for a lossy signed truncation.
2982 /// Folds: (MaskedBits is a constant.)
2983 /// ((%x << MaskedBits) a>> MaskedBits) SrcPred %x
2984 /// Into:
2985 /// (add %x, (1 << (KeptBits-1))) DstPred (1 << KeptBits)
2986 /// Where KeptBits = bitwidth(%x) - MaskedBits
2987 static Value *
2988 foldICmpWithTruncSignExtendedVal(ICmpInst &I,
2989  InstCombiner::BuilderTy &Builder) {
2990  ICmpInst::Predicate SrcPred;
2991  Value *X;
2992  const APInt *C0, *C1; // FIXME: non-splats, potentially with undef.
2993  // We are ok with 'shl' having multiple uses, but 'ashr' must be one-use.
2994  if (!match(&I, m_c_ICmp(SrcPred,
2995  m_OneUse(m_AShr(m_Shl(m_Value(X), m_APInt(C0)),
2996  m_APInt(C1))),
2997  m_Deferred(X))))
2998  return nullptr;
2999 
3000  // Potential handling of non-splats: for each element:
3001  // * if both are undef, replace with constant 0.
3002  // Because (1<<0) is OK and is 1, and ((1<<0)>>1) is also OK and is 0.
3003  // * if both are not undef, and are different, bail out.
3004  // * else, only one is undef, then pick the non-undef one.
3005 
3006  // The shift amount must be equal.
3007  if (*C0 != *C1)
3008  return nullptr;
3009  const APInt &MaskedBits = *C0;
3010  assert(MaskedBits != 0 && "shift by zero should be folded away already.");
3011 
3012  ICmpInst::Predicate DstPred;
3013  switch (SrcPred) {
3014  case ICmpInst::Predicate::ICMP_EQ:
3015  // ((%x << MaskedBits) a>> MaskedBits) == %x
3016  // =>
3017  // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
3018  DstPred = ICmpInst::Predicate::ICMP_ULT;
3019  break;
3020  case ICmpInst::Predicate::ICMP_NE:
3021  // ((%x << MaskedBits) a>> MaskedBits) != %x
3022  // =>
3023  // (add %x, (1 << (KeptBits-1))) u>= (1 << KeptBits)
3024  DstPred = ICmpInst::Predicate::ICMP_UGE;
3025  break;
3026  // FIXME: are more folds possible?
3027  default:
3028  return nullptr;
3029  }
3030 
3031  auto *XType = X->getType();
3032  const unsigned XBitWidth = XType->getScalarSizeInBits();
3033  const APInt BitWidth = APInt(XBitWidth, XBitWidth);
3034  assert(BitWidth.ugt(MaskedBits) && "shifts should leave some bits untouched");
3035 
3036  // KeptBits = bitwidth(%x) - MaskedBits
3037  const APInt KeptBits = BitWidth - MaskedBits;
3038  assert(KeptBits.ugt(0) && KeptBits.ult(BitWidth) && "unreachable");
3039  // ICmpCst = (1 << KeptBits)
3040  const APInt ICmpCst = APInt(XBitWidth, 1).shl(KeptBits);
3041  assert(ICmpCst.isPowerOf2());
3042  // AddCst = (1 << (KeptBits-1))
3043  const APInt AddCst = ICmpCst.lshr(1);
3044  assert(AddCst.ult(ICmpCst) && AddCst.isPowerOf2());
3045 
3046  // T0 = add %x, AddCst
3047  Value *T0 = Builder.CreateAdd(X, ConstantInt::get(XType, AddCst));
3048  // T1 = T0 DstPred ICmpCst
3049  Value *T1 = Builder.CreateICmp(DstPred, T0, ConstantInt::get(XType, ICmpCst));
3050 
3051  return T1;
3052 }
3053 
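// A sketch with concrete numbers (i8 and MaskedBits = 4, so KeptBits = 4;
// %x is hypothetical):
//   %s = shl i8 %x, 4
//   %t = ashr i8 %s, 4             ; sign-extend the low 4 bits
//   %c = icmp eq i8 %t, %x         ; does x fit in 4 signed bits?
// becomes:
//   %a = add i8 %x, 8              ; 1 << (KeptBits - 1)
//   %c = icmp ult i8 %a, 16        ; 1 << KeptBits
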
3054 /// Try to fold icmp (binop), X or icmp X, (binop).
3055 /// TODO: A large part of this logic is duplicated in InstSimplify's
3056 /// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
3057 /// duplication.
3058 Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
3059  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3060 
3061  // Special logic for binary operators.
3062  BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
3063  BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
3064  if (!BO0 && !BO1)
3065  return nullptr;
3066 
3067  const CmpInst::Predicate Pred = I.getPredicate();
3068  Value *X;
3069 
3070  // Convert add-with-unsigned-overflow comparisons into a 'not' with compare.
3071  // (Op1 + X) <u Op1 --> ~Op1 <u X
3072  // Op0 >u (Op0 + X) --> X >u ~Op0
3073  if (match(Op0, m_OneUse(m_c_Add(m_Specific(Op1), m_Value(X)))) &&
3074  Pred == ICmpInst::ICMP_ULT)
3075  return new ICmpInst(Pred, Builder.CreateNot(Op1), X);
3076  if (match(Op1, m_OneUse(m_c_Add(m_Specific(Op0), m_Value(X)))) &&
3077  Pred == ICmpInst::ICMP_UGT)
3078  return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
3079 
3080  bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
3081  if (BO0 && isa<OverflowingBinaryOperator>(BO0))
3082  NoOp0WrapProblem =
3083  ICmpInst::isEquality(Pred) ||
3084  (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
3085  (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
3086  if (BO1 && isa<OverflowingBinaryOperator>(BO1))
3087  NoOp1WrapProblem =
3088  ICmpInst::isEquality(Pred) ||
3089  (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
3090  (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
3091 
3092  // Analyze the case when either Op0 or Op1 is an add instruction.
3093  // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
3094  Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
3095  if (BO0 && BO0->getOpcode() == Instruction::Add) {
3096  A = BO0->getOperand(0);
3097  B = BO0->getOperand(1);
3098  }
3099  if (BO1 && BO1->getOpcode() == Instruction::Add) {
3100  C = BO1->getOperand(0);
3101  D = BO1->getOperand(1);
3102  }
3103 
3104  // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
3105  if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
3106  return new ICmpInst(Pred, A == Op1 ? B : A,
3107  Constant::getNullValue(Op1->getType()));
3108 
3109  // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
3110  if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
3111  return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
3112  C == Op0 ? D : C);
3113 
3114  // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
3115  if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem &&
3116  NoOp1WrapProblem &&
3117  // Try not to increase register pressure.
3118  BO0->hasOneUse() && BO1->hasOneUse()) {
3119  // Determine Y and Z in the form icmp (X+Y), (X+Z).
3120  Value *Y, *Z;
3121  if (A == C) {
3122  // C + B == C + D -> B == D
3123  Y = B;
3124  Z = D;
3125  } else if (A == D) {
3126  // D + B == C + D -> B == C
3127  Y = B;
3128  Z = C;
3129  } else if (B == C) {
3130  // A + C == C + D -> A == D
3131  Y = A;
3132  Z = D;
3133  } else {
3134  assert(B == D);
3135  // A + D == C + D -> A == C
3136  Y = A;
3137  Z = C;
3138  }
3139  return new ICmpInst(Pred, Y, Z);
3140  }
3141 
3142  // icmp slt (X + -1), Y -> icmp sle X, Y
3143  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
3144  match(B, m_AllOnes()))
3145  return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
3146 
3147  // icmp sge (X + -1), Y -> icmp sgt X, Y
3148  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
3149  match(B, m_AllOnes()))
3150  return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
3151 
3152  // icmp sle (X + 1), Y -> icmp slt X, Y
3153  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
3154  return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
3155 
3156  // icmp sgt (X + 1), Y -> icmp sge X, Y
3157  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
3158  return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
3159 
3160  // icmp sgt X, (Y + -1) -> icmp sge X, Y
3161  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
3162  match(D, m_AllOnes()))
3163  return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
3164 
3165  // icmp sle X, (Y + -1) -> icmp slt X, Y
3166  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
3167  match(D, m_AllOnes()))
3168  return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
3169 
3170  // icmp sge X, (Y + 1) -> icmp sgt X, Y
3171  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
3172  return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
3173 
3174  // icmp slt X, (Y + 1) -> icmp sle X, Y
3175  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
3176  return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
3177 
3178  // TODO: The subtraction-related identities shown below also hold, but
3179  // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
3180  // wouldn't happen even if they were implemented.
3181  //
3182  // icmp ult (X - 1), Y -> icmp ule X, Y
3183  // icmp uge (X - 1), Y -> icmp ugt X, Y
3184  // icmp ugt X, (Y - 1) -> icmp uge X, Y
3185  // icmp ule X, (Y - 1) -> icmp ult X, Y
3186 
3187  // icmp ule (X + 1), Y -> icmp ult X, Y
3188  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
3189  return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
3190 
3191  // icmp ugt (X + 1), Y -> icmp uge X, Y
3192  if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
3193  return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
3194 
3195  // icmp uge X, (Y + 1) -> icmp ugt X, Y
3196  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
3197  return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
3198 
3199  // icmp ult X, (Y + 1) -> icmp ule X, Y
3200  if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
3201  return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
3202 
3203  // if C1 has greater magnitude than C2:
3204  // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
3205  // s.t. C3 = C1 - C2
3206  //
3207  // if C2 has greater magnitude than C1:
3208  // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
3209  // s.t. C3 = C2 - C1
3210  if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
3211  (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
3212  if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
3213  if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
3214  const APInt &AP1 = C1->getValue();
3215  const APInt &AP2 = C2->getValue();
3216  if (AP1.isNegative() == AP2.isNegative()) {
3217  APInt AP1Abs = C1->getValue().abs();
3218  APInt AP2Abs = C2->getValue().abs();
3219  if (AP1Abs.uge(AP2Abs)) {
3220  ConstantInt *C3 = Builder.getInt(AP1 - AP2);
3221  Value *NewAdd = Builder.CreateNSWAdd(A, C3);
3222  return new ICmpInst(Pred, NewAdd, C);
3223  } else {
3224  ConstantInt *C3 = Builder.getInt(AP2 - AP1);
3225  Value *NewAdd = Builder.CreateNSWAdd(C, C3);
3226  return new ICmpInst(Pred, A, NewAdd);
3227  }
3228  }
3229  }
3230 
3231  // Analyze the case when either Op0 or Op1 is a sub instruction.
3232  // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
3233  A = nullptr;
3234  B = nullptr;
3235  C = nullptr;
3236  D = nullptr;
3237  if (BO0 && BO0->getOpcode() == Instruction::Sub) {
3238  A = BO0->getOperand(0);
3239  B = BO0->getOperand(1);
3240  }
3241  if (BO1 && BO1->getOpcode() == Instruction::Sub) {
3242  C = BO1->getOperand(0);
3243  D = BO1->getOperand(1);
3244  }
3245 
3246  // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
3247  if (A == Op1 && NoOp0WrapProblem)
3248  return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
3249  // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
3250  if (C == Op0 && NoOp1WrapProblem)
3251  return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
3252 
3253  // (A - B) >u A --> A <u B
3254  if (A == Op1 && Pred == ICmpInst::ICMP_UGT)
3255  return new ICmpInst(ICmpInst::ICMP_ULT, A, B);
3256  // C <u (C - D) --> C <u D
3257  if (C == Op0 && Pred == ICmpInst::ICMP_ULT)
3258  return new ICmpInst(ICmpInst::ICMP_ULT, C, D);
3259 
3260  // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
3261  if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
3262  // Try not to increase register pressure.
3263  BO0->hasOneUse() && BO1->hasOneUse())
3264  return new ICmpInst(Pred, A, C);
3265  // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
3266  if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
3267  // Try not to increase register pressure.
3268  BO0->hasOneUse() && BO1->hasOneUse())
3269  return new ICmpInst(Pred, D, B);
3270 
3271  // icmp (0-X) < cst --> X > -cst
3272  if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
3273  Value *X;
3274  if (match(BO0, m_Neg(m_Value(X))))
3275  if (Constant *RHSC = dyn_cast<Constant>(Op1))
3276  if (RHSC->isNotMinSignedValue())
3277  return new ICmpInst(I.getSwappedPredicate(), X,
3278  ConstantExpr::getNeg(RHSC));
3279  }
3280 
3281  BinaryOperator *SRem = nullptr;
3282  // icmp (srem X, Y), Y
3283  if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1))
3284  SRem = BO0;
3285  // icmp Y, (srem X, Y)
3286  else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
3287  Op0 == BO1->getOperand(1))
3288  SRem = BO1;
3289  if (SRem) {
3290  // We don't check hasOneUse to avoid increasing register pressure because
3291  // the value we use is the same value this instruction was already using.
3292  switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
3293  default:
3294  break;
3295  case ICmpInst::ICMP_EQ:
3296  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
3297  case ICmpInst::ICMP_NE:
3298  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
3299  case ICmpInst::ICMP_SGT:
3300  case ICmpInst::ICMP_SGE:
3301  return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
3302  Constant::getNullValue(SRem->getType()));
3303  case ICmpInst::ICMP_SLT:
3304  case ICmpInst::ICMP_SLE:
3305  return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
3306  Constant::getNullValue(SRem->getType()));
3307  }
3308  }
3309 
3310  if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() &&
3311  BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) {
3312  switch (BO0->getOpcode()) {
3313  default:
3314  break;
3315  case Instruction::Add:
3316  case Instruction::Sub:
3317  case Instruction::Xor: {
3318  if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
3319  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3320 
3321  const APInt *C;
3322  if (match(BO0->getOperand(1), m_APInt(C))) {
3323  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
3324  if (C->isSignMask()) {
3325  ICmpInst::Predicate NewPred =
3326  I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
3327  return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
3328  }
3329 
3330  // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b
3331  if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) {
3332  ICmpInst::Predicate NewPred =
3333  I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
3334  NewPred = I.getSwappedPredicate(NewPred);
3335  return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0));
3336  }
3337  }
3338  break;
3339  }
3340  case Instruction::Mul: {
3341  if (!I.isEquality())
3342  break;
3343 
3344  const APInt *C;
3345  if (match(BO0->getOperand(1), m_APInt(C)) && !C->isNullValue() &&
3346  !C->isOneValue()) {
3347  // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
3348  // Mask = -1 >> count-trailing-zeros(C).
3349  if (unsigned TZs = C->countTrailingZeros()) {
3350  Constant *Mask = ConstantInt::get(
3351  BO0->getType(),
3352  APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
3353  Value *And1 = Builder.CreateAnd(BO0->getOperand(0), Mask);
3354  Value *And2 = Builder.CreateAnd(BO1->getOperand(0), Mask);
3355  return new ICmpInst(Pred, And1, And2);
3356  }
3357  // If there are no trailing zeros in the multiplier, just eliminate
3358  // the multiplies (no masking is needed):
3359  // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y
3360  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3361  }
3362  break;
3363  }
3364  case Instruction::UDiv:
3365  case Instruction::LShr:
3366  if (I.isSigned() || !BO0->isExact() || !BO1->isExact())
3367  break;
3368  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3369 
3370  case Instruction::SDiv:
3371  if (!I.isEquality() || !BO0->isExact() || !BO1->isExact())
3372  break;
3373  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3374 
3375  case Instruction::AShr:
3376  if (!BO0->isExact() || !BO1->isExact())
3377  break;
3378  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3379 
3380  case Instruction::Shl: {
3381  bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
3382  bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
3383  if (!NUW && !NSW)
3384  break;
3385  if (!NSW && I.isSigned())
3386  break;
3387  return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
3388  }
3389  }
3390  }
3391 
3392  if (BO0) {
3393  // Transform A & (L - 1) `ult` L --> L != 0
3394  auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
3395  auto BitwiseAnd = m_c_And(m_Value(), LSubOne);
3396 
3397  if (match(BO0, BitwiseAnd) && Pred == ICmpInst::ICMP_ULT) {
3398  auto *Zero = Constant::getNullValue(BO0->getType());
3399  return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
3400  }
3401  }
3402 
3403  if (Value *V = foldICmpWithLowBitMaskedVal(I, Builder))
3404  return replaceInstUsesWith(I, V);
3405 
3406  if (Value *V = foldICmpWithTruncSignExtendedVal(I, Builder))
3407  return replaceInstUsesWith(I, V);
3408 
3409  return nullptr;
3410 }
3411 
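// A sketch of one subtraction fold above in IR form (%a, %b hypothetical):
//   %s = sub i32 %a, %b
//   %c = icmp ugt i32 %s, %a       ; (A - B) >u A, i.e. did the sub wrap?
// becomes:
//   %c = icmp ult i32 %a, %b
// Similarly, when neither add can overflow for the predicate in question,
// icmp (X+Y), (X+Z) reduces to icmp Y, Z if both adds have a single use.
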
3412 /// Fold icmp Pred min|max(X, Y), X.
3413 static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) {
3414  ICmpInst::Predicate Pred = Cmp.getPredicate();
3415  Value *Op0 = Cmp.getOperand(0);
3416  Value *X = Cmp.getOperand(1);
3417 
3418  // Canonicalize minimum or maximum operand to LHS of the icmp.
3419  if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) ||
3420  match(X, m_c_SMax(m_Specific(Op0), m_Value())) ||
3421  match(X, m_c_UMin(m_Specific(Op0), m_Value())) ||
3422  match(X, m_c_UMax(m_Specific(Op0), m_Value()))) {
3423  std::swap(Op0, X);
3424  Pred = Cmp.getSwappedPredicate();
3425  }
3426 
3427  Value *Y;
3428  if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) {
3429  // smin(X, Y) == X --> X s<= Y
3430  // smin(X, Y) s>= X --> X s<= Y
3431  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE)
3432  return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
3433 
3434  // smin(X, Y) != X --> X s> Y
3435  // smin(X, Y) s< X --> X s> Y
3436  if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT)
3437  return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
3438 
3439  // These cases should be handled in InstSimplify:
3440  // smin(X, Y) s<= X --> true
3441  // smin(X, Y) s> X --> false
3442  return nullptr;
3443  }
3444 
3445  if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) {
3446  // smax(X, Y) == X --> X s>= Y
3447  // smax(X, Y) s<= X --> X s>= Y
3448  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE)
3449  return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
3450 
3451  // smax(X, Y) != X --> X s< Y
3452  // smax(X, Y) s> X --> X s< Y
3453  if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT)
3454  return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
3455 
3456  // These cases should be handled in InstSimplify:
3457  // smax(X, Y) s>= X --> true
3458  // smax(X, Y) s< X --> false
3459  return nullptr;
3460  }
3461 
3462  if (match(Op0, m_c_UMin(m_Specific(X), m_Value(Y)))) {
3463  // umin(X, Y) == X --> X u<= Y
3464  // umin(X, Y) u>= X --> X u<= Y
3465  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_UGE)
3466  return new ICmpInst(ICmpInst::ICMP_ULE, X, Y);
3467 
3468  // umin(X, Y) != X --> X u> Y
3469  // umin(X, Y) u< X --> X u> Y
3470  if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT)
3471  return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
3472 
3473  // These cases should be handled in InstSimplify:
3474  // umin(X, Y) u<= X --> true
3475  // umin(X, Y) u> X --> false
3476  return nullptr;
3477  }
3478 
3479  if (match(Op0, m_c_UMax(m_Specific(X), m_Value(Y)))) {
3480  // umax(X, Y) == X --> X u>= Y
3481  // umax(X, Y) u<= X --> X u>= Y
3482  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_ULE)
3483  return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
3484 
3485  // umax(X, Y) != X --> X u< Y
3486  // umax(X, Y) u> X --> X u< Y
3487  if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_UGT)
3488  return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
3489 
3490  // These cases should be handled in InstSimplify:
3491  // umax(X, Y) u>= X --> true
3492  // umax(X, Y) u< X --> false
3493  return nullptr;
3494  }
3495 
3496  return nullptr;
3497 }
3498 
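// A sketch of the smin/eq case in IR form (%x, %y hypothetical; the select
// idiom below is what the m_c_SMin matcher recognizes):
//   %t = icmp slt i32 %x, %y
//   %m = select i1 %t, i32 %x, i32 %y      ; smin(x, y)
//   %c = icmp eq i32 %m, %x
// becomes:
//   %c = icmp sle i32 %x, %y
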
3499 Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
3500  if (!I.isEquality())
3501  return nullptr;
3502 
3503  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
3504  const CmpInst::Predicate Pred = I.getPredicate();
3505  Value *A, *B, *C, *D;
3506  if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
3507  if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
3508  Value *OtherVal = A == Op1 ? B : A;
3509  return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
3510  }
3511 
3512  if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
3513  // A^c1 == C^c2 --> A == C^(c1^c2)
3514  ConstantInt *C1, *C2;
3515  if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) &&
3516  Op1->hasOneUse()) {
3517  Constant *NC = Builder.getInt(C1->getValue() ^ C2->getValue());
3518  Value *Xor = Builder.CreateXor(C, NC);
3519  return new ICmpInst(Pred, A, Xor);
3520  }
3521 
3522  // A^B == A^D -> B == D
3523  if (A == C)
3524  return new ICmpInst(Pred, B, D);
3525  if (A == D)
3526  return new ICmpInst(Pred, B, C);
3527  if (B == C)
3528  return new ICmpInst(Pred, A, D);
3529  if (B == D)
3530  return new ICmpInst(Pred, A, C);
3531  }
3532  }
3533 
3534  if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
3535  // A == (A^B) -> B == 0
3536  Value *OtherVal = A == Op0 ? B : A;
3537  return new ICmpInst(Pred, OtherVal, Constant::getNullValue(A->getType()));
3538  }
3539 
3540  // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
3541  if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
3542  match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
3543  Value *X = nullptr, *Y = nullptr, *Z = nullptr;
3544 
3545  if (A == C) {
3546  X = B;
3547  Y = D;
3548  Z = A;
3549  } else if (A == D) {
3550  X = B;
3551  Y = C;
3552  Z = A;
3553  } else if (B == C) {
3554  X = A;
3555  Y = D;
3556  Z = B;
3557  } else if (B == D) {
3558  X = A;
3559  Y = C;
3560  Z = B;
3561  }
3562 
3563  if (X) { // Build (X^Y) & Z
3564  Op1 = Builder.CreateXor(X, Y);
3565  Op1 = Builder.CreateAnd(Op1, Z);
3566  I.setOperand(0, Op1);
3567  I.setOperand(1, Constant::getNullValue(Op1->getType()));
3568  return &I;
3569  }
3570  }
3571 
3572  // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
3573  // and (B & (1<<X)-1) == (zext A) --> A == (trunc B)
3574  ConstantInt *Cst1;
3575  if ((Op0->hasOneUse() && match(Op0, m_ZExt(m_Value(A))) &&
3576  match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
3577  (Op1->hasOneUse() && match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
3578  match(Op1, m_ZExt(m_Value(A))))) {
3579  APInt Pow2 = Cst1->getValue() + 1;
3580  if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
3581  Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
3582  return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
3583  }
3584 
3585  // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
3586  // For lshr and ashr pairs.
3587  if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
3588  match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
3589  (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
3590  match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
3591  unsigned TypeBits = Cst1->getBitWidth();
3592  unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
3593  if (ShAmt < TypeBits && ShAmt != 0) {
3594  ICmpInst::Predicate NewPred =
3595  Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
3596  Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
3597  APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
3598  return new ICmpInst(NewPred, Xor, Builder.getInt(CmpVal));
3599  }
3600  }
3601 
3602  // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
3603  if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
3604  match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
3605  unsigned TypeBits = Cst1->getBitWidth();
3606  unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
3607  if (ShAmt < TypeBits && ShAmt != 0) {
3608  Value *Xor = Builder.CreateXor(A, B, I.getName() + ".unshifted");
3609  APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
3610  Value *And = Builder.CreateAnd(Xor, Builder.getInt(AndVal),
3611  I.getName() + ".mask");
3612  return new ICmpInst(Pred, And, Constant::getNullValue(Cst1->getType()));
3613  }
3614  }
3615 
3616  // Transform "icmp eq (trunc (lshr(X, cst1))), cst" to
3617  // "icmp (and X, mask), cst"
3618  uint64_t ShAmt = 0;
3619  if (Op0->hasOneUse() &&
3620  match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) &&
3621  match(Op1, m_ConstantInt(Cst1)) &&
3622  // Only do this when A has multiple uses. This is most important to do
3623  // when it exposes other optimizations.
3624  !A->hasOneUse()) {
3625  unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
3626 
3627  if (ShAmt < ASize) {
3628  APInt MaskV =
3629  APInt::getLowBitsSet(ASize, Cst1->getValue().getActiveBits());
3630  MaskV <<= ShAmt;
3631 
3632  APInt CmpV = Cst1->getValue().zext(ASize);
3633  CmpV <<= ShAmt;
3634 
3635  Value *Mask = Builder.CreateAnd(A, Builder.getInt(MaskV));
3636  return new ICmpInst(Pred, Mask, Builder.getInt(CmpV));
3637  }
3638  }
3639 
3640  // If both operands are byte-swapped or bit-reversed, just compare the
3641  // original values.
3642  // TODO: Move this to a function similar to foldICmpIntrinsicWithConstant()
3643  // and handle more intrinsics.
3644  if ((match(Op0, m_BSwap(m_Value(A))) && match(Op1, m_BSwap(m_Value(B)))) ||
3645  (match(Op0, m_BitReverse(m_Value(A))) &&
3646  match(Op1, m_BitReverse(m_Value(B)))))
3647  return new ICmpInst(Pred, A, B);
3648 
3649  return nullptr;
3650 }
3651 
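// A sketch of the first xor fold above in IR form (%a, %b hypothetical):
//   %x = xor i32 %a, %b
//   %c = icmp eq i32 %x, %a        ; (A ^ B) == A
// becomes:
//   %c = icmp eq i32 %b, 0
// and the shift-pair fold turns (A >> C) == (B >> C) into an unsigned
// compare of A ^ B against 1 << C.
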
3652 /// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so
3653 /// far.
3654 Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
3655  const CastInst *LHSCI = cast<CastInst>(ICmp.getOperand(0));
3656  Value *LHSCIOp = LHSCI->getOperand(0);
3657  Type *SrcTy = LHSCIOp->getType();
3658  Type *DestTy = LHSCI->getType();
3659  Value *RHSCIOp;
3660 
3661  // Turn icmp (ptrtoint x), (ptrtoint/c) into a compare of the input if the
3662  // integer type is the same size as the pointer type.
3663  const auto& CompatibleSizes = [&](Type* SrcTy, Type* DestTy) -> bool {
3664  if (isa<VectorType>(SrcTy)) {
3665  SrcTy = cast<VectorType>(SrcTy)->getElementType();
3666  DestTy = cast<VectorType>(DestTy)->getElementType();
3667  }
3668  return DL.getPointerTypeSizeInBits(SrcTy) == DestTy->getIntegerBitWidth();
3669  };
3670  if (LHSCI->getOpcode() == Instruction::PtrToInt &&
3671  CompatibleSizes(SrcTy, DestTy)) {
3672  Value *RHSOp = nullptr;
3673  if (auto *RHSC = dyn_cast<PtrToIntOperator>(ICmp.getOperand(1))) {
3674  Value *RHSCIOp = RHSC->getOperand(0);
3675  if (RHSCIOp->getType()->getPointerAddressSpace() ==
3676  LHSCIOp->getType()->getPointerAddressSpace()) {
3677  RHSOp = RHSC->getOperand(0);
3678  // If the pointer types don't match, insert a bitcast.
3679  if (LHSCIOp->getType() != RHSOp->getType())
3680  RHSOp = Builder.CreateBitCast(RHSOp, LHSCIOp->getType());
3681  }
3682  } else if (auto *RHSC = dyn_cast<Constant>(ICmp.getOperand(1))) {
3683  RHSOp = ConstantExpr::getIntToPtr(RHSC, SrcTy);
3684  }
3685 
3686  if (RHSOp)
3687  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSOp);
3688  }
3689 
3690  // The code below only handles extension cast instructions, so far.
3691  // Enforce this.
3692  if (LHSCI->getOpcode() != Instruction::ZExt &&
3693  LHSCI->getOpcode() != Instruction::SExt)
3694  return nullptr;
3695 
3696  bool isSignedExt = LHSCI->getOpcode() == Instruction::SExt;
3697  bool isSignedCmp = ICmp.isSigned();
3698 
3699  if (auto *CI = dyn_cast<CastInst>(ICmp.getOperand(1))) {
3700  // Not an extension from the same type?
3701  RHSCIOp = CI->getOperand(0);
3702  if (RHSCIOp->getType() != LHSCIOp->getType())
3703  return nullptr;
3704 
3705  // If the signedness of the two casts doesn't agree (i.e. one is a sext
3706  // and the other is a zext), then we can't handle this.
3707  if (CI->getOpcode() != LHSCI->getOpcode())
3708  return nullptr;
3709 
3710  // Deal with equality cases early.
3711  if (ICmp.isEquality())
3712  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
3713 
3714  // A signed comparison of sign extended values simplifies into a
3715  // signed comparison.
3716  if (isSignedCmp && isSignedExt)
3717  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, RHSCIOp);
3718 
3719  // The other three cases all fold into an unsigned comparison.
3720  return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, RHSCIOp);
3721  }
3722 
3723  // If we aren't dealing with a constant on the RHS, exit early.
3724  auto *C = dyn_cast<Constant>(ICmp.getOperand(1));
3725  if (!C)
3726  return nullptr;
3727 
3728  // Compute the constant that would happen if we truncated to SrcTy then
3729  // re-extended to DestTy.
3730  Constant *Res1 = ConstantExpr::getTrunc(C, SrcTy);
3731  Constant *Res2 = ConstantExpr::getCast(LHSCI->getOpcode(), Res1, DestTy);
3732 
3733  // If the re-extended constant didn't change...
3734  if (Res2 == C) {
3735  // Deal with equality cases early.
3736  if (ICmp.isEquality())
3737  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
3738 
3739  // A signed comparison of sign extended values simplifies into a
3740  // signed comparison.
3741  if (isSignedExt && isSignedCmp)
3742  return new ICmpInst(ICmp.getPredicate(), LHSCIOp, Res1);
3743 
3744  // The other three cases all fold into an unsigned comparison.
3745  return new ICmpInst(ICmp.getUnsignedPredicate(), LHSCIOp, Res1);
3746  }
3747 
3748  // The re-extended constant changed, partly changed (in the case of a vector),
3749  // or could not be determined to be equal (in the case of a constant
3750  // expression), so the constant cannot be represented in the shorter type.
3751  // Consequently, we cannot emit a simple comparison.
3752  // All the cases that fold to true or false will have already been handled
3753  // by SimplifyICmpInst, so only deal with the tricky case.
3754 
3755  if (isSignedCmp || !isSignedExt || !isa<ConstantInt>(C))
3756  return nullptr;
3757 
3758  // Evaluate the comparison for LT (we invert for GT below). LE and GE cases
3759  // should have been folded away previously and not enter in here.
3760 
3761  // We're performing an unsigned comp with a sign extended value.
3762  // This is true if the input is >= 0. [aka >s -1]
3763  Constant *NegOne = Constant::getAllOnesValue(SrcTy);
3764  Value *Result = Builder.CreateICmpSGT(LHSCIOp, NegOne, ICmp.getName());
3765 
3766  // Finally, return the value computed.
3767  if (ICmp.getPredicate() == ICmpInst::ICMP_ULT)
3768  return replaceInstUsesWith(ICmp, Result);
3769 
3770  assert(ICmp.getPredicate() == ICmpInst::ICMP_UGT && "ICmp should be folded!");
3771  return BinaryOperator::CreateNot(Result);
3772 }
3773 
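// A sketch of the matching-extension fold in IR form (%a, %b hypothetical):
//   %za = zext i8 %a to i32
//   %zb = zext i8 %b to i32
//   %c  = icmp ult i32 %za, %zb
// both operands are zero-extended from the same type, so it is safe to
// compare the narrow sources directly:
//   %c  = icmp ult i8 %a, %b
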
3774 bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
3775  Value *RHS, Instruction &OrigI,
3776  Value *&Result, Constant *&Overflow) {
3777  if (OrigI.isCommutative() && isa<Constant>(LHS) && !isa<Constant>(RHS))
3778  std::swap(LHS, RHS);
3779 
3780  auto SetResult = [&](Value *OpResult, Constant *OverflowVal, bool ReuseName) {
3781  Result = OpResult;
3782  Overflow = OverflowVal;
3783  if (ReuseName)
3784  Result->takeName(&OrigI);
3785  return true;
3786  };
3787 
3788  // If the overflow check was an add followed by a compare, the insertion point
3789  // may be pointing to the compare. We want to insert the new instructions
3790  // before the add in case there are uses of the add between the add and the
3791  // compare.
3792  Builder.SetInsertPoint(&OrigI);
3793 
3794  switch (OCF) {
3795  case OCF_INVALID:
3796  llvm_unreachable("bad overflow check kind!");
3797 
3798  case OCF_UNSIGNED_ADD: {
3799  OverflowResult OR = computeOverflowForUnsignedAdd(LHS, RHS, &OrigI);
3800  if (OR == OverflowResult::NeverOverflows)
3801  return SetResult(Builder.CreateNUWAdd(LHS, RHS), Builder.getFalse(),
3802  true);
3803 
3804  if (OR == OverflowResult::AlwaysOverflows)
3805  return SetResult(Builder.CreateAdd(LHS, RHS), Builder.getTrue(), true);
3806 
3807  // Fall through uadd into sadd
3808  LLVM_FALLTHROUGH;
3809  }
3810  case OCF_SIGNED_ADD: {
3811  // X + 0 -> {X, false}
3812  if (match(RHS, m_Zero()))
3813  return SetResult(LHS, Builder.getFalse(), false);
3814 
3815  // We can strength reduce this signed add into a regular add if we can prove
3816  // that it will never overflow.
3817  if (OCF == OCF_SIGNED_ADD)
3818  if (willNotOverflowSignedAdd(LHS, RHS, OrigI))
3819  return SetResult(Builder.CreateNSWAdd(LHS, RHS), Builder.getFalse(),
3820  true);
3821  break;
3822  }
3823 
3824  case OCF_UNSIGNED_SUB:
3825  case OCF_SIGNED_SUB: {
3826  // X - 0 -> {X, false}
3827  if (match(RHS, m_Zero()))
3828  return SetResult(LHS, Builder.getFalse(), false);
3829 
3830  if (OCF == OCF_SIGNED_SUB) {
3831  if (willNotOverflowSignedSub(LHS, RHS, OrigI))
3832  return SetResult(Builder.CreateNSWSub(LHS, RHS), Builder.getFalse(),
3833  true);
3834  } else {
3835  if (willNotOverflowUnsignedSub(LHS, RHS, OrigI))
3836  return SetResult(Builder.CreateNUWSub(LHS, RHS), Builder.getFalse(),
3837  true);
3838  }
3839  break;
3840  }
3841 
3842  case OCF_UNSIGNED_MUL: {
3843  OverflowResult OR = computeOverflowForUnsignedMul(LHS, RHS, &OrigI);
3844  if (OR == OverflowResult::NeverOverflows)
3845  return SetResult(Builder.CreateNUWMul(LHS, RHS), Builder.getFalse(),
3846  true);
3847  if (OR == OverflowResult::AlwaysOverflows)
3848  return SetResult(Builder.CreateMul(LHS, RHS), Builder.getTrue(), true);
3849  LLVM_FALLTHROUGH;
3850  }
3851  case OCF_SIGNED_MUL:
3852  // X * undef -> undef
3853  if (isa<UndefValue>(RHS))
3854  return SetResult(RHS, UndefValue::get(Builder.getInt1Ty()), false);
3855 
3856  // X * 0 -> {0, false}
3857  if (match(RHS, m_Zero()))
3858  return SetResult(RHS, Builder.getFalse(), false);
3859 
3860  // X * 1 -> {X, false}
3861  if (match(RHS, m_One()))
3862  return SetResult(LHS, Builder.getFalse(), false);
3863 
3864  if (OCF == OCF_SIGNED_MUL)
3865  if (willNotOverflowSignedMul(LHS, RHS, OrigI))
3866  return SetResult(Builder.CreateNSWMul(LHS, RHS), Builder.getFalse(),
3867  true);
3868  break;
3869  }
3870 
3871  return false;
3872 }
3873 
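// For OCF_UNSIGNED_ADD, for instance, a hypothetical caller sees roughly
// the following when the add is proven to never wrap (a sketch, not the
// exact call sites in this file):
//   Value *Result; Constant *Overflow;
//   if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, LHS, RHS, OrigI, Result,
//                             Overflow)) {
//     // Result is an 'add nuw'; Overflow is the constant 'false'.
//   }
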
3874 /// Recognize and process an idiom involving a test for multiplication
3875 /// overflow.
3876 ///
3877 /// The caller has matched a pattern of the form:
3878 /// I = cmp u (mul(zext A, zext B)), V
3879 /// The function checks if this is a test for overflow and if so replaces
3880 /// multiplication with call to 'mul.with.overflow' intrinsic.
3881 ///
3882 /// \param I Compare instruction.
3883 /// \param MulVal Result of 'mul' instruction. It is one of the arguments of
3884 /// the compare instruction. Must be of integer type.
3885 /// \param OtherVal The other argument of compare instruction.
3886 /// \returns Instruction which must replace the compare instruction, or null
3887 /// if no replacement is required.
3888 static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
3889  Value *OtherVal, InstCombiner &IC) {
3890  // Don't bother doing this transformation for pointers, and don't do it
3891  // for vectors.
3892  if (!isa<IntegerType>(MulVal->getType()))
3893  return nullptr;
3894 
3895  assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal);
3896  assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal);
3897  auto *MulInstr = dyn_cast<Instruction>(MulVal);
3898  if (!MulInstr)
3899  return nullptr;
3900  assert(MulInstr->getOpcode() == Instruction::Mul);
3901 
3902  auto *LHS = cast<ZExtOperator>(MulInstr->getOperand(0)),
3903  *RHS = cast<ZExtOperator>(MulInstr->getOperand(1));
3904  assert(LHS->getOpcode() == Instruction::ZExt);
3905  assert(RHS->getOpcode() == Instruction::ZExt);
3906  Value *A = LHS->getOperand(0), *B = RHS->getOperand(0);
3907 
3908  // Calculate type and width of the result produced by mul.with.overflow.
3909  Type *TyA = A->getType(), *TyB = B->getType();
3910  unsigned WidthA = TyA->getPrimitiveSizeInBits(),
3911  WidthB = TyB->getPrimitiveSizeInBits();
3912  unsigned MulWidth;
3913  Type *MulType;
3914  if (WidthB > WidthA) {
3915  MulWidth = WidthB;
3916  MulType = TyB;
3917  } else {
3918  MulWidth = WidthA;
3919  MulType = TyA;
3920  }
3921 
3922  // In order to replace the original mul with a narrower mul.with.overflow,
3923  // all uses must ignore upper bits of the product. The number of used low
3924  // bits must not be greater than the width of mul.with.overflow.
3925  if (MulVal->hasNUsesOrMore(2))
3926  for (User *U : MulVal->users()) {
3927  if (U == &I)
3928  continue;
3929  if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
3930  // Check if truncation ignores bits above MulWidth.
3931  unsigned TruncWidth = TI->getType()->getPrimitiveSizeInBits();
3932  if (TruncWidth > MulWidth)
3933  return nullptr;
3934  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
3935  // Check if AND ignores bits above MulWidth.
3936  if (BO->getOpcode() != Instruction::And)
3937  return nullptr;
3938  if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
3939  const APInt &CVal = CI->getValue();
3940  if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
3941  return nullptr;
3942  } else {
3943  // In this case we could have the operand of the binary operation
3944  // being defined in another block, and performing the replacement
3945  // could break the dominance relation.
3946  return nullptr;
3947  }
3948  } else {
3949  // Other uses prohibit this transformation.
3950  return nullptr;
3951  }
3952  }
3953 
3954  // Recognize patterns
3955  switch (I.getPredicate()) {
3956  case ICmpInst::ICMP_EQ:
3957  case ICmpInst::ICMP_NE:
3958  // Recognize pattern:
3959  // mulval = mul(zext A, zext B)
3960  // cmp eq/ne mulval, zext trunc mulval
3961  if (ZExtInst *Zext = dyn_cast<ZExtInst>(OtherVal))
3962  if (Zext->hasOneUse()) {
3963  Value *ZextArg = Zext->getOperand(0);
3964  if (TruncInst *Trunc = dyn_cast<TruncInst>(ZextArg))
3965  if (Trunc->getType()->getPrimitiveSizeInBits() == MulWidth)
3966  break; //Recognized
3967  }
3968 
3969  // Recognize pattern:
3970  // mulval = mul(zext A, zext B)
3971  // cmp eq/ne mulval, and(mulval, mask), mask selects low MulWidth bits.
3972  ConstantInt *CI;
3973  Value *ValToMask;
3974  if (match(OtherVal, m_And(m_Value(ValToMask), m_ConstantInt(CI)))) {
3975  if (ValToMask != MulVal)
3976  return nullptr;
3977  const APInt &CVal = CI->getValue() + 1;
3978  if (CVal.isPowerOf2()) {
3979  unsigned MaskWidth = CVal.logBase2();
3980  if (MaskWidth == MulWidth)
3981  break; // Recognized
3982  }
3983  }
3984  return nullptr;
3985 
3986  case ICmpInst::ICMP_UGT:
3987  // Recognize pattern:
3988  // mulval = mul(zext A, zext B)
3989  // cmp ugt mulval, max
3990  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
3991  APInt MaxVal = APInt::getMaxValue(MulWidth);
3992  MaxVal = MaxVal.zext(CI->getBitWidth());
3993  if (MaxVal.eq(CI->getValue()))
3994  break; // Recognized
3995  }
3996  return nullptr;
3997 
3998  case ICmpInst::ICMP_UGE:
3999  // Recognize pattern:
4000  // mulval = mul(zext A, zext B)
4001  // cmp uge mulval, max+1
4002  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
4003  APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
4004  if (MaxVal.eq(CI->getValue()))
4005  break; // Recognized
4006  }
4007  return nullptr;
4008 
4009  case ICmpInst::ICMP_ULE:
4010  // Recognize pattern:
4011  // mulval = mul(zext A, zext B)
4012  // cmp ule mulval, max
4013  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
4014  APInt MaxVal = APInt::getMaxValue(MulWidth);
4015  MaxVal = MaxVal.zext(CI->getBitWidth());
4016  if (MaxVal.eq(CI->getValue()))
4017  break; // Recognized
4018  }
4019  return nullptr;
4020 
4021  case ICmpInst::ICMP_ULT:
4022  // Recognize pattern:
4023  // mulval = mul(zext A, zext B)
4024  // cmp ult mulval, max + 1
4025  if (ConstantInt *CI = dyn_cast<ConstantInt>(OtherVal)) {
4026  APInt MaxVal = APInt::getOneBitSet(CI->getBitWidth(), MulWidth);
4027  if (MaxVal.eq(CI->getValue()))
4028  break; // Recognized
4029  }
4030  return nullptr;
4031 
4032  default:
4033  return nullptr;
4034  }
4035 
4036  InstCombiner::BuilderTy &Builder = IC.Builder;
4037  Builder.SetInsertPoint(MulInstr);
4038 
4039  // Replace: mul(zext A, zext B) --> mul.with.overflow(A, B)
4040  Value *MulA = A, *MulB = B;
4041  if (WidthA < MulWidth)
4042  MulA = Builder.CreateZExt(A, MulType);
4043  if (WidthB < MulWidth)
4044  MulB = Builder.CreateZExt(B, MulType);
4045  Value *F = Intrinsic::getDeclaration(I.getModule(),
4046  Intrinsic::umul_with_overflow, MulType);
4047  CallInst *Call = Builder.CreateCall(F, {MulA, MulB}, "umul");
4048  IC.Worklist.Add(MulInstr);
4049 
4050  // If there are uses of the mul result other than the comparison, we know
4051  // that they are truncations or binary ANDs. Change them to use the result
4052  // of mul.with.overflow and adjust the mask/size properly.
4053  if (MulVal->hasNUsesOrMore(2)) {
4054  Value *Mul = Builder.CreateExtractValue(Call, 0, "umul.value");
4055  for (auto UI = MulVal->user_begin(), UE = MulVal->user_end(); UI != UE;) {
4056  User *U = *UI++;
4057  if (U == &I || U == OtherVal)
4058  continue;
4059  if (TruncInst *TI = dyn_cast<TruncInst>(U)) {
4060  if (TI->getType()->getPrimitiveSizeInBits() == MulWidth)
4061  IC.replaceInstUsesWith(*TI, Mul);
4062  else
4063  TI->setOperand(0, Mul);
4064  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U)) {
4065  assert(BO->getOpcode() == Instruction::And);
4066  // Replace (mul & mask) --> zext (mul.with.overflow & short_mask)
4067  ConstantInt *CI = cast<ConstantInt>(BO->getOperand(1));
4068  APInt ShortMask = CI->getValue().trunc(MulWidth);
4069  Value *ShortAnd = Builder.CreateAnd(Mul, ShortMask);
4070  Instruction *Zext =
4071  cast<Instruction>(Builder.CreateZExt(ShortAnd, BO->getType()));
4072  IC.Worklist.Add(Zext);
4073  IC.replaceInstUsesWith(*BO, Zext);
4074  } else {
4075  llvm_unreachable("Unexpected Binary operation");
4076  }
4077  IC.Worklist.Add(cast<Instruction>(U));
4078  }
4079  }
4080  if (isa<Instruction>(OtherVal))
4081  IC.Worklist.Add(cast<Instruction>(OtherVal));
4082 
4083  // The original icmp gets replaced with the overflow value, maybe inverted
4084  // depending on predicate.
4085  bool Inverse = false;
4086  switch (I.getPredicate()) {
4087  case ICmpInst::ICMP_NE:
4088  break;
4089  case ICmpInst::ICMP_EQ:
4090  Inverse = true;
4091  break;
4092  case ICmpInst::ICMP_UGT:
4093  case ICmpInst::ICMP_UGE:
4094  if (I.getOperand(0) == MulVal)
4095  break;
4096  Inverse = true;
4097  break;
4098  case ICmpInst::ICMP_ULT:
4099  case ICmpInst::ICMP_ULE:
4100  if (I.getOperand(1) == MulVal)
4101  break;
4102  Inverse = true;
4103  break;
4104  default:
4105  llvm_unreachable("Unexpected predicate");
4106  }
4107  if (Inverse) {
4108  Value *Res = Builder.CreateExtractValue(Call, 1);
4109  return BinaryOperator::CreateNot(Res);
4110  }
4111 
4112  return ExtractValueInst::Create(Call, 1);
4113 }
4114 
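// A sketch of the ICMP_UGT pattern this recognizes, in IR form (%a, %b
// hypothetical):
//   %za = zext i32 %a to i64
//   %zb = zext i32 %b to i64
//   %m  = mul i64 %za, %zb
//   %c  = icmp ugt i64 %m, 4294967295      ; does the product exceed u32 max?
// becomes:
//   %r  = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
//   %c  = extractvalue { i32, i1 } %r, 1
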
4115 /// When performing a comparison against a constant, it is possible that not all
4116 /// the bits in the LHS are demanded. This helper method computes the mask that
4117 /// IS demanded.
4118 static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
4119  const APInt *RHS;
4120  if (!match(I.getOperand(1), m_APInt(RHS)))
4121  return APInt::getAllOnesValue(BitWidth);
4122 
4123  // If this is a normal comparison, it demands all bits. If it is a sign bit
4124  // comparison, it only demands the sign bit.
4125  bool UnusedBit;
4126  if (isSignBitCheck(I.getPredicate(), *RHS, UnusedBit))
4127  return APInt::getSignMask(BitWidth);
4128 
4129  switch (I.getPredicate()) {
4130  // For a UGT comparison, we don't care about any bits that
4131  // correspond to the trailing ones of the comparand. The value of these
4132  // bits doesn't impact the outcome of the comparison, because any value
4133  // greater than the RHS must differ in a bit higher than these due to carry.
4134  case ICmpInst::ICMP_UGT:
4135  return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingOnes());
4136 
4137  // Similarly, for a ULT comparison, we don't care about the trailing zeros.
4138  // Any value less than the RHS must differ in a higher bit because of carries.
4139  case ICmpInst::ICMP_ULT:
4140  return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingZeros());
4141 
4142  default:
4143  return APInt::getAllOnesValue(BitWidth);
4144  }
4145 }
4146 
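// For example (hypothetical i8 values): for 'icmp ugt i8 %x, 7' the RHS is
// 0b00000111, and its three trailing ones are not demanded of %x; any value
// greater than 7 must already differ from it in bit 3 or above, so the
// returned mask is 0b11111000.
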
4147 /// Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst
4148 /// should be swapped.
4149 /// The decision is based on how many times these two operands are reused
4150 /// as subtract operands and their positions in those instructions.
4151 /// The rationale is that several architectures use the same instruction for
4152 /// both subtract and cmp. Thus, it is better if the order of those operands
4153 /// matches.
4154 /// \return true if Op0 and Op1 should be swapped.
4155 static bool swapMayExposeCSEOpportunities(const Value *Op0, const Value *Op1) {
4156  // Filter out pointer values as those cannot appear directly in subtract.
4157  // FIXME: we may want to go through inttoptrs or bitcasts.
4158  if (Op0->getType()->isPointerTy())
4159  return false;
4160  // If a subtract already has the same operands as a compare, swapping would be
4161  // bad. If a subtract has the same operands as a compare but in reverse order,
4162  // then swapping is good.
4163  int GoodToSwap = 0;
4164  for (const User *U : Op0->users()) {
4165  if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
4166  GoodToSwap++;
4167  else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
4168  GoodToSwap--;
4169  }
4170  return GoodToSwap > 0;
4171 }
4172 
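// For example (hypothetical): if another instruction computes 'sub %b, %a',
// then 'icmp eq %a, %b' is better emitted as 'icmp eq %b, %a', so targets
// that implement sub and cmp with the same instruction can share it.
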
4173 /// Check that one use is in the same block as the definition and all
4174 /// other uses are in blocks dominated by a given block.
4175 ///
4176 /// \param DI Definition
4177 /// \param UI Use
4178 /// \param DB Block that must dominate all uses of \p DI outside
4179 /// the parent block
4180 /// \return true when \p UI is the only use of \p DI in the parent block
4181 /// and all other uses of \p DI are in blocks dominated by \p DB.
4182 ///
4183 bool InstCombiner::dominatesAllUses(const Instruction *DI,
4184  const Instruction *UI,
4185  const BasicBlock *DB) const {
4186  assert(DI && UI && "Instruction not defined\n");
4187  // Ignore incomplete definitions.
4188  if (!DI->getParent())
4189  return false;
4190  // DI and UI must be in the same block.
4191  if (DI->getParent() != UI->getParent())
4192  return false;
4193  // Protect from self-referencing blocks.
4194  if (DI->getParent() == DB)
4195  return false;
4196  for (const User *U : DI->users()) {
4197  auto *Usr = cast<Instruction>(U);
4198  if (Usr != UI && !DT.dominates(DB, Usr->getParent()))
4199  return false;
4200  }
4201  return true;
4202 }
4203 
4204 /// Return true when the instruction sequence within a block is select-cmp-br.
4205 static bool isChainSelectCmpBranch(const SelectInst *SI) {
4206  const BasicBlock *BB = SI->getParent();
4207  if (!BB)
4208  return false;
4209  auto *BI = dyn_cast_or_null<BranchInst>(BB->getTerminator());
4210  if (!BI || BI->getNumSuccessors() != 2)
4211  return false;
4212  auto *IC = dyn_cast<ICmpInst>(BI->getCondition());
4213  if (!IC || (IC->getOperand(0) != SI && IC->getOperand(1) != SI))
4214  return false;
4215  return true;
4216 }
4217 
4218 /// True when a select result is replaced by one of its operands
4219 /// in a select-icmp sequence. This will eventually result in the elimination
4220 /// of the select.
4221 ///
4222 /// \param SI Select instruction
4223 /// \param Icmp Compare instruction
4224 /// \param SIOpd Operand that replaces the select
4225 ///
4226 /// Notes:
4227 /// - The replacement is global and requires dominator information
4228 /// - The caller is responsible for the actual replacement
4229 ///
4230 /// Example:
4231 ///
4232 /// entry:
4233 /// %4 = select i1 %3, %C* %0, %C* null
4234 /// %5 = icmp eq %C* %4, null
4235 /// br i1 %5, label %9, label %7
4236 /// ...
4237 /// ; <label>:7 ; preds = %entry
4238 /// %8 = getelementptr inbounds %C* %4, i64 0, i32 0
4239 /// ...
4240 ///
4241 /// can be transformed to
4242 ///
4243 /// %5 = icmp eq %C* %0, null
4244 /// %6 = select i1 %3, i1 %5, i1 true
4245 /// br i1 %6, label %9, label %7
4246 /// ...
4247 /// ; <label>:7 ; preds = %entry
4248 /// %8 = getelementptr inbounds %C* %0, i64 0, i32 0 // replace by %0!
4249 ///
4250 /// Similar when the first operand of the select is a constant and/or
4251 /// the compare is for not equal rather than equal.
4252 ///
4253 /// NOTE: The function is only called when the select and compare constants
4254 /// are equal; the optimization can work only for EQ predicates. This is not a
4255 /// major restriction since a NE compare should be 'normalized' to an equal
4256 /// compare, which usually happens in the combiner and test case
4257 /// select-cmp-br.ll checks for it.
4258 bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
4259  const ICmpInst *Icmp,
4260  const unsigned SIOpd) {
4261  assert((SIOpd == 1 || SIOpd == 2) && "Invalid select operand!");
4262  if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
4263  BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
4264  // The check for the single predecessor is not the best that can be
4265  // done. But it protects efficiently against cases like when SI's
4266  // home block has two successors, Succ and Succ1, and Succ1 is a predecessor
4267  // of Succ. Then SI can't be replaced by SIOpd because the use that gets
4268  // replaced can be reached on either path. So the uniqueness check
4269  // guarantees that the path on which all uses of SI (outside SI's parent)
4270  // lie is disjoint from all other paths out of SI. But that information
4271  // is more expensive to compute, and the trade-off here is in favor
4272  // of compile-time. It should also be noticed that we check for a single
4273  // predecessor and not only uniqueness. This is to handle the situation when
4274  // Succ and Succ1 point to the same basic block.
4275  if (Succ->getSinglePredecessor() && dominatesAllUses(SI, Icmp, Succ)) {
4276  NumSel++;
4277  SI->replaceUsesOutsideBlock(SI->getOperand(SIOpd), SI->getParent());
4278  return true;
4279  }
4280  }
4281  return false;
4282 }
4283 
4284 /// Try to fold the comparison based on range information we can get by checking
4285 /// whether bits are known to be zero or one in the inputs.
4286 Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
4287  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4288  Type *Ty = Op0->getType();
4289  ICmpInst::Predicate Pred = I.getPredicate();
4290 
4291  // Get scalar or pointer size.
4292  unsigned BitWidth = Ty->isIntOrIntVectorTy()
4293  ? Ty->getScalarSizeInBits()
4294  : DL.getIndexTypeSizeInBits(Ty->getScalarType());
4295 
4296  if (!BitWidth)
4297  return nullptr;
4298 
4299  KnownBits Op0Known(BitWidth);
4300  KnownBits Op1Known(BitWidth);
4301 
4302  if (SimplifyDemandedBits(&I, 0,
4303  getDemandedBitsLHSMask(I, BitWidth),
4304  Op0Known, 0))
4305  return &I;
4306 
4307  if (SimplifyDemandedBits(&I, 1, APInt::getAllOnesValue(BitWidth),
4308  Op1Known, 0))
4309  return &I;
4310 
4311  // Given the known and unknown bits, compute a range that the LHS could be
4312  // in. Compute the Min, Max and RHS values based on the known bits. For the
4313  // EQ and NE we use unsigned values.
4314  APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
4315  APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
4316  if (I.isSigned()) {
4317  computeSignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
4318  computeSignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
4319  } else {
4320  computeUnsignedMinMaxValuesFromKnownBits(Op0Known, Op0Min, Op0Max);
4321  computeUnsignedMinMaxValuesFromKnownBits(Op1Known, Op1Min, Op1Max);
4322  }
4323 
4324  // If Min and Max are known to be the same, then SimplifyDemandedBits figured
4325  // out that the LHS or RHS is a constant. Constant fold this now, so that
4326  // code below can assume that Min != Max.
4327  if (!isa<Constant>(Op0) && Op0Min == Op0Max)
4328  return new ICmpInst(Pred, ConstantExpr::getIntegerValue(Ty, Op0Min), Op1);
4329  if (!isa<Constant>(Op1) && Op1Min == Op1Max)
4330  return new ICmpInst(Pred, Op0, ConstantExpr::getIntegerValue(Ty, Op1Min));
4331 
4332  // Based on the range information we know about the LHS, see if we can
4333  // simplify this comparison. For example, (x&4) < 8 is always true.
4334  switch (Pred) {
4335  default:
4336  llvm_unreachable("Unknown icmp opcode!");
4337  case ICmpInst::ICMP_EQ:
4338  case ICmpInst::ICMP_NE: {
4339  if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) {
4340  return Pred == CmpInst::ICMP_EQ
4341  ? replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()))
4342  : replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4343  }
4344 
4345  // If all bits are known zero except for one, then we know at most one bit
4346  // is set. If the comparison is against zero, then this is a check to see if
4347  // *that* bit is set.
4348  APInt Op0KnownZeroInverted = ~Op0Known.Zero;
4349  if (Op1Known.isZero()) {
4350  // If the LHS is an AND with the same constant, look through it.
4351  Value *LHS = nullptr;
4352  const APInt *LHSC;
4353  if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) ||
4354  *LHSC != Op0KnownZeroInverted)
4355  LHS = Op0;
4356 
4357  Value *X;
4358  if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
4359  APInt ValToCheck = Op0KnownZeroInverted;
4360  Type *XTy = X->getType();
4361  if (ValToCheck.isPowerOf2()) {
4362  // ((1 << X) & 8) == 0 -> X != 3
4363  // ((1 << X) & 8) != 0 -> X == 3
4364  auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
4365  auto NewPred = ICmpInst::getInversePredicate(Pred);
4366  return new ICmpInst(NewPred, X, CmpC);
4367  } else if ((++ValToCheck).isPowerOf2()) {
4368  // ((1 << X) & 7) == 0 -> X >= 3
4369  // ((1 << X) & 7) != 0 -> X < 3
4370  auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
4371  auto NewPred =
4373  return new ICmpInst(NewPred, X, CmpC);
4374  }
4375  }
4376 
4377  // Check if the LHS is (power-of-2 >>u X) and only the low bit can be set.
4378  const APInt *CI;
4379  if (Op0KnownZeroInverted.isOneValue() &&
4380  match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
4381  // ((8 >>u X) & 1) == 0 -> X != 3
4382  // ((8 >>u X) & 1) != 0 -> X == 3
4383  unsigned CmpVal = CI->countTrailingZeros();
4384  auto NewPred = ICmpInst::getInversePredicate(Pred);
4385  return new ICmpInst(NewPred, X, ConstantInt::get(X->getType(), CmpVal));
4386  }
4387  }
4388  break;
4389  }
4390  case ICmpInst::ICMP_ULT: {
4391  if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
4392  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4393  if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
4394  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4395  if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
4396  return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4397 
4398  const APInt *CmpC;
4399  if (match(Op1, m_APInt(CmpC))) {
4400  // A <u C -> A == C-1 if min(A)+1 == C
4401  if (*CmpC == Op0Min + 1)
4402  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4403  ConstantInt::get(Op1->getType(), *CmpC - 1));
4404  // X <u C --> X == 0, if the number of known zero bits in the bottom of X
4405  // is at least ceil(log2(C)).
4406  if (Op0Known.countMinTrailingZeros() >= CmpC->ceilLogBase2())
4407  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4408  Constant::getNullValue(Op1->getType()));
4409  }
4410  break;
4411  }
4412  case ICmpInst::ICMP_UGT: {
4413  if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
4414  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4415  if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
4416  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4417  if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
4418  return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4419 
4420  const APInt *CmpC;
4421  if (match(Op1, m_APInt(CmpC))) {
4422  // A >u C -> A == C+1 if max(A)-1 == C
4423  if (*CmpC == Op0Max - 1)
4424  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4425  ConstantInt::get(Op1->getType(), *CmpC + 1));
4426  // X >u C --> X != 0, if the number of known zero bits in the bottom of X
4427  // is at least the number of active bits in C.
4428  if (Op0Known.countMinTrailingZeros() >= CmpC->getActiveBits())
4429  return new ICmpInst(ICmpInst::ICMP_NE, Op0,
4430  Constant::getNullValue(Op1->getType()));
4431  }
4432  break;
4433  }
4434  case ICmpInst::ICMP_SLT: {
4435  if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
4436  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4437  if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
4438  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4439  if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
4440  return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4441  const APInt *CmpC;
4442  if (match(Op1, m_APInt(CmpC))) {
4443  if (*CmpC == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
4444  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4445  ConstantInt::get(Op1->getType(), *CmpC - 1));
4446  }
4447  break;
4448  }
4449  case ICmpInst::ICMP_SGT: {
4450  if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
4451  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4452  if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
4453  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4454  if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
4455  return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
4456  const APInt *CmpC;
4457  if (match(Op1, m_APInt(CmpC))) {
4458  if (*CmpC == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
4459  return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
4460  ConstantInt::get(Op1->getType(), *CmpC + 1));
4461  }
4462  break;
4463  }
4464  case ICmpInst::ICMP_SGE:
4465  assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
4466  if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
4467  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4468  if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
4469  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4470  if (Op1Min == Op0Max) // A >=s B -> A == B if max(A) == min(B)
4471  return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4472  break;
4473  case ICmpInst::ICMP_SLE:
4474  assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
4475  if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
4476  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4477  if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
4478  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4479  if (Op1Max == Op0Min) // A <=s B -> A == B if min(A) == max(B)
4480  return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4481  break;
4482  case ICmpInst::ICMP_UGE:
4483  assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
4484  if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
4485  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4486  if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
4487  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4488  if (Op1Min == Op0Max) // A >=u B -> A == B if max(A) == min(B)
4489  return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4490  break;
4491  case ICmpInst::ICMP_ULE:
4492  assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
4493  if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
4494  return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
4495  if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
4496  return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
4497  if (Op1Max == Op0Min) // A <=u B -> A == B if min(A) == max(B)
4498  return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Op1);
4499  break;
4500  }
4501 
4502  // Turn a signed comparison into an unsigned one if both operands are known to
4503  // have the same sign.
4504  if (I.isSigned() &&
4505  ((Op0Known.Zero.isNegative() && Op1Known.Zero.isNegative()) ||
4506  (Op0Known.One.isNegative() && Op1Known.One.isNegative())))
4507  return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
4508 
4509  return nullptr;
4510 }
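// Editor's illustration (hypothetical IR, not part of this file): known bits
// alone can decide a compare without knowing the exact values, e.g.
//
//   %a = and i32 %x, 4        ; max(%a) == 4, so %a u< 8 always
//   %c = icmp ult i32 %a, 8   ; --> true
//
//   %m = or i32 %x, -2147483648   ; sign bit known one, so %m s< 0 always
//   %d = icmp slt i32 %m, 0       ; --> true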
4511 
4512 /// If we have an icmp le or icmp ge instruction with a constant operand, turn
4513 /// it into the appropriate icmp lt or icmp gt instruction. This transform
4514 /// allows them to be folded in visitICmpInst.
4515 ICmpInst *InstCombiner::canonicalizeCmpWithConstant(ICmpInst &I) {
4516  ICmpInst::Predicate Pred = I.getPredicate();
4517  if (Pred != ICmpInst::ICMP_SLE && Pred != ICmpInst::ICMP_SGE &&
4518  Pred != ICmpInst::ICMP_ULE && Pred != ICmpInst::ICMP_UGE)
4519  return nullptr;
4520 
4521  Value *Op0 = I.getOperand(0);
4522  Value *Op1 = I.getOperand(1);
4523  auto *Op1C = dyn_cast<Constant>(Op1);
4524  if (!Op1C)
4525  return nullptr;
4526 
4527  // Check if the constant operand can be safely incremented/decremented without
4528  // overflowing/underflowing. For scalars, SimplifyICmpInst has already handled
4529  // the edge cases for us, so we just assert on them. For vectors, we must
4530  // handle the edge cases.
4531  Type *Op1Type = Op1->getType();
4532  bool IsSigned = I.isSigned();
4533  bool IsLE = (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE);
4534  auto *CI = dyn_cast<ConstantInt>(Op1C);
4535  if (CI) {
4536  // A <= MAX -> TRUE ; A >= MIN -> TRUE
4537  assert(IsLE ? !CI->isMaxValue(IsSigned) : !CI->isMinValue(IsSigned));
4538  } else if (Op1Type->isVectorTy()) {
4539  // TODO? If the edge cases for vectors were guaranteed to be handled as they
4540  // are for scalars, we could remove the min/max checks. However, to do that,
4541  // we would have to use insertelement/shufflevector to replace edge values.
4542  unsigned NumElts = Op1Type->getVectorNumElements();
4543  for (unsigned i = 0; i != NumElts; ++i) {
4544  Constant *Elt = Op1C->getAggregateElement(i);
4545  if (!Elt)
4546  return nullptr;
4547 
4548  if (isa<UndefValue>(Elt))
4549  continue;
4550 
4551  // Bail out if we can't determine whether this constant is min/max, or if
4552  // we know that it is min/max.
4553  auto *CI = dyn_cast<ConstantInt>(Elt);
4554  if (!CI || (IsLE ? CI->isMaxValue(IsSigned) : CI->isMinValue(IsSigned)))
4555  return nullptr;
4556  }
4557  } else {
4558  // ConstantExpr?
4559  return nullptr;
4560  }
4561 
4562  // Increment or decrement the constant and set the new comparison predicate:
4563  // ULE -> ULT ; UGE -> UGT ; SLE -> SLT ; SGE -> SGT
4564  Constant *OneOrNegOne = ConstantInt::get(Op1Type, IsLE ? 1 : -1, true);
4565  CmpInst::Predicate NewPred = IsLE ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGT;
4566  NewPred = IsSigned ? ICmpInst::getSignedPredicate(NewPred) : NewPred;
4567  return new ICmpInst(NewPred, Op0, ConstantExpr::getAdd(Op1C, OneOrNegOne));
4568 }
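// Editor's illustration (hypothetical IR, not part of this file):
//
//   icmp sle i32 %x, 99   --> icmp slt i32 %x, 100   ; SLE -> SLT with C+1
//   icmp uge i32 %x, 16   --> icmp ugt i32 %x, 15    ; UGE -> UGT with C-1
//
// The scalar edge cases (e.g. 'icmp sle i32 %x, INT32_MAX') never reach this
// function because InstSimplify has already folded them.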
4569 
4570 /// Integer compare with boolean values can always be turned into bitwise ops.
4571 static Instruction *canonicalizeICmpBool(ICmpInst &I,
4572  InstCombiner::BuilderTy &Builder) {
4573  Value *A = I.getOperand(0), *B = I.getOperand(1);
4574  assert(A->getType()->isIntOrIntVectorTy(1) && "Bools only");
4575 
4576  // A boolean compared to true/false can be simplified to Op0/true/false in
4577  // 14 out of the 20 (10 predicates * 2 constants) possible combinations.
4578  // Cases not handled by InstSimplify are always 'not' of Op0.
4579  if (match(B, m_Zero())) {
4580  switch (I.getPredicate()) {
4581  case CmpInst::ICMP_EQ: // A == 0 -> !A
4582  case CmpInst::ICMP_ULE: // A <=u 0 -> !A
4583  case CmpInst::ICMP_SGE: // A >=s 0 -> !A
4584  return BinaryOperator::CreateNot(A);
4585  default:
4586  llvm_unreachable("ICmp i1 X, C not simplified as expected.");
4587  }
4588  } else if (match(B, m_One())) {
4589  switch (I.getPredicate()) {
4590  case CmpInst::ICMP_NE: // A != 1 -> !A
4591  case CmpInst::ICMP_ULT: // A <u 1 -> !A
4592  case CmpInst::ICMP_SGT: // A >s -1 -> !A
4593  return BinaryOperator::CreateNot(A);
4594  default:
4595  llvm_unreachable("ICmp i1 X, C not simplified as expected.");
4596  }
4597  }
4598 
4599  switch (I.getPredicate()) {
4600  default:
4601  llvm_unreachable("Invalid icmp instruction!");
4602  case ICmpInst::ICMP_EQ:
4603  // icmp eq i1 A, B -> ~(A ^ B)
4604  return BinaryOperator::CreateNot(Builder.CreateXor(A, B));
4605 
4606  case ICmpInst::ICMP_NE:
4607  // icmp ne i1 A, B -> A ^ B
4608  return BinaryOperator::CreateXor(A, B);
4609 
4610  case ICmpInst::ICMP_UGT:
4611  // icmp ugt -> icmp ult
4612  std::swap(A, B);
4613  LLVM_FALLTHROUGH;
4614  case ICmpInst::ICMP_ULT:
4615  // icmp ult i1 A, B -> ~A & B
4616  return BinaryOperator::CreateAnd(Builder.CreateNot(A), B);
4617 
4618  case ICmpInst::ICMP_SGT:
4619  // icmp sgt -> icmp slt
4620  std::swap(A, B);
4621  LLVM_FALLTHROUGH;
4622  case ICmpInst::ICMP_SLT:
4623  // icmp slt i1 A, B -> A & ~B
4624  return BinaryOperator::CreateAnd(Builder.CreateNot(B), A);
4625 
4626  case ICmpInst::ICMP_UGE:
4627  // icmp uge -> icmp ule
4628  std::swap(A, B);
4629  LLVM_FALLTHROUGH;
4630  case ICmpInst::ICMP_ULE:
4631  // icmp ule i1 A, B -> ~A | B
4632  return BinaryOperator::CreateOr(Builder.CreateNot(A), B);
4633 
4634  case ICmpInst::ICMP_SGE:
4635  // icmp sge -> icmp sle
4636  std::swap(A, B);
4637  LLVM_FALLTHROUGH;
4638  case ICmpInst::ICMP_SLE:
4639  // icmp sle i1 A, B -> A | ~B
4640  return BinaryOperator::CreateOr(Builder.CreateNot(B), A);
4641  }
4642 }
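// Editor's illustration (hypothetical IR, not part of this file):
//
//   %c = icmp ult i1 %a, %b   ; only true when %a == false and %b == true
//     -->
//   %na = xor i1 %a, true     ; 'not' of %a
//   %c  = and i1 %na, %b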
4643 
4644 // Transform pattern like:
4645 // (1 << Y) u<= X or ~(-1 << Y) u< X or ((1 << Y)+(-1)) u< X
4646 // (1 << Y) u> X or ~(-1 << Y) u>= X or ((1 << Y)+(-1)) u>= X
4647 // Into:
4648 // (X l>> Y) != 0
4649 // (X l>> Y) == 0
4650 static Instruction *foldICmpWithHighBitMask(ICmpInst &Cmp,
4651  InstCombiner::BuilderTy &Builder) {
4652  ICmpInst::Predicate Pred, NewPred;
4653  Value *X, *Y;
4654  if (match(&Cmp,
4655  m_c_ICmp(Pred, m_OneUse(m_Shl(m_One(), m_Value(Y))), m_Value(X)))) {
4656  // We want X to be the icmp's second operand, so swap predicate if it isn't.
4657  if (Cmp.getOperand(0) == X)
4658  Pred = Cmp.getSwappedPredicate();
4659 
4660  switch (Pred) {
4661  case ICmpInst::ICMP_ULE:
4662  NewPred = ICmpInst::ICMP_NE;
4663  break;
4664  case ICmpInst::ICMP_UGT:
4665  NewPred = ICmpInst::ICMP_EQ;
4666  break;
4667  default:
4668  return nullptr;
4669  }
4670  } else if (match(&Cmp, m_c_ICmp(Pred,
4671  m_OneUse(m_CombineOr(
4672  m_Not(m_Shl(m_AllOnes(), m_Value(Y))),
4673  m_Add(m_Shl(m_One(), m_Value(Y)),
4674  m_AllOnes()))),
4675  m_Value(X)))) {
4676  // The variant with 'add' is not canonical (the variant with 'not' is); we
4677  // only see it here because it has extra uses and could not be canonicalized.
4678 
4679  // We want X to be the icmp's second operand, so swap predicate if it isn't.
4680  if (Cmp.getOperand(0) == X)
4681  Pred = Cmp.getSwappedPredicate();
4682 
4683  switch (Pred) {
4684  case ICmpInst::ICMP_ULT:
4685  NewPred = ICmpInst::ICMP_NE;
4686  break;
4687  case ICmpInst::ICMP_UGE:
4688  NewPred = ICmpInst::ICMP_EQ;
4689  break;
4690  default:
4691  return nullptr;
4692  }
4693  } else
4694  return nullptr;
4695 
4696  Value *NewX = Builder.CreateLShr(X, Y, X->getName() + ".highbits");
4697  Constant *Zero = Constant::getNullValue(NewX->getType());
4698  return CmpInst::Create(Instruction::ICmp, NewPred, NewX, Zero);
4699 }
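// Editor's illustration (hypothetical IR, not part of this file):
//
//   %m = shl i32 1, %y            ; (1 << Y)
//   %c = icmp ule i32 %m, %x      ; (1 << Y) u<= X
//     -->
//   %hb = lshr i32 %x, %y         ; X u>> Y
//   %c  = icmp ne i32 %hb, 0      ; X has a set bit at position >= Y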
4700 
4701 static Instruction *foldVectorCmp(CmpInst &Cmp,
4702  InstCombiner::BuilderTy &Builder) {
4703  // If both arguments of the cmp are shuffles that use the same mask and
4704  // shuffle within a single vector, move the shuffle after the cmp.
4705  Value *LHS = Cmp.getOperand(0), *RHS = Cmp.getOperand(1);
4706  Value *V1, *V2;
4707  Constant *M;
4708  if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(M))) &&
4709  match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(M))) &&
4710  V1->getType() == V2->getType() &&
4711  (LHS->hasOneUse() || RHS->hasOneUse())) {
4712  // cmp (shuffle V1, M), (shuffle V2, M) --> shuffle (cmp V1, V2), M
4713  CmpInst::Predicate P = Cmp.getPredicate();
4714  Value *NewCmp = isa<ICmpInst>(Cmp) ? Builder.CreateICmp(P, V1, V2)
4715  : Builder.CreateFCmp(P, V1, V2);
4716  return new ShuffleVectorInst(NewCmp, UndefValue::get(NewCmp->getType()), M);
4717  }
4718  return nullptr;
4719 }
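// Editor's illustration (hypothetical IR, not part of this file), using a
// reversing mask on <4 x i32> operands:
//
//   %s1 = shufflevector <4 x i32> %v1, <4 x i32> undef,
//                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %s2 = shufflevector <4 x i32> %v2, <4 x i32> undef,
//                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %c  = icmp eq <4 x i32> %s1, %s2
//     -->
//   %e  = icmp eq <4 x i32> %v1, %v2
//   %c  = shufflevector <4 x i1> %e, <4 x i1> undef,
//                       <4 x i32> <i32 3, i32 2, i32 1, i32 0>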
4720 
4721 Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
4722  bool Changed = false;
4723  Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
4724  unsigned Op0Cplxity = getComplexity(Op0);
4725  unsigned Op1Cplxity = getComplexity(Op1);
4726 
4727  /// Orders the operands of the compare so that they are listed from most
4728  /// complex to least complex; constants rank below unary operators, which
4729  /// rank below binary operators, so constants end up on the right-hand side.
4730  if (Op0Cplxity < Op1Cplxity ||
4731  (Op0Cplxity == Op1Cplxity && swapMayExposeCSEOpportunities(Op0, Op1))) {
4732  I.swapOperands();
4733  std::swap(Op0, Op1);
4734  Changed = true;
4735  }
4736 
4737  if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1,
4738  SQ.getWithInstruction(&I)))
4739  return replaceInstUsesWith(I, V);
4740 
4741  // Comparing -val or val against zero is the same as just comparing val,
4742  // i.e., abs(val) != 0 -> val != 0
4743  if (I.getPredicate() == ICmpInst::ICMP_NE && match(Op1, m_Zero())) {
4744  Value *Cond, *SelectTrue, *SelectFalse;
4745  if (match(Op0, m_Select(m_Value(Cond), m_Value(SelectTrue),
4746  m_Value(SelectFalse)))) {
4747  if (Value *V = dyn_castNegVal(SelectTrue)) {
4748  if (V == SelectFalse)
4749  return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
4750  }
4751  else if (Value *V = dyn_castNegVal(SelectFalse)) {
4752  if (V == SelectTrue)
4753  return CmpInst::Create(Instruction::ICmp, I.getPredicate(), V, Op1);
4754  }
4755  }
4756  }
4757 
4758  if (Op0->getType()->isIntOrIntVectorTy(1))
4759  if (Instruction *Res = canonicalizeICmpBool(I, Builder))
4760  return Res;
4761 
4762  if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
4763  return NewICmp;
4764 
4765  if (Instruction *Res = foldICmpWithConstant(I))
4766  return Res;
4767 
4768  if (Instruction *Res = foldICmpUsingKnownBits(I))
4769  return Res;
4770 
4771  // Test if the ICmpInst instruction is used exclusively by a select as
4772  // part of a minimum or maximum operation. If so, refrain from doing
4773  // any other folding. This helps out other analyses which understand
4774  // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
4775  // and CodeGen. And in this case, at least one of the comparison
4776  // operands has at least one user besides the compare (the select),
4777  // which would often largely negate the benefit of folding anyway.
4778  //
4779  // Do the same for the other patterns recognized by matchSelectPattern.
4780  if (I.hasOneUse())
4781  if (SelectInst *SI = dyn_cast<SelectInst>(I.user_back())) {
4782  Value *A, *B;
4783  SelectPatternResult SPR = matchSelectPattern(SI, A, B);
4784  if (SPR.Flavor != SPF_UNKNOWN)
4785  return nullptr;
4786  }
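// Editor's illustration (hypothetical IR, not part of this file): the kind
// of min/max idiom we deliberately leave untouched:
//
//   %c = icmp slt i32 %x, %y
//   %m = select i1 %c, i32 %x, i32 %y   ; smin(%x, %y)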
4787 
4788  // Do this after checking for min/max to prevent infinite looping.
4789  if (Instruction *Res = foldICmpWithZero(I))
4790  return Res;
4791 
4792  // FIXME: We only do this after checking for min/max to prevent infinite
4793  // looping caused by a reverse canonicalization of these patterns for min/max.
4794  // FIXME: The organization of folds is a mess. These would naturally go into
4795  // canonicalizeCmpWithConstant(), but we can't move all of the above folds
4796  // down here after the min/max restriction.
4797  ICmpInst::Predicate Pred = I.getPredicate();
4798  const APInt *C;
4799  if (match(Op1, m_APInt(C))) {
4800  // For i32: x >u 2147483647 -> x <s 0 -> true if sign bit set
4801  if (Pred == ICmpInst::ICMP_UGT && C->isMaxSignedValue()) {
4802  Constant *Zero = Constant::getNullValue(Op0->getType());
4803  return new ICmpInst(ICmpInst::ICMP_SLT, Op0, Zero);
4804  }
4805 
4806  // For i32: x <u 2147483648 -> x >s -1 -> true if sign bit clear
4807  if (Pred == ICmpInst::ICMP_ULT && C->isMinSignedValue()) {
4808  Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
4809  return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
4810  }
4811  }
4812 
4813  if (Instruction *Res = foldICmpInstWithConstant(I))
4814  return Res;
4815 
4816  if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
4817  return Res;
4818 
4819  // If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
4820  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
4821  if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
4822  return NI;
4823  if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
4824  if (Instruction *NI = foldGEPICmp(GEP, Op0,
4825  ICmpInst::getSwappedPredicate(I.getPredicate()), I))
4826  return NI;
4827 
4828  // Try to optimize equality comparisons against alloca-based pointers.
4829  if (Op0->getType()->isPointerTy() && I.isEquality()) {
4830  assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
4831  if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
4832  if (Instruction *New = foldAllocaCmp(I, Alloca, Op1))
4833  return New;
4834  if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
4835  if (Instruction *New = foldAllocaCmp(I, Alloca, Op0))
4836  return New;
4837  }
4838 
4839  // Zero-equality and sign-bit checks are preserved through sitofp + bitcast.
4840  Value *X;
4841  if (match(Op0, m_BitCast(m_SIToFP(m_Value(X))))) {
4842  // icmp eq (bitcast (sitofp X)), 0 --> icmp eq X, 0
4843  // icmp ne (bitcast (sitofp X)), 0 --> icmp ne X, 0
4844  // icmp slt (bitcast (sitofp X)), 0 --> icmp slt X, 0
4845  // icmp sgt (bitcast (sitofp X)), 0 --> icmp sgt X, 0
4846  if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_SLT ||
4847  Pred == ICmpInst::ICMP_NE || Pred == ICmpInst::ICMP_SGT) &&
4848  match(Op1, m_Zero()))
4849  return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
4850 
4851  // icmp slt (bitcast (sitofp X)), 1 --> icmp slt X, 1
4852  if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_One()))
4853  return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), 1));
4854 
4855  // icmp sgt (bitcast (sitofp X)), -1 --> icmp sgt X, -1
4856  if (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))
4857  return new ICmpInst(Pred, X, ConstantInt::getAllOnesValue(X->getType()));
4858  }
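// Editor's illustration (hypothetical IR, not part of this file): the full
// pattern for the sign-bit check above is
//
//   %f = sitofp i32 %x to float
//   %i = bitcast float %f to i32
//   %c = icmp slt i32 %i, 0
//     -->
//   %c = icmp slt i32 %x, 0
//
// which holds because sitofp preserves both the sign and the zero-ness of %x.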
4859 
4860  // Zero-equality checks are preserved through unsigned floating-point casts:
4861  // icmp eq (bitcast (uitofp X)), 0 --> icmp eq X, 0
4862  // icmp ne (bitcast (uitofp X)), 0 --> icmp ne X, 0
4863  if (match(Op0, m_BitCast(m_UIToFP(m_Value(X)))))
4864  if (I.isEquality() && match(Op1, m_Zero()))
4865  return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
4866 
4867  // Test to see if the operands of the icmp are casted versions of other
4868  // values. If the ptr->ptr cast can be stripped off both arguments, we do so
4869  // now.
4870  if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
4871  if (Op0->getType()->isPointerTy() &&
4872  (isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
4873  // We keep moving the cast from the left operand over to the right
4874  // operand, where it can often be eliminated completely.
4875  Op0 = CI->getOperand(0);
4876 
4877  // If operand #1 is a bitcast instruction, it must also be a ptr->ptr cast
4878  // so eliminate it as well.
4879  if (BitCastInst *CI2 = dyn_cast<BitCastInst>(Op1))
4880  Op1 = CI2->getOperand(0);
4881 
4882  // If Op1 is a constant, we can fold the cast into the constant.
4883  if (Op0->getType() != Op1->getType()) {
4884  if (Constant *Op1C = dyn_cast<Constant>(Op1)) {
4885  Op1 = ConstantExpr::getBitCast(Op1C, Op0->getType());
4886  } else {
4887  // Otherwise, cast the RHS right before the icmp
4888  Op1 = Builder.CreateBitCast(Op1, Op0->getType());
4889  }
4890  }
4891  return new ICmpInst(I.getPredicate(), Op0, Op1);
4892  }
4893  }
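// Editor's illustration (hypothetical IR, not part of this file; %T assumed):
//
//   %p0 = bitcast %T* %a to i8*
//   %p1 = bitcast %T* %b to i8*
//   %c  = icmp eq i8* %p0, %p1
//     -->
//   %c  = icmp eq %T* %a, %b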
4894 
4895  if (isa<CastInst>(Op0)) {
4896  // Handle the special case of: icmp (cast bool to X), <cst>
4897  // This comes up when you have code like
4898  // int X = A < B;
4899  // if (X) ...
4900  // For generality, we handle any zero-extension of any operand comparison
4901  // with a constant or another cast from the same type.
4902  if (isa<Constant>(Op1) || isa<CastInst>(Op1))
4903  if (Instruction *R = foldICmpWithCastAndCast(I))
4904  return R;
4905  }
4906 
4907  if (Instruction *Res = foldICmpBinOp(I))
4908  return Res;
4909 
4910  if (Instruction *Res = foldICmpWithMinMax(I))
4911  return Res;
4912 
4913  {
4914  Value *A, *B;
4915  // Transform (A & ~B) == 0 --> (A & B) != 0
4916  // and (A & ~B) != 0 --> (A & B) == 0
4917  // if A is a power of 2.
4918  if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
4919  match(Op1, m_Zero()) &&
4920  isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality())
4921  return new ICmpInst(I.getInversePredicate(), Builder.CreateAnd(A, B),
4922  Op1);
4923 
4924  // ~X < ~Y --> Y < X
4925  // ~X < C --> X > ~C
4926  if (match(Op0, m_Not(m_Value(A)))) {
4927  if (match(Op1, m_Not(m_Value(B))))
4928  return new ICmpInst(I.getPredicate(), B, A);
4929 
4930  const APInt *C;
4931  if (match(Op1, m_APInt(C)))
4932  return new ICmpInst(I.getSwappedPredicate(), A,
4933  ConstantInt::get(Op1->getType(), ~(*C)));
4934  }
4935 
4936  Instruction *AddI = nullptr;
4937  if (match(&I, m_UAddWithOverflow(m_Value(A), m_Value(B),
4938  m_Instruction(AddI))) &&
4939  isa<IntegerType>(A->getType())) {
4940  Value *Result;
4941  Constant *Overflow;
4942  if (OptimizeOverflowCheck(OCF_UNSIGNED_ADD, A, B, *AddI, Result,
4943  Overflow)) {
4944  replaceInstUsesWith(*AddI, Result);
4945  return replaceInstUsesWith(I, Overflow);
4946  }
4947  }
4948 
4949  // (zext a) * (zext b) --> llvm.umul.with.overflow.
4950  if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
4951  if (Instruction *R = processUMulZExtIdiom(I, Op0, Op1, *this))
4952  return R;
4953  }
4954  if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
4955  if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this))
4956  return R;
4957  }
4958  }
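// Editor's illustration (hypothetical IR, not part of this file): the
// m_UAddWithOverflow matcher recognizes the classic carry check
//
//   %s = add i32 %a, %b
//   %c = icmp ult i32 %s, %a      ; did the add wrap?
//     -->
//   %r = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %s = extractvalue { i32, i1 } %r, 0
//   %c = extractvalue { i32, i1 } %r, 1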
4959 
4960  if (Instruction *Res = foldICmpEquality(I))
4961  return Res;
4962 
4963  // The 'cmpxchg' instruction returns an aggregate containing the old value and
4964  // an i1 which indicates whether or not we successfully did the swap.
4965  //
4966  // Replace comparisons between the old value and the expected value with the
4967  // indicator that 'cmpxchg' returns.
4968  //
4969  // N.B. This transform is only valid when the 'cmpxchg' is not permitted to
4970  // spuriously fail. In those cases, the old value may equal the expected
4971  // value but it is possible for the swap to not occur.
4972  if (I.getPredicate() == ICmpInst::ICMP_EQ)
4973  if (auto *EVI = dyn_cast<ExtractValueInst>(Op0))
4974  if (auto *ACXI = dyn_cast<AtomicCmpXchgInst>(EVI->getAggregateOperand()))
4975  if (EVI->getIndices()[0] == 0 && ACXI->getCompareOperand() == Op1 &&
4976  !ACXI->isWeak())
4977  return ExtractValueInst::Create(ACXI, 1);
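// Editor's illustration (hypothetical IR, not part of this file):
//
//   %pair = cmpxchg i32* %p, i32 %exp, i32 %new seq_cst seq_cst
//   %old  = extractvalue { i32, i1 } %pair, 0
//   %c    = icmp eq i32 %old, %exp
//     -->
//   %c    = extractvalue { i32, i1 } %pair, 1
//
// This is valid only for a strong cmpxchg, hence the isWeak() check above.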
4978 
4979  {
4980  Value *X;
4981  const APInt *C;
4982  // icmp X+Cst, X
4983  if (match(Op0, m_Add(m_Value(X), m_APInt(C))) && Op1 == X)
4984  return foldICmpAddOpConst(X, *C, I.getPredicate());
4985 
4986  // icmp X, X+Cst
4987  if (match(Op1, m_Add(m_Value(X), m_APInt(C))) && Op0 == X)
4988  return foldICmpAddOpConst(X, *C, I.getSwappedPredicate());
4989  }
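// Editor's illustration (hypothetical IR, not part of this file): one
// equivalence foldICmpAddOpConst can exploit is
//
//   %s = add i32 %x, 5
//   %c = icmp ugt i32 %s, %x   ; X+5 u> X, i.e. the add did not wrap
//     -->
//   %c = icmp ult i32 %x, -5   ; %x u< UINT32_MAX - 4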
4990 
4991  if (Instruction *Res = foldICmpWithHighBitMask(I, Builder))
4992  return Res;
4993 
4994  if (I.getType()->isVectorTy())
4995  if (Instruction *Res = foldVectorCmp(I, Builder))
4996  return Res;
4997 
4998  return Changed ? &I : nullptr;
4999 }
5000 
5001 /// Fold fcmp ([us]itofp x, cst) if possible.
5002 Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
5003  Constant *RHSC) {
5004  if (!isa<ConstantFP>(RHSC)) return nullptr;
5005  const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
5006 
5007  // Get the width of the mantissa. We don't want to hack on conversions that
5008  // might lose information from the integer, e.g. "i64 -> float"
5009  int MantissaWidth = LHSI->getType()->getFPMantissaWidth();
5010  if (MantissaWidth == -1) return nullptr; // Unknown.
5011 
5012  IntegerType *IntTy = cast<IntegerType>(LHSI->getOperand(0)->getType());
5013 
5014  bool LHSUnsigned = isa<UIToFPInst>(LHSI);
5015 
5016  if (I.isEquality()) {
5017  FCmpInst::Predicate P = I.getPredicate();
5018  bool IsExact = false;
5019  APSInt RHSCvt(IntTy->getBitWidth(), LHSUnsigned);
5020  RHS.convertToInteger(RHSCvt, APFloat::rmNearestTiesToEven, &IsExact);
5021 
5022  // If the floating point constant isn't an integer value, we know up front
5023  // what any equality comparison against it will produce.
5024  if (!IsExact) {
5025  // TODO: Can never be -0.0 and other non-representable values
5026  APFloat RHSRoundInt(RHS);
5027  RHSRoundInt.roundToIntegral(APFloat::rmNearestTiesToEven);
5028  if (RHS.compare(RHSRoundInt) != APFloat::cmpEqual) {
5029  if (P == FCmpInst::FCMP_OEQ || P == FCmpInst::FCMP_UEQ)
5030  return replaceInstUsesWith(I, Builder.getFalse());
5031 
5032  assert(P == FCmpInst::FCMP_ONE || P == FCmpInst::FCMP_UNE);
5033  return replaceInstUsesWith(I, Builder.getTrue());
5034  }
5035  }
5036 
5037  // TODO: If the constant is exactly representable, is it always OK to do
5038  // equality compares as integer?
5039  }
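// Editor's illustration (hypothetical IR, not part of this file): 3.5 has no
// exact integer equivalent, and [su]itofp always produces an integral value,
// so
//
//   %f = sitofp i32 %x to float
//   %c = fcmp oeq float %f, 3.500000e+00
//     -->
//   %c = false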
5040 
5041  // Check that the input is converted from an integer type small enough that
5042  // the conversion preserves all bits. TODO: check here for "known" sign bits.
5043  // That would let us handle e.g. (sitofp (x >>s 62) to float) when x is i64.
5044  unsigned InputSize = IntTy->getScalarSizeInBits();
5045 
5046  // The following test does NOT adjust InputSize downwards for signed inputs,
5047  // because the most negative value still requires all the mantissa bits
5048  // to distinguish it from one less than that value.
5049  if ((int)InputSize > MantissaWidth) {