//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <numeric>
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Analyze 'Val', seeing if it is a simple linear expression.
/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
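/// For example, given "add nuw (shl nuw %x, 2), 12" (the no-wrap flags matter,
/// since the code below refuses to look through operators that may overflow),
/// this returns X = %x with Scale = 4 and Offset = 12, because
/// (%x << 2) + 12 == %x*4 + 12.
///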
static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
                                        uint64_t &Offset) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    Offset = CI->getZExtValue();
    Scale = 0;
    return ConstantInt::get(Val->getType(), 0);
  }

  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
    // Cannot look past anything that might overflow.
    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
    if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
      Scale = 1;
      Offset = 0;
      return Val;
    }

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
      if (I->getOpcode() == Instruction::Shl) {
        // This is a value scaled by '1 << the shift amt'.
        Scale = UINT64_C(1) << RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Mul) {
        // This value is scaled by 'RHS'.
        Scale = RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Add) {
        // We have X+C.  Check to see if we really have (X*C2)+C1,
        // where C1 is divisible by C2.
        unsigned SubScale;
        Value *SubVal =
            decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
        Offset += RHS->getZExtValue();
        Scale = SubScale;
        return SubVal;
      }
    }
  }

  // Otherwise, we can't look past this.
  Scale = 1;
  Offset = 0;
  return Val;
}

/// If we find a cast of an allocation instruction, try to eliminate the cast by
/// moving the type information into the alloc.
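/// For example, on a target where i64 has at least the ABI alignment of i32:
///   %a = alloca i32, i32 4
///   %b = bitcast i32* %a to i64*
///   --->
///   %a = alloca i64, i32 2
/// If the old alloca had other users, they are redirected to a bitcast of the
/// new alloca back to the original pointer type.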
Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
                                                       AllocaInst &AI) {
  PointerType *PTy = cast<PointerType>(CI.getType());

  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(&AI);

  // Get the type really allocated and the type casted to.
  Type *AllocElTy = AI.getAllocatedType();
  Type *CastElTy = PTy->getElementType();
  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;

  // This optimisation does not work for cases where the cast type
  // is scalable and the allocated type is not. This is because we need to
  // know how many times the casted type fits into the allocated type.
  // For the opposite case where the allocated type is scalable and the
  // cast type is not, this leads to poor code quality due to the
  // introduction of 'vscale' into the calculations. It seems better to
  // bail out for this case too until we've done a proper cost-benefit
  // analysis.
  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
  if (AllocIsScalable != CastIsScalable) return nullptr;

  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
  if (CastElTyAlign < AllocElTyAlign) return nullptr;

  // If the allocation has multiple uses, only promote it if we are strictly
  // increasing the alignment of the resultant allocation.  If we keep it the
  // same, we open the door to infinite loops of various kinds.
  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;

  // The alloc and cast types should be either both fixed or both scalable.
  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize();
  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;

  // If the allocation has multiple uses, only promote it if we're not
  // shrinking the amount of memory being allocated.
  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize();
  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;

  // See if we can satisfy the modulus by pulling a scale out of the array
  // size argument.
  unsigned ArraySizeScale;
  uint64_t ArrayOffset;
  Value *NumElements = // See if the array size is a decomposable linear expr.
      decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);

  // If we can now satisfy the modulus, by using a non-1 scale, we really can
  // do the xform.
  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
      (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return nullptr;

  // We don't currently support arrays of scalable types.
  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));

  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
  Value *Amt = nullptr;
  if (Scale == 1) {
    Amt = NumElements;
  } else {
    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
    // Insert before the alloca, not before the cast.
    Amt = Builder.CreateMul(Amt, NumElements);
  }

  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                  Offset, true);
    Amt = Builder.CreateAdd(Amt, Off);
  }

  AllocaInst *New = Builder.CreateAlloca(CastElTy, Amt);
  New->setAlignment(AI.getAlign());
  New->takeName(&AI);
  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());

  // If the allocation has multiple real uses, insert a cast and change all
  // things that used it to use the new cast.  This will also hack on CI, but it
  // will die soon.
  if (!AI.hasOneUse()) {
    // New is the allocation instruction, pointer typed. AI is the original
    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
    Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
    replaceInstUsesWith(AI, NewCast);
    eraseInstFromFunction(AI);
  }
  return replaceInstUsesWith(CI, New);
}

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V)) {
    C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
    // If we got a constantexpr back, try to simplify it with DL info.
    return ConstantFoldConstant(C, DL, &TLI);
  }

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source.  There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, *I);
}
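
// For example, evaluating "add i64 (zext i32 %x), (zext i32 %y)" in type i32
// rebuilds the add as "add i32 %x, %y": each zext's source already has the
// requested type, so the casts vanish instead of being reinserted.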

Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}
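
// For example, "zext i8 %x to i16" followed by "zext i16 to i32" is
// eliminable as a single "zext i8 %x to i32"; likewise "sext i8 %x to i32"
// followed by "trunc i32 to i16" folds to "sext i8 %x to i16".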

/// Implement the transforms common to all CastInst visitors.
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Ty = CI.getType();
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return true;
  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we can not evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  assert(!isa<Constant>(V) && "Constant should already be handled.");
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V.  We should return true if trunc(V)
/// can be computed by computing V in the smaller type.  If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
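/// For example, "and (zext i8 %x to i32), 7" can be evaluated in i8: both
/// operands are free to truncate (a zext from i8 and a constant), so the
/// whole expression can be rebuilt as "and i8 %x, 7".
///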
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // inrange amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    // zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    // similar to sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
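/// For example, a rotate of a value that is zero-extended from i8 (one of the
/// shift-amount shapes accepted below):
///   trunc (or (shl (zext %x), %sh), (lshr (zext %x), (sub 8, %sh))) to i8
///   --->
///   call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 (trunc %sh))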
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
      return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The shifted value must have high zeros in the wide type. Typically, this
  // will be a zext, but it could also be the result of an 'and' or 'shift'.
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal0, HiBitMask, 0, &Trunc) ||
      !MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // We have an unnecessarily wide rotate!
  // trunc (or (lshr ShVal0, ShAmt), (shl ShVal1, BitWidth - ShAmt))
  // Narrow the inputs and convert to funnel shift intrinsic:
  // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
/// TODO: Transforms for truncated shifts should be moved into here.
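/// For example, when i32 is a desirable type and the add has no other uses:
///   trunc (add i64 %x, 15) to i32 --> add i32 (trunc i64 %x to i32), 15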
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }

  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
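/// For example:
///   trunc (shuffle <4 x i32> %x, undef, zeroinitializer) to <4 x i16>
///   --->
///   shuffle <4 x i16> (trunc %x), undef, zeroinitializer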
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      is_splat(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Undef, SplatMask
    Constant *NarrowUndef = UndefValue::get(Trunc.getType());
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, NarrowUndef, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
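/// For example:
///   trunc (insertelement <4 x i32> undef, i32 %x, i32 1) to <4 x i16>
///   --->
///   insertelement <4 x i16> undef, i16 (trunc %x), i32 1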
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc   (inselt undef, X, Index) --> inselt undef,   (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type.  Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);
    if (DestTy->isIntegerTy()) {
      // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
      // TODO: We canonicalize to more instructions here because we are probably
      // lacking equivalent analysis for trunc relative to icmp. There may also
      // be codegen concerns. If those trunc limitations were removed, we could
      // remove this transform.
      Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    // For vectors, we do not canonicalize all truncs to icmp, so optimize
    // patterns that would be covered within visitICmpInst.
    Value *X;
    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_Constant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      MaskC = ConstantExpr::getOr(MaskC, One);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }

  Value *A;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
  if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) {
    unsigned MaxShiftAmt = SrcWidth - DestWidth;

    // If the shift is small enough, all zero/sign bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto *OldShift = cast<Instruction>(Src);
      bool IsExact = OldShift->isExact();
      auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
      ShAmt = Constant::mergeUndefsWith(ShAmt, C);
      Value *Shift =
          OldShift->getOpcode() == Instruction::AShr
              ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
              : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
      return CastInst::CreateTruncOrBitCast(Shift, DestTy);
    }
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of a shift by constants. Doing the transform there would
      // undo a combine in FoldShiftByConstant, and the shr+shl pair is the
      // extend-in-register pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
                                                 bool DoTransform) {
  // If we are just checking for an icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.
  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
    if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
        (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
      if (!DoTransform) return Cmp;

      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
        Constant *One = ConstantInt::get(In->getType(), 1);
        In = Builder.CreateXor(In, One, In->getName() + ".not");
      }

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X == 1) to i32 --> X        iff X has only the low bit set.
    // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
        // This only works for EQ and NE
        Cmp->isEquality()) {
      // If Op1C is some other power of two, convert:
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
        if (!DoTransform) return Cmp;

        bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
        if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
          // (X&4) == 2 --> false
          // (X&4) != 2 --> true
          Constant *Res = ConstantInt::get(Zext.getType(), isNE);
          return replaceInstUsesWith(Zext, Res);
        }

        uint32_t ShAmt = KnownZeroMask.logBase2();
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        if (!Op1CV->isNullValue() == isNE) { // Toggle the low bit.
          Constant *One = ConstantInt::get(In->getType(), 1);
          In = Builder.CreateXor(In, One);
        }

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
  // It is also profitable to transform icmp eq into not(xor(A, B)) because that
  // may lead to additional simplifications.
  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
    if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
      Value *LHS = Cmp->getOperand(0);
      Value *RHS = Cmp->getOperand(1);

      KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
      KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);

      if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
        APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
        APInt UnknownBit = ~KnownBits;
        if (UnknownBit.countPopulation() == 1) {
          if (!DoTransform) return Cmp;

          Value *Result = Builder.CreateXor(LHS, RHS);

          // Mask off any bits that are set and won't be shifted away.
          if (KnownLHS.One.uge(UnknownBit))
            Result = Builder.CreateAnd(Result,
                                       ConstantInt::get(ITy, UnknownBit));

          // Shift the bit we're testing down to the lsb.
          Result = Builder.CreateLShr(
              Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));

          if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
            Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
          Result->takeName(Cmp);
          return replaceInstUsesWith(Zext, Result);
        }
      }
    }
  }

  return nullptr;
}

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out.  For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63.  Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               0, CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x.  Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      uint64_t ShiftAmt = Amt->getZExtValue();
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x.  This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += Amt->getZExtValue();
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear are
        // known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
    return nullptr;

  // If one of the common conversions will work, do it.
  if (Instruction *Result = commonCastTransforms(CI))
    return Result;

  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = CI.getType();

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this!  Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << CI << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, CI, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits() - BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(Res,
                          APInt::getHighBitsSet(DestBitSize,
                                                DestBitSize - SrcBitsKept),
                          0, &CI))
      return replaceInstUsesWith(CI, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C = ConstantInt::get(Res->getType(),
                                   APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
  if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved.  We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = CI.getType()->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    //   SrcSize <  DstSize: zext(a & mask)
    //   SrcSize == DstSize: a & mask
    //   SrcSize  > DstSize: trunc(a) & mask
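    // For example, "zext (trunc i32 %a to i8) to i32" has SrcSize == DstSize
    // == 32 and MidSize == 8, so it becomes "and i32 %a, 255".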
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, CI.getType());
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, CI.getType());
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, CI);

  BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
  if (SrcI && SrcI->getOpcode() == Instruction::Or) {
    // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) if at least one
    // of the (zext icmp) can be eliminated. If so, immediately perform the
    // corresponding elimination.
    ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
    ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
    if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
        LHS->getOperand(0)->getType() == RHS->getOperand(0)->getType() &&
        (transformZExtICmp(LHS, CI, false) ||
         transformZExtICmp(RHS, CI, false))) {
      // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
      Value *LCast = Builder.CreateZExt(LHS, CI.getType(), LHS->getName());
      Value *RCast = Builder.CreateZExt(RHS, CI.getType(), RHS->getName());
      Value *Or = Builder.CreateOr(LCast, RCast, CI.getName());
      if (auto *OrInst = dyn_cast<Instruction>(Or))
        Builder.SetInsertPoint(OrInst);

      // Perform the elimination.
      if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
        transformZExtICmp(LHS, *LZExt);
      if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
        transformZExtICmp(RHS, *RZExt);

      return replaceInstUsesWith(CI, Or);
    }
  }

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (SrcI &&
      match(SrcI, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == CI.getType())
    return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (SrcI && match(SrcI, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == CI.getType()) {
    Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  return nullptr;
}

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI,
                                                 Instruction &CI) {
  Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
  ICmpInst::Predicate Pred = ICI->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if ((Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) ||
      (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))) {
    // (x <s  0) ? -1 : 0 -> ashr x, 31        -> all ones if negative
    // (x >s -1) ? -1 : 0 -> not (ashr x, 31)  -> all ones if positive
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != CI.getType())
      In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/);

    if (Pred == ICmpInst::ICMP_SGT)
      In = Builder.CreateNot(In, In->getName() + ".not");
    return replaceInstUsesWith(CI, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (ICI->hasOneUse() &&
        ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
      KnownBits Known = computeKnownBits(Op0, 0, &CI);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = ICI->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                       ConstantInt::getAllOnesValue(CI.getType()) :
                       ConstantInt::getNullValue(CI.getType());
          return replaceInstUsesWith(CI, V);
        }

        if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
          // sext ((x & 2^n) == 0)   -> (x >> n) - 1
          // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
          unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
          // Perform a right shift to place the desired bit in the LSB.
          if (ShiftAmt)
            In = Builder.CreateLShr(In,
                                    ConstantInt::get(In->getType(), ShiftAmt));

          // At this point "In" is either 1 or 0. Subtract 1 to turn
          // {1, 0} -> {0, -1}.
          In = Builder.CreateAdd(In,
                                 ConstantInt::getAllOnesValue(In->getType()),
                                 "sext");
        } else {
          // sext ((x & 2^n) != 0)   -> (x << bitwidth-n) a>> bitwidth-1
          // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
          unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
          // Perform a left shift to place the desired bit in the MSB.
          if (ShiftAmt)
            In = Builder.CreateShl(In,
                                   ConstantInt::get(In->getType(), ShiftAmt));

          // Distribute the bit over the whole bit width.
          In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
                                                       KnownZeroMask.getBitWidth() - 1), "sext");
        }

        if (CI.getType() == In->getType())
          return replaceInstUsesWith(CI, In);
        return CastInst::CreateIntegerCast(In, CI.getType(), true /*SExt*/);
      }
    }
  }

  return nullptr;
}

/// Return true if we can take the specified value and return it as type Ty
/// without inserting any new casts and without changing the value of the common
/// low bits.  This is used by code that tries to promote integer operations to
/// a wider type when doing so will allow us to eliminate the extension.
///
/// This function works on both vectors and scalars.
///
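/// For example, this returns true for "add i16 (trunc i32 %x to i16), 7" with
/// Ty = i32: the expression can be rebuilt as "add i32 %x, 7", whose low 16
/// bits match the original; the caller then sign-fills the high bits (see
/// visitSExt below) if they are not already known to match the sign bit.
///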
static bool canEvaluateSExtd(Value *V, Type *Ty) {
  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
         "Can't sign extend type to a smaller type");
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  switch (I->getOpcode()) {
  case Instruction::SExt:  // sext(sext(x)) -> sext(x)
  case Instruction::ZExt:  // sext(zext(x)) -> zext(x)
  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    // These operators can all arbitrarily be extended if their inputs can.
    return canEvaluateSExtd(I->getOperand(0), Ty) &&
           canEvaluateSExtd(I->getOperand(1), Ty);

  //case Instruction::Shl:   TODO
  //case Instruction::LShr:  TODO

  case Instruction::Select:
    return canEvaluateSExtd(I->getOperand(1), Ty) &&
           canEvaluateSExtd(I->getOperand(2), Ty);

  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateSExtd(IncValue, Ty)) return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}
1450 
1451 Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
1452  // If this sign extend is only used by a truncate, let the truncate be
1453  // eliminated before we try to optimize this sext.
1454  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
1455  return nullptr;
1456 
1457  if (Instruction *I = commonCastTransforms(CI))
1458  return I;
1459 
1460  Value *Src = CI.getOperand(0);
1461  Type *SrcTy = Src->getType(), *DestTy = CI.getType();
1462 
1463  // If we know that the value being extended is non-negative, we can use a zext
1464  // instead.
1465  KnownBits Known = computeKnownBits(Src, 0, &CI);
1466  if (Known.isNonNegative())
1467  return CastInst::Create(Instruction::ZExt, Src, DestTy);
1468 
1469  // Try to extend the entire expression tree to the wide destination type.
1470  if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1471  // Okay, we can transform this! Insert the new expression now.
1472  LLVM_DEBUG(
1473  dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1474  " to avoid sign extend: "
1475  << CI << '\n');
1476  Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1477  assert(Res->getType() == DestTy);
1478 
1479  uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
1480  uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1481 
1482  // If the high bits are already filled with sign bit, just replace this
1483  // cast with the result.
1484  if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
1485  return replaceInstUsesWith(CI, Res);
1486 
1487  // We need to emit a shl + ashr to do the sign extend.
1488  Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1489  return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1490  ShAmt);
1491  }
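  // Illustrative sketch of the widening above (assuming the target makes i32
  // a desirable type and i8 not; names invented):
  //   %a = add i8 %x, %y
  //   %ext = sext i8 %a to i32
  // can be rewritten as
  //   %x32 = sext i8 %x to i32
  //   %y32 = sext i8 %y to i32
  //   %ext = add i32 %x32, %y32
  // followed by a shl/ashr pair by 24 if the high bits are not already known
  // to be sign bits.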
1492 
1493  // If the input is a trunc from the destination type, then turn sext(trunc(x))
1494  // into shifts.
1495  Value *X;
1496  if (match(Src, m_OneUse(m_Trunc(m_Value(X)))) && X->getType() == DestTy) {
1497  // sext(trunc(X)) --> ashr(shl(X, C), C)
1498  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1499  unsigned DestBitSize = DestTy->getScalarSizeInBits();
1500  Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1501  return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1502  }
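  // Illustrative instance (names invented):
  //   %t = trunc i32 %x to i8
  //   %ext = sext i8 %t to i32
  // becomes
  //   %sh = shl i32 %x, 24
  //   %ext = ashr i32 %sh, 24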
1503 
1504  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
1505  return transformSExtICmp(ICI, CI);
1506 
1507  // If the input is a shl/ashr pair of the same constant, then this is a sign
1508  // extension from a smaller value. If we could trust arbitrary bitwidth
1509  // integers, we could turn this into a truncate to the smaller bit width and then
1510  // use a sext for the whole extension. Since we don't, look deeper and check
1511  // for a truncate. If the source and dest are the same type, eliminate the
1512  // trunc and extend and just do shifts. For example, turn:
1513  // %a = trunc i32 %i to i8
1514  // %b = shl i8 %a, C
1515  // %c = ashr i8 %b, C
1516  // %d = sext i8 %c to i32
1517  // into:
1518  // %a = shl i32 %i, 32-(8-C)
1519  // %d = ashr i32 %a, 32-(8-C)
1520  Value *A = nullptr;
1521  // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1522  Constant *BA = nullptr, *CA = nullptr;
1523  if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1524  m_Constant(CA))) &&
1525  BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1526  Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
1527  Constant *NumLowbitsLeft = ConstantExpr::getSub(
1528  ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1529  Constant *NewShAmt = ConstantExpr::getSub(
1530  ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1531  NumLowbitsLeft);
1532  NewShAmt =
1533  Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1534  A = Builder.CreateShl(A, NewShAmt, CI.getName());
1535  return BinaryOperator::CreateAShr(A, NewShAmt);
1536  }
1537 
1538  return nullptr;
1539 }
1540 
1541 /// Return true if the specified floating-point constant fits in the given
1542 /// FP type without changing its value.
1543 static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1544  bool losesInfo;
1545  APFloat F = CFP->getValueAPF();
1546  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1547  return !losesInfo;
1548 }
1549 
1550 static Type *shrinkFPConstant(ConstantFP *CFP) {
1551  if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1552  return nullptr; // No constant folding of this.
1553  // See if the value can be truncated to half and then reextended.
1554  if (fitsInFPType(CFP, APFloat::IEEEhalf()))
1555  return Type::getHalfTy(CFP->getContext());
1556  // See if the value can be truncated to float and then reextended.
1557  if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1558  return Type::getFloatTy(CFP->getContext());
1559  if (CFP->getType()->isDoubleTy())
1560  return nullptr; // Won't shrink.
1561  if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1562  return Type::getDoubleTy(CFP->getContext());
1563  // Don't try to shrink to various long double types.
1564  return nullptr;
1565 }
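// Illustrative behavior (sketch, not exhaustive):
//   shrinkFPConstant(2.0 as double) --> half (2.0 is exact in IEEEhalf)
//   shrinkFPConstant(0.1 as double) --> nullptr (0.1 is inexact in every
//                                       narrower binary format)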
1566 
1567 // Determine if this is a vector of ConstantFPs and if so, return the minimal
1568 // type we can safely truncate all elements to.
1569 // TODO: Make these support undef elements.
1570 static Type *shrinkFPConstantVector(Value *V) {
1571  auto *CV = dyn_cast<Constant>(V);
1572  auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1573  if (!CV || !CVVTy)
1574  return nullptr;
1575 
1576  Type *MinType = nullptr;
1577 
1578  unsigned NumElts = CVVTy->getNumElements();
1579 
1580  // For fixed-width vectors we find the minimal type by looking
1581  // through the constant values of the vector.
1582  for (unsigned i = 0; i != NumElts; ++i) {
1583  auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1584  if (!CFP)
1585  return nullptr;
1586 
1587  Type *T = shrinkFPConstant(CFP);
1588  if (!T)
1589  return nullptr;
1590 
1591  // If we haven't found a type yet or this type has a larger mantissa than
1592  // our previous type, this is our new minimal type.
1593  if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1594  MinType = T;
1595  }
1596 
1597  // Make a vector type from the minimal type.
1598  return FixedVectorType::get(MinType, NumElts);
1599 }
1600 
1601 /// Find the minimum FP type we can safely truncate to.
1602 static Type *getMinimumFPType(Value *V) {
1603  if (auto *FPExt = dyn_cast<FPExtInst>(V))
1604  return FPExt->getOperand(0)->getType();
1605 
1606  // If this value is a constant, return the constant in the smallest FP type
1607  // that can accurately represent it. This allows us to turn
1608  // (float)((double)X+2.0) into x+2.0f.
1609  if (auto *CFP = dyn_cast<ConstantFP>(V))
1610  if (Type *T = shrinkFPConstant(CFP))
1611  return T;
1612 
1613  // We can only correctly find a minimum type for a scalable vector when it is
1614  // a splat. For splats of constant values the fpext is wrapped up as a
1615  // ConstantExpr.
1616  if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1617  if (FPCExt->getOpcode() == Instruction::FPExt)
1618  return FPCExt->getOperand(0)->getType();
1619 
1620  // Try to shrink a vector of FP constants. This returns nullptr on scalable
1621 // vectors.
1622  if (Type *T = shrinkFPConstantVector(V))
1623  return T;
1624 
1625  return V->getType();
1626 }
1627 
1628 /// Return true if the cast from integer to FP can be proven to be exact for all
1629 /// possible inputs (the conversion does not lose any precision).
1630 static bool isKnownExactCastIntToFP(CastInst &I) {
1631  CastInst::CastOps Opcode = I.getOpcode();
1632  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1633  "Unexpected cast");
1634  Value *Src = I.getOperand(0);
1635  Type *SrcTy = Src->getType();
1636  Type *FPTy = I.getType();
1637  bool IsSigned = Opcode == Instruction::SIToFP;
1638  int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1639 
1640  // Easy case - if the source integer type has fewer bits than the FP mantissa,
1641  // then the cast must be exact.
1642  int DestNumSigBits = FPTy->getFPMantissaWidth();
1643  if (SrcSize <= DestNumSigBits)
1644  return true;
1645 
1646  // Cast from FP to integer and back to FP is independent of the intermediate
1647  // integer width because of poison on overflow.
1648  Value *F;
1649  if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1650  // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1651  // potential rounding of negative FP input values.
1652  int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1653  if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1654  SrcNumSigBits++;
1655 
1656  // [su]itofp (fpto[su]i F) --> exact if the source type has no more
1657  // significant bits than the destination (and make sure neither type is
1658  // weird -- ppc_fp128).
1659  if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1660  SrcNumSigBits <= DestNumSigBits)
1661  return true;
1662  }
1663 
1664  // TODO:
1665  // Try harder to find if the source integer type has less significant bits.
1666  // For example, compute number of sign bits or compute low bit mask.
1667  return false;
1668 }
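// Illustrative outcomes (sketch):
//   uitofp i24 %x to float  --> exact (24 value bits <= 24 mantissa bits)
//   sitofp i25 %x to float  --> exact (25 - 1 sign bit <= 24)
//   sitofp i32 %x to float  --> not provably exact in general
//   sitofp (fptosi float %f to i32) to float --> exact (24 <= 24; overflow
//                                                 would be poison)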
1669 
1670 Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1671  if (Instruction *I = commonCastTransforms(FPT))
1672  return I;
1673 
1674  // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1675  // simplify this expression to avoid one or more of the trunc/extend
1676  // operations if we can do so without changing the numerical results.
1677  //
1678  // The exact manner in which the widths of the operands interact to limit
1679  // what we can and cannot do safely varies from operation to operation, and
1680  // is explained below in the various case statements.
1681  Type *Ty = FPT.getType();
1682  auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1683  if (BO && BO->hasOneUse()) {
1684  Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
1685  Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
1686  unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1687  unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1688  unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1689  unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1690  unsigned DstWidth = Ty->getFPMantissaWidth();
1691  switch (BO->getOpcode()) {
1692  default: break;
1693  case Instruction::FAdd:
1694  case Instruction::FSub:
1695  // For addition and subtraction, the infinitely precise result can
1696  // essentially be arbitrarily wide; proving that double rounding
1697  // will not occur because the result of OpI is exact (as we will for
1698  // FMul, for example) is hopeless. However, we *can* nonetheless
1699  // frequently know that double rounding cannot occur (or that it is
1700  // innocuous) by taking advantage of the specific structure of
1701  // infinitely-precise results that admit double rounding.
1702  //
1703  // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1704  // to represent both sources, we can guarantee that the double
1705  // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1706  // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1707  // for proof of this fact).
1708  //
1709  // Note: Figueroa does not consider the case where DstFormat !=
1710  // SrcFormat. It's possible (likely even!) that this analysis
1711  // could be tightened for those cases, but they are rare (the main
1712  // case of interest here is (float)((double)float + float)).
1713  if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1714  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1715  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1716  Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1717  RI->copyFastMathFlags(BO);
1718  return RI;
1719  }
1720  break;
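  // Illustrative instance for the main case of interest (names invented):
  //   %a = fpext float %x to double
  //   %b = fpext float %y to double
  //   %s = fadd double %a, %b
  //   %t = fptrunc double %s to float
  // becomes fadd float %x, %y, since OpWidth = 53 >= 2 * 24 + 1 and
  // DstWidth = 24 >= SrcWidth = 24.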
1721  case Instruction::FMul:
1722  // For multiplication, the infinitely precise result has at most
1723  // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1724  // that such a value can be exactly represented, then no double
1725  // rounding can possibly occur; we can safely perform the operation
1726  // in the destination format if it can represent both sources.
1727  if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1728  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1729  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1730  return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1731  }
1732  break;
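  // Illustrative instance (names invented): fptrunc of
  //   fmul double (fpext float %x), (fpext float %y)
  // to float becomes fmul float %x, %y, since OpWidth = 53 >=
  // LHSWidth + RHSWidth = 48 and DstWidth = 24 >= SrcWidth = 24.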
1733  case Instruction::FDiv:
1734  // For division, we again use the bound from Figueroa's
1735  // dissertation. I am entirely certain that this bound can be
1736  // tightened in the unbalanced operand case by an analysis based on
1737  // the diophantine rational approximation bound, but the well-known
1738  // condition used here is a good conservative first pass.
1739  // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1740  if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1741  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1742  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1743  return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1744  }
1745  break;
1746  case Instruction::FRem: {
1747  // Remainder is straightforward: it is always exact, so the
1748  // type of OpI doesn't enter into things at all. We simply evaluate
1749  // in whichever source type is larger, then convert to the
1750  // destination type.
1751  if (SrcWidth == OpWidth)
1752  break;
1753  Value *LHS, *RHS;
1754  if (LHSWidth == SrcWidth) {
1755  LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1756  RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1757  } else {
1758  LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1759  RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1760  }
1761 
1762  Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1763  return CastInst::CreateFPCast(ExactResult, Ty);
1764  }
1765  }
1766  }
1767 
1768  // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1769  Value *X;
1770  Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1771  if (Op && Op->hasOneUse()) {
1772  // FIXME: The FMF should propagate from the fptrunc, not the source op.
1773  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1774  if (isa<FPMathOperator>(Op))
1775  Builder.setFastMathFlags(Op->getFastMathFlags());
1776 
1777  if (match(Op, m_FNeg(m_Value(X)))) {
1778  Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1779 
1780  return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1781  }
1782 
1783  // If we are truncating a select that has an extended operand, we can
1784  // narrow the other operand and do the select as a narrow op.
1785  Value *Cond, *X, *Y;
1786  if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1787  X->getType() == Ty) {
1788  // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1789  Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1790  Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1791  return replaceInstUsesWith(FPT, Sel);
1792  }
1793  if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1794  X->getType() == Ty) {
1795  // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1796  Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1797  Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1798  return replaceInstUsesWith(FPT, Sel);
1799  }
1800  }
1801 
1802  if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1803  switch (II->getIntrinsicID()) {
1804  default: break;
1805  case Intrinsic::ceil:
1806  case Intrinsic::fabs:
1807  case Intrinsic::floor:
1808  case Intrinsic::nearbyint:
1809  case Intrinsic::rint:
1810  case Intrinsic::round:
1811  case Intrinsic::roundeven:
1812  case Intrinsic::trunc: {
1813  Value *Src = II->getArgOperand(0);
1814  if (!Src->hasOneUse())
1815  break;
1816 
1817  // Except for fabs, this transformation requires the input of the unary FP
1818  // operation to be itself an fpext from the type to which we're
1819  // truncating.
1820  if (II->getIntrinsicID() != Intrinsic::fabs) {
1821  FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1822  if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1823  break;
1824  }
1825 
1826  // Do unary FP operation on smaller type.
1827  // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1828  Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1829  Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
1830  II->getIntrinsicID(), Ty);
1831  SmallVector<OperandBundleDef, 1> OpBundles;
1832  II->getOperandBundlesAsDefs(OpBundles);
1833  CallInst *NewCI =
1834  CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1835  NewCI->copyFastMathFlags(II);
1836  return NewCI;
1837  }
1838  }
1839  }
1840 
1841  if (Instruction *I = shrinkInsertElt(FPT, Builder))
1842  return I;
1843 
1844  Value *Src = FPT.getOperand(0);
1845  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1846  auto *FPCast = cast<CastInst>(Src);
1847  if (isKnownExactCastIntToFP(*FPCast))
1848  return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1849  }
1850 
1851  return nullptr;
1852 }
1853 
1854 Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1855  // If the source operand is a cast from integer to FP and known exact, then
1856  // cast the integer operand directly to the destination type.
1857  Type *Ty = FPExt.getType();
1858  Value *Src = FPExt.getOperand(0);
1859  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1860  auto *FPCast = cast<CastInst>(Src);
1861  if (isKnownExactCastIntToFP(*FPCast))
1862  return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1863  }
1864 
1865  return commonCastTransforms(FPExt);
1866 }
1867 
1868 /// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1869 /// This is safe if the intermediate type has enough bits in its mantissa to
1870 /// accurately represent all values of X. For example, this won't work with
1871 /// i64 -> float -> i64.
1872 Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1873  if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1874  return nullptr;
1875 
1876  auto *OpI = cast<CastInst>(FI.getOperand(0));
1877  Value *X = OpI->getOperand(0);
1878  Type *XType = X->getType();
1879  Type *DestType = FI.getType();
1880  bool IsOutputSigned = isa<FPToSIInst>(FI);
1881 
1882  // Since we can assume the conversion won't overflow, our decision as to
1883  // whether the input will fit in the float should depend on the minimum
1884  // of the input range and output range.
1885 
1886  // This means this is also safe for a signed input and unsigned output, since
1887  // a negative input would lead to undefined behavior.
1888  if (!isKnownExactCastIntToFP(*OpI)) {
1889  // The first cast may not round exactly based on the source integer width
1890  // and FP width, but the overflow UB rules can still allow this to fold.
1891  // If the destination type is narrow, that means the intermediate FP value
1892  // must be large enough to hold the source value exactly.
1893  // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
1894  int OutputSize = (int)DestType->getScalarSizeInBits() - IsOutputSigned;
1895  if (OutputSize > OpI->getType()->getFPMantissaWidth())
1896  return nullptr;
1897  }
1898 
1899  if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1900  bool IsInputSigned = isa<SIToFPInst>(OpI);
1901  if (IsInputSigned && IsOutputSigned)
1902  return new SExtInst(X, DestType);
1903  return new ZExtInst(X, DestType);
1904  }
1905  if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1906  return new TruncInst(X, DestType);
1907 
1908  assert(XType == DestType && "Unexpected types for int to FP to int casts");
1909  return replaceInstUsesWith(FI, X);
1910 }
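// Illustrative instance (names invented):
//   %f = sitofp i16 %x to float
//   %i = fptosi float %f to i32
// becomes sext i16 %x to i32: the i16 -> float cast is exact (15 <= 24),
// and the destination is wider than the source integer.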
1911 
1912 Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
1913  if (Instruction *I = foldItoFPtoI(FI))
1914  return I;
1915 
1916  return commonCastTransforms(FI);
1917 }
1918 
1919 Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
1920  if (Instruction *I = foldItoFPtoI(FI))
1921  return I;
1922 
1923  return commonCastTransforms(FI);
1924 }
1925 
1926 Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
1927  return commonCastTransforms(CI);
1928 }
1929 
1930 Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
1931  return commonCastTransforms(CI);
1932 }
1933 
1934 Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
1935  // If the source integer type is not the intptr_t type for this target, do a
1936  // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
1937  // cast to be exposed to other transforms.
1938  unsigned AS = CI.getAddressSpace();
1939  if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
1940  DL.getPointerSizeInBits(AS)) {
1941  Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
1942  DL.getIntPtrType(CI.getContext(), AS));
1943  Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
1944  return new IntToPtrInst(P, CI.getType());
1945  }
1946 
1947  if (Instruction *I = commonCastTransforms(CI))
1948  return I;
1949 
1950  return nullptr;
1951 }
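// Illustrative instance, assuming 64-bit pointers in the address space:
//   inttoptr i32 %x to i8*
// becomes
//   %w = zext i32 %x to i64
//   inttoptr i64 %w to i8*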
1952 
1953 /// Implement the transforms for cast of pointer (bitcast/ptrtoint)
1954 Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) {
1955  Value *Src = CI.getOperand(0);
1956 
1957  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
1958  // If casting the result of a getelementptr instruction with no offset, turn
1959  // this into a cast of the original pointer!
1960  if (GEP->hasAllZeroIndices() &&
1961  // If CI is an addrspacecast and GEP changes the pointer type, merging
1962  // GEP into CI would undo canonicalizing addrspacecast with different
1963  // pointer types, causing infinite loops.
1964  (!isa<AddrSpaceCastInst>(CI) ||
1965  GEP->getType() == GEP->getPointerOperandType())) {
1966  // Changing the cast operand is usually not a good idea but it is safe
1967  // here because the pointer operand is being replaced with another
1968  // pointer operand so the opcode doesn't need to change.
1969  return replaceOperand(CI, 0, GEP->getOperand(0));
1970  }
1971  }
1972 
1973  return commonCastTransforms(CI);
1974 }
1975 
1976 Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
1977  // If the destination integer type is not the intptr_t type for this target,
1978  // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
1979  // to be exposed to other transforms.
1980  Value *SrcOp = CI.getPointerOperand();
1981  Type *SrcTy = SrcOp->getType();
1982  Type *Ty = CI.getType();
1983  unsigned AS = CI.getPointerAddressSpace();
1984  unsigned TySize = Ty->getScalarSizeInBits();
1985  unsigned PtrSize = DL.getPointerSizeInBits(AS);
1986  if (TySize != PtrSize) {
1987  Type *IntPtrTy =
1988  SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
1989  Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
1990  return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
1991  }
1992 
1993  Value *Vec, *Scalar, *Index;
1994  if (match(SrcOp, m_OneUse(m_InsertElt(m_OneUse(m_BitCast(m_Value(Vec))),
1995  m_Value(Scalar), m_Value(Index)))) &&
1996  Vec->getType() == Ty) {
1997  assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
1998  // Convert the scalar to int followed by insert to eliminate one cast:
1999  // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2000  Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2001  return InsertElementInst::Create(Vec, NewCast, Index);
2002  }
2003 
2004  return commonPointerCastTransforms(CI);
2005 }
2006 
2007 /// This input value (which is known to have vector type) is being zero extended
2008 /// or truncated to the specified vector type. Since the zext/trunc is done
2009 /// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and
2010 /// endianness will impact which end of the vector is extended or
2011 /// truncated.
2012 ///
2013 /// A vector is always stored with index 0 at the lowest address, which
2014 /// corresponds to the most significant bits for a big endian stored integer and
2015 /// the least significant bits for little endian. A trunc/zext of an integer
2016 /// impacts the big end of the integer. Thus, we need to add/remove elements at
2017 /// the front of the vector for big endian targets, and the back of the vector
2018 /// for little endian targets.
2019 ///
2020 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2021 ///
2022 /// The source and destination vector types may have different element types.
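/// For example, on a little-endian target (illustrative, names invented):
///   %i = bitcast <4 x i32> %v to i128
///   %t = trunc i128 %i to i64
///   %r = bitcast i64 %t to <2 x i32>
/// becomes
///   %r = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>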
2023 static Instruction *
2024 optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2025  InstCombinerImpl &IC) {
2026  // We can only do this optimization if the output is a multiple of the input
2027  // element size, or the input is a multiple of the output element size.
2028  // Convert the input type to have the same element type as the output.
2029  VectorType *SrcTy = cast<VectorType>(InVal->getType());
2030 
2031  if (SrcTy->getElementType() != DestTy->getElementType()) {
2032  // The input types don't need to be identical, but for now they must be the
2033  // same size. There is no specific reason we couldn't handle things like
2034  // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2035  // there yet.
2036  if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2037  DestTy->getElementType()->getPrimitiveSizeInBits())
2038  return nullptr;
2039 
2040  SrcTy =
2041  FixedVectorType::get(DestTy->getElementType(),
2042  cast<FixedVectorType>(SrcTy)->getNumElements());
2043  InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2044  }
2045 
2046  bool IsBigEndian = IC.getDataLayout().isBigEndian();
2047  unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2048  unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2049 
2050  assert(SrcElts != DestElts && "Element counts should be different.");
2051 
2052  // Now that the element types match, get the shuffle mask and RHS of the
2053  // shuffle to use, which depends on whether we're increasing or decreasing the
2054  // size of the input.
2055  SmallVector<int, 16> ShuffleMaskStorage;
2056  ArrayRef<int> ShuffleMask;
2057  Value *V2;
2058 
2059  // Produce an identity shuffle mask for the src vector.
2060  ShuffleMaskStorage.resize(SrcElts);
2061  std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
2062 
2063  if (SrcElts > DestElts) {
2064  // If we're shrinking the number of elements (rewriting an integer
2065  // truncate), just shuffle in the elements corresponding to the least
2066  // significant bits from the input and use undef as the second shuffle
2067  // input.
2068  V2 = UndefValue::get(SrcTy);
2069  // Make sure the shuffle mask selects the "least significant bits" by
2070  // keeping elements from the back of the src vector for big endian, and from
2071  // the front for little endian.
2072  ShuffleMask = ShuffleMaskStorage;
2073  if (IsBigEndian)
2074  ShuffleMask = ShuffleMask.take_back(DestElts);
2075  else
2076  ShuffleMask = ShuffleMask.take_front(DestElts);
2077  } else {
2078  // If we're increasing the number of elements (rewriting an integer zext),
2079  // shuffle in all of the elements from InVal. Fill the rest of the result
2080  // elements with zeros from a constant zero.
2081  V2 = Constant::getNullValue(SrcTy);
2082  // Use first elt from V2 when indicating zero in the shuffle mask.
2083  uint32_t NullElt = SrcElts;
2084  // Extend with null values in the "most significant bits" by adding elements
2085  // in front of the src vector for big endian, and at the back for little
2086  // endian.
2087  unsigned DeltaElts = DestElts - SrcElts;
2088  if (IsBigEndian)
2089  ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2090  else
2091  ShuffleMaskStorage.append(DeltaElts, NullElt);
2092  ShuffleMask = ShuffleMaskStorage;
2093  }
2094 
2095  return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2096 }
2097 
2098 static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2099  return Value % Ty->getPrimitiveSizeInBits() == 0;
2100 }
2101 
2102 static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2103  return Value / Ty->getPrimitiveSizeInBits();
2104 }
2105 
2106 /// V is a value which is inserted into a vector of VecEltTy.
2107 /// Look through the value to see if we can decompose it into
2108 /// insertions into the vector. See the example in the comment for
2109 /// OptimizeIntegerToVectorInsertions for the pattern this handles.
2110 /// The type of V is always a non-zero multiple of VecEltTy's size.
2111 /// Shift is the number of bits between the lsb of V and the lsb of
2112 /// the vector.
2113 ///
2114 /// This returns false if the pattern can't be matched or true if it can,
2115 /// filling in Elements with the elements found here.
2116 static bool collectInsertionElements(Value *V, unsigned Shift,
2117  SmallVectorImpl<Value *> &Elements,
2118  Type *VecEltTy, bool isBigEndian) {
2119  assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2120  "Shift should be a multiple of the element type size");
2121 
2122  // Undef values never contribute useful bits to the result.
2123  if (isa<UndefValue>(V)) return true;
2124 
2125  // If we got down to a value of the right type, we win; try inserting into the
2126  // right element.
2127  if (V->getType() == VecEltTy) {
2128  // Inserting null doesn't actually insert any elements.
2129  if (Constant *C = dyn_cast<Constant>(V))
2130  if (C->isNullValue())
2131  return true;
2132 
2133  unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2134  if (isBigEndian)
2135  ElementIndex = Elements.size() - ElementIndex - 1;
2136 
2137  // Fail if multiple elements are inserted into this slot.
2138  if (Elements[ElementIndex])
2139  return false;
2140 
2141  Elements[ElementIndex] = V;
2142  return true;
2143  }
2144 
2145  if (Constant *C = dyn_cast<Constant>(V)) {
2146  // Figure out the # elements this provides, and bitcast it or slice it up
2147  // as required.
2148  unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2149  VecEltTy);
2150  // If the constant is the size of a vector element, we just need to bitcast
2151  // it to the right type so it gets properly inserted.
2152  if (NumElts == 1)
2153  return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2154  Shift, Elements, VecEltTy, isBigEndian);
2155 
2156  // Okay, this is a constant that covers multiple elements. Slice it up into
2157  // pieces and insert each element-sized piece into the vector.
2158  if (!isa<IntegerType>(C->getType()))
2159  C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2160  C->getType()->getPrimitiveSizeInBits()));
2161  unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2162  Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2163 
2164  for (unsigned i = 0; i != NumElts; ++i) {
2165  unsigned ShiftI = Shift+i*ElementSize;
2166  Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
2167  ShiftI));
2168  Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2169  if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
2170  isBigEndian))
2171  return false;
2172  }
2173  return true;
2174  }
2175 
2176  if (!V->hasOneUse()) return false;
2177 
2178  Instruction *I = dyn_cast<Instruction>(V);
2179  if (!I) return false;
2180  switch (I->getOpcode()) {
2181  default: return false; // Unhandled case.
2182  case Instruction::BitCast:
2183  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2184  isBigEndian);
2185  case Instruction::ZExt:
2186  if (!isMultipleOfTypeSize(
2187  I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2188  VecEltTy))
2189  return false;
2190  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2191  isBigEndian);
2192  case Instruction::Or:
2193  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2194  isBigEndian) &&
2195  collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2196  isBigEndian);
2197  case Instruction::Shl: {
2198  // Must be shifting by a constant that is a multiple of the element size.
2199  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2200  if (!CI) return false;
2201  Shift += CI->getZExtValue();
2202  if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2203  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2204  isBigEndian);
2205  }
2206 
2207  }
2208 }
2209 
2210 
2211 /// If the input is an 'or' instruction, we may be doing shifts and ors to
2212 /// assemble the elements of the vector manually.
2213 /// Try to rip the code out and replace it with insertelements. This is to
2214 /// optimize code like this:
2215 ///
2216 /// %tmp37 = bitcast float %inc to i32
2217 /// %tmp38 = zext i32 %tmp37 to i64
2218 /// %tmp31 = bitcast float %inc5 to i32
2219 /// %tmp32 = zext i32 %tmp31 to i64
2220 /// %tmp33 = shl i64 %tmp32, 32
2221 /// %ins35 = or i64 %tmp33, %tmp38
2222 /// %tmp43 = bitcast i64 %ins35 to <2 x float>
2223 ///
2224 /// Into two insertelements that do "buildvector{%inc, %inc5}".
2225 static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2226  InstCombinerImpl &IC) {
2227  auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2228  Value *IntInput = CI.getOperand(0);
2229 
2230  SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2231  if (!collectInsertionElements(IntInput, 0, Elements,
2232  DestVecTy->getElementType(),
2233  IC.getDataLayout().isBigEndian()))
2234  return nullptr;
2235 
2236  // If we succeeded, we know that all of the elements are specified by Elements
2237  // or are zero if Elements has a null entry. Recast this as a set of
2238  // insertions.
2239  Value *Result = Constant::getNullValue(CI.getType());
2240  for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2241  if (!Elements[i]) continue; // Unset element.
2242 
2243  Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2244  IC.Builder.getInt32(i));
2245  }
2246 
2247  return Result;
2248 }
2249 
2250 /// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2251 /// vector followed by extract element. The backend tends to handle bitcasts of
2252 /// vectors better than bitcasts of scalars because vector registers are
2253 /// usually not type-specific like scalar integer or scalar floating-point.
2254 static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2255  InstCombinerImpl &IC) {
2256  // TODO: Create and use a pattern matcher for ExtractElementInst.
2257  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
2258  if (!ExtElt || !ExtElt->hasOneUse())
2259  return nullptr;
2260 
2261  // The bitcast must be to a vectorizable type, otherwise we can't make a new
2262  // type to extract from.
2263  Type *DestType = BitCast.getType();
2264  if (!VectorType::isValidElementType(DestType))
2265  return nullptr;
2266 
2267  auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType());
2268  auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(),
2269  NewVecType, "bc");
2270  return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
2271 }
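// Illustrative instance (names invented):
//   %e = extractelement <2 x i32> %v, i64 0
//   %f = bitcast i32 %e to float
// becomes
//   %bc = bitcast <2 x i32> %v to <2 x float>
//   %f = extractelement <2 x float> %bc, i64 0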
2272 
2273 /// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2274 static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2275  InstCombiner::BuilderTy &Builder) {
2276  Type *DestTy = BitCast.getType();
2277  BinaryOperator *BO;
2278  if (!DestTy->isIntOrIntVectorTy() ||
2279  !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2280  !BO->isBitwiseLogicOp())
2281  return nullptr;
2282 
2283  // FIXME: This transform is restricted to vector types to avoid backend
2284  // problems caused by creating potentially illegal operations. If a fix-up is
2285  // added to handle that situation, we can remove this check.
2286  if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2287  return nullptr;
2288 
2289  Value *X;
2290  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2291  X->getType() == DestTy && !isa<Constant>(X)) {
2292  // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2293  Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2294  return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2295  }
2296 
2297  if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2298  X->getType() == DestTy && !isa<Constant>(X)) {
2299  // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2300  Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2301  return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2302  }
2303 
2304  // Canonicalize vector bitcasts to come before vector bitwise logic with a
2305  // constant. This eases recognition of special constants for later ops.
2306  // Example:
2307  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2308  Constant *C;
2309  if (match(BO->getOperand(1), m_Constant(C))) {
2310  // bitcast (logic X, C) --> logic (bitcast X, C')
2311  Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2312  Value *CastedC = Builder.CreateBitCast(C, DestTy);
2313  return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2314  }
2315 
2316  return nullptr;
2317 }
2318 
2319 /// Change the type of a select if we can eliminate a bitcast.
2320 static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2321  InstCombiner::BuilderTy &Builder) {
2322  Value *Cond, *TVal, *FVal;
2323  if (!match(BitCast.getOperand(0),
2324  m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2325  return nullptr;
2326 
2327  // A vector select must maintain the same number of elements in its operands.
2328  Type *CondTy = Cond->getType();
2329  Type *DestTy = BitCast.getType();
2330  if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2331  if (!DestTy->isVectorTy() ||
2332  CondVTy->getElementCount() !=
2333  cast<VectorType>(DestTy)->getElementCount())
2334  return nullptr;
2335 
2336  // FIXME: This transform is restricted from changing the select between
2337  // scalars and vectors to avoid backend problems caused by creating
2338  // potentially illegal operations. If a fix-up is added to handle that
2339  // situation, we can remove this check.
2340  if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2341  return nullptr;
2342 
2343  auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2344  Value *X;
2345  if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2346  !isa<Constant>(X)) {
2347  // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2348  Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2349  return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2350  }
2351 
2352  if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2353  !isa<Constant>(X)) {
2354  // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2355  Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2356  return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2357  }
2358 
2359  return nullptr;
2360 }
2361 
2362 /// Check if all users of CI are StoreInsts.
2363 static bool hasStoreUsersOnly(CastInst &CI) {
2364  for (User *U : CI.users()) {
2365  if (!isa<StoreInst>(U))
2366  return false;
2367  }
2368  return true;
2369 }
2370 
2371 /// This function handles following case
2372 ///
2373 /// A -> B cast
2374 /// PHI
2375 /// B -> A cast
2376 ///
2377 /// All the related PHI nodes can be replaced by new PHI nodes with type A.
2378 /// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2379 Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2380  PHINode *PN) {
2381  // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2382  if (hasStoreUsersOnly(CI))
2383  return nullptr;
2384 
2385  Value *Src = CI.getOperand(0);
2386  Type *SrcTy = Src->getType(); // Type B
2387  Type *DestTy = CI.getType(); // Type A
2388 
2389  SmallVector<PHINode *, 4> PhiWorklist;
2390  SmallSetVector<PHINode *, 4> OldPhiNodes;
2391 
2392  // Find all of the A->B casts and PHI nodes.
2393  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2394  // OldPhiNodes is used to track all known PHI nodes, before adding a new
2395  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2396  PhiWorklist.push_back(PN);
2397  OldPhiNodes.insert(PN);
2398  while (!PhiWorklist.empty()) {
2399  auto *OldPN = PhiWorklist.pop_back_val();
2400  for (Value *IncValue : OldPN->incoming_values()) {
2401  if (isa<Constant>(IncValue))
2402  continue;
2403 
2404  if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2405  // If there is a sequence of one or more load instructions, each loaded
2406  // value is used as the address of a later load instruction, and a
2407  // bitcast is necessary to change the value type; don't optimize it. For
2408  // simplicity we give up if the load address comes from another load.
2409  Value *Addr = LI->getOperand(0);
2410  if (Addr == &CI || isa<LoadInst>(Addr))
2411  return nullptr;
2412  // Don't transform "load <256 x i32>, <256 x i32>*" to
2413  // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2414  // TODO: Remove this check when bitcast between vector and x86_amx
2415  // is replaced with a specific intrinsic.
2416  if (DestTy->isX86_AMXTy())
2417  return nullptr;
2418  if (LI->hasOneUse() && LI->isSimple())
2419  continue;
2420  // If a LoadInst has more than one use, changing the type of loaded
2421  // value may create another bitcast.
2422  return nullptr;
2423  }
2424 
2425  if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2426  if (OldPhiNodes.insert(PNode))
2427  PhiWorklist.push_back(PNode);
2428  continue;
2429  }
2430 
2431  auto *BCI = dyn_cast<BitCastInst>(IncValue);
2432  // We can't handle other instructions.
2433  if (!BCI)
2434  return nullptr;
2435 
2436  // Verify it's a A->B cast.
2437  Type *TyA = BCI->getOperand(0)->getType();
2438  Type *TyB = BCI->getType();
2439  if (TyA != DestTy || TyB != SrcTy)
2440  return nullptr;
2441  }
2442  }
2443 
2444  // Check that each user of each old PHI node is something that we can
2445  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2446  for (auto *OldPN : OldPhiNodes) {
2447  for (User *V : OldPN->users()) {
2448  if (auto *SI = dyn_cast<StoreInst>(V)) {
2449  if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2450  return nullptr;
2451  } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2452  // Verify it's a B->A cast.
2453  Type *TyB = BCI->getOperand(0)->getType();
2454  Type *TyA = BCI->getType();
2455  if (TyA != DestTy || TyB != SrcTy)
2456  return nullptr;
2457  } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2458  // As long as the user is another old PHI node, then even if we don't
2459  // rewrite it, the PHI web we're considering won't have any users
2460  // outside itself, so it'll be dead.
2461  if (OldPhiNodes.count(PHI) == 0)
2462  return nullptr;
2463  } else {
2464  return nullptr;
2465  }
2466  }
2467  }
2468 
2469  // For each old PHI node, create a corresponding new PHI node with a type A.
2470  SmallDenseMap<PHINode *, PHINode *, 4> NewPNodes;
2471  for (auto *OldPN : OldPhiNodes) {
2472  Builder.SetInsertPoint(OldPN);
2473  PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2474  NewPNodes[OldPN] = NewPN;
2475  }
2476 
2477  // Fill in the operands of new PHI nodes.
2478  for (auto *OldPN : OldPhiNodes) {
2479  PHINode *NewPN = NewPNodes[OldPN];
2480  for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2481  Value *V = OldPN->getOperand(j);
2482  Value *NewV = nullptr;
2483  if (auto *C = dyn_cast<Constant>(V)) {
2484  NewV = ConstantExpr::getBitCast(C, DestTy);
2485  } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2486  // Explicitly perform load combine to make sure no opposing transform
2487  // can remove the bitcast in the meantime and trigger an infinite loop.
2488  Builder.SetInsertPoint(LI);
2489  NewV = combineLoadToNewType(*LI, DestTy);
2490  // Remove the old load and its use in the old phi, which itself becomes
2491  // dead once the whole transform finishes.
2492  replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
2493  eraseInstFromFunction(*LI);
2494  } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2495  NewV = BCI->getOperand(0);
2496  } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2497  NewV = NewPNodes[PrevPN];
2498  }
2499  assert(NewV);
2500  NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2501  }
2502  }
2503 
2504  // Traverse all accumulated PHI nodes and process their users,
2505  // which are Stores and BitCasts. Without this processing
2506  // NewPHI nodes could be replicated and could lead to extra
2507  // moves generated after DeSSA.
2508  // If there is a store with type B, change it to type A.
2509 
2510 
2511  // Replace users of BitCast B->A with NewPHI. These will help
2512  // later to get rid of a closure formed by OldPHI nodes.
2513  Instruction *RetVal = nullptr;
2514  for (auto *OldPN : OldPhiNodes) {
2515  PHINode *NewPN = NewPNodes[OldPN];
2516  for (User *V : make_early_inc_range(OldPN->users())) {
2517  if (auto *SI = dyn_cast<StoreInst>(V)) {
2518  assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2519  Builder.SetInsertPoint(SI);
2520  auto *NewBC =
2521  cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2522  SI->setOperand(0, NewBC);
2523  Worklist.push(SI);
2524  assert(hasStoreUsersOnly(*NewBC));
2525  }
2526  else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2527  Type *TyB = BCI->getOperand(0)->getType();
2528  Type *TyA = BCI->getType();
2529  assert(TyA == DestTy && TyB == SrcTy);
2530  (void) TyA;
2531  (void) TyB;
2532  Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2533  if (BCI == &CI)
2534  RetVal = I;
2535  } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2536  assert(OldPhiNodes.contains(PHI));
2537  (void) PHI;
2538  } else {
2539  llvm_unreachable("all uses should be handled");
2540  }
2541  }
2542  }
2543 
2544  return RetVal;
2545 }
2546 
2547 Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2548  // If the operands are integer typed then apply the integer transforms,
2549  // otherwise just apply the common ones.
2550  Value *Src = CI.getOperand(0);
2551  Type *SrcTy = Src->getType();
2552  Type *DestTy = CI.getType();
2553 
2554  // Get rid of casts from one type to the same type. These are useless and can
2555  // be replaced by the operand.
2556  if (DestTy == Src->getType())
2557  return replaceInstUsesWith(CI, Src);
2558 
2559  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
2560  PointerType *SrcPTy = cast<PointerType>(SrcTy);
2561  PointerType *DstPTy = cast<PointerType>(DestTy);
2562  Type *DstElTy = DstPTy->getElementType();
2563  Type *SrcElTy = SrcPTy->getElementType();
2564 
2565  // Casting pointers between the same type but with different address spaces
2566  // is an addrspace cast rather than a bitcast.
2567  if ((DstElTy == SrcElTy) &&
2568  (DstPTy->getAddressSpace() != SrcPTy->getAddressSpace()))
2569  return new AddrSpaceCastInst(Src, DestTy);
2570 
2571  // If we are casting an alloca to a pointer to a type of the same
2572  // size, rewrite the allocation instruction to allocate the "right" type.
2573  // There is no need to modify malloc calls because it is their bitcast that
2574  // needs to be cleaned up.
2575  if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
2576  if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
2577  return V;
2578 
2579  // When the type pointed to is not sized, the cast cannot be
2580  // turned into a gep.
2581  Type *PointeeType =
2582  cast<PointerType>(Src->getType()->getScalarType())->getElementType();
2583  if (!PointeeType->isSized())
2584  return nullptr;
2585 
2586  // If the source and destination are pointers, and this cast is equivalent
2587  // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
2588  // This can enhance SROA and other transforms that want type-safe pointers.
2589  unsigned NumZeros = 0;
2590  while (SrcElTy && SrcElTy != DstElTy) {
2591  SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
2592  ++NumZeros;
2593  }
2594 
2595  // If we found a path from the src to dest, create the getelementptr now.
2596  if (SrcElTy == DstElTy) {
2597  SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
2598  GetElementPtrInst *GEP =
2599  GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs);
2600 
2601  // If the source pointer is dereferenceable, then assume it points to an
2602  // allocated object and apply "inbounds" to the GEP.
2603  bool CanBeNull, CanBeFreed;
2604  if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
2605  // In a non-default address space (not 0), a null pointer can not be
2606  // assumed inbounds, so ignore that case (dereferenceable_or_null).
2607  // The reason is that 'null' is not treated differently in these address
2608  // spaces, and we consequently ignore the 'gep inbounds' special case
2609  // for 'null' which allows 'inbounds' on 'null' if the indices are
2610  // zeros.
2611  if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
2612  GEP->setIsInBounds();
2613  }
2614  return GEP;
2615  }
2616  }
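  // Illustrative instance (names invented):
  //   bitcast [4 x i32]* %p to i32*
  // becomes
  //   getelementptr [4 x i32], [4 x i32]* %p, i32 0, i32 0
  // marked inbounds only when %p is known dereferenceable, as checked above.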
2617 
2618  if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
2619  // Beware: messing with this target-specific oddity may cause trouble.
2620  if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
2621  Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
2622  return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
2623  Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2624  }
2625 
2626  if (isa<IntegerType>(SrcTy)) {
2627  // If this is a cast from an integer to vector, check to see if the input
2628  // is a trunc or zext of a bitcast from vector. If so, we can replace all
2629  // the casts with a shuffle and (potentially) a bitcast.
2630  if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2631  CastInst *SrcCast = cast<CastInst>(Src);
2632  if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2633  if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2634  if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2635  BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2636  return I;
2637  }
2638 
2639  // If the input is an 'or' instruction, we may be doing shifts and ors to
2640  // assemble the elements of the vector manually. Try to rip the code out
2641  // and replace it with insertelements.
2642  if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2643  return replaceInstUsesWith(CI, V);
2644  }
2645  }
2646 
2647  if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2648  if (SrcVTy->getNumElements() == 1) {
2649  // If our destination is not a vector, then make this a straight
2650  // scalar-scalar cast.
2651  if (!DestTy->isVectorTy()) {
2652  Value *Elem =
2653  Builder.CreateExtractElement(Src,
2654  Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2655  return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2656  }
2657 
2658  // Otherwise, see if our source is an insert. If so, then use the scalar
2659  // component directly:
2660  // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2661  if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2662  return new BitCastInst(InsElt->getOperand(1), DestTy);
2663  }
2664  }
2665 
2666  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2667  // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2668  // a bitcast to a vector with the same # elts.
2669  Value *ShufOp0 = Shuf->getOperand(0);
2670  Value *ShufOp1 = Shuf->getOperand(1);
2671  auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2672  auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2673  if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2674  cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2675  ShufElts == SrcVecElts) {
2676  BitCastInst *Tmp;
2677  // If either of the operands is a cast from CI.getType(), then
2678  // evaluating the shuffle in the casted destination's type will allow
2679  // us to eliminate at least one cast.
2680  if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2681  Tmp->getOperand(0)->getType() == DestTy) ||
2682  ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2683  Tmp->getOperand(0)->getType() == DestTy)) {
2684  Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2685  Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2686  // Return a new shuffle vector. Use the same element ID's, as we
2687  // know the vector types match #elts.
2688  return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2689  }
2690  }
2691 
2692  // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
2693  // a byte-swap:
2694  // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
2695  // TODO: We should match the related pattern for bitreverse.
2696  if (DestTy->isIntegerTy() &&
2697  DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2698  SrcTy->getScalarSizeInBits() == 8 &&
2699  ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() &&
2700  Shuf->isReverse()) {
2701  assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2702  assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2703  Function *Bswap =
2704  Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy);
2705  Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2706  return CallInst::Create(Bswap, { ScalarX });
2707  }
2708  }
2709 
2710  // Handle the A->B->A cast, and there is an intervening PHI node.
2711  if (PHINode *PN = dyn_cast<PHINode>(Src))
2712  if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2713  return I;
2714 
2715  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2716  return I;
2717 
2718  if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
2719  return I;
2720 
2721  if (Instruction *I = foldBitCastSelect(CI, Builder))
2722  return I;
2723 
2724  if (SrcTy->isPointerTy())
2725  return commonPointerCastTransforms(CI);
2726  return commonCastTransforms(CI);
2727 }
2728 
2729 Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2730  // If the destination pointer element type is not the same as the source's,
2731  // first do a bitcast to the destination type, and then the addrspacecast.
2732  // This allows the cast to be exposed to other transforms.
2733  Value *Src = CI.getOperand(0);
2734  PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
2735  PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
2736 
2737  Type *DestElemTy = DestTy->getElementType();
2738  if (SrcTy->getElementType() != DestElemTy) {
2739  Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace());
2740  // Handle vectors of pointers.
2741  if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
2742  MidTy = VectorType::get(MidTy, VT->getElementCount());
2743 
2744  Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
2745  return new AddrSpaceCastInst(NewBitCast, CI.getType());
2746  }
2747 
2748  return commonPointerCastTransforms(CI);
2749 }
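// Illustrative instance (names invented):
//   addrspacecast i32 addrspace(1)* %p to i8*
// becomes
//   %b = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
//   addrspacecast i8 addrspace(1)* %b to i8*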
Definition: SmallVector.h:1168
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:56
isMultipleOfTypeSize
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
Definition: InstCombineCasts.cpp:2098
llvm::BinaryOperator::CreateFDivFMF
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name="")
Definition: InstrTypes.h:275
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:210
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
hasStoreUsersOnly
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
Definition: InstCombineCasts.cpp:2363
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:2945
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:662
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:744
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2207
llvm::CastInst::isEliminableCastPair
static unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, Type *DstIntPtrTy)
Determine how a pair of casts can be eliminated, if they can be at all.
Definition: Instructions.cpp:2728
foldBitCastSelect
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
Definition: InstCombineCasts.cpp:2320
llvm::InsertElementInst::Create
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1928
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:749
Shift
bool Shift
Definition: README.txt:468
llvm::ExtractElementInst::Create
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1863
llvm::InstCombinerImpl::visitAddrSpaceCast
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Definition: InstCombineCasts.cpp:2729
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::PatternMatch::m_FPToUI
CastClass_match< OpTy, Instruction::FPToUI > m_FPToUI(const OpTy &Op)
Definition: PatternMatch.h:1677
canAlwaysEvaluateInType
static bool canAlwaysEvaluateInType(Value *V, Type *Ty)
Constants and extensions/truncates from the destination type are always free to be evaluated in that ...
Definition: InstCombineCasts.cpp:327
llvm::InstCombinerImpl::visitBitCast
Instruction * visitBitCast(BitCastInst &CI)
Definition: InstCombineCasts.cpp:2547
isBigEndian
static Optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Definition: CombinerHelper.cpp:92
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:295
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:128
llvm::PatternMatch::m_AShr
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1148
llvm::ComputeNumSignBits
unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return the number of times the sign bit of the register is replicated into the other bits.
Definition: ValueTracking.cpp:383
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:424
shrinkSplatShuffle
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
Definition: InstCombineCasts.cpp:675
llvm::CastInst::CreateIntegerCast
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
Definition: Instructions.cpp:3111
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::OverflowingBinaryOperator::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:90
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::matchSelectPattern
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
Definition: ValueTracking.cpp:6084
llvm::PatternMatch::m_BitCast
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition: PatternMatch.h:1598
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:527
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
ConstantFolding.h
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:197
llvm::PatternMatch::m_Deferred
deferredval_ty< Value > m_Deferred(Value *const &V)
A commutative-friendly version of m_Specific().
Definition: PatternMatch.h:815
llvm::InstCombinerImpl::visitFPExt
Instruction * visitFPExt(CastInst &CI)
Definition: InstCombineCasts.cpp:1854
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
llvm::Type::getWithNewType
Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
Definition: DerivedTypes.h:680
llvm::NVPTX::PTXLdStInstCode::VecType
VecType
Definition: NVPTX.h:121
llvm::CastInst::getDestTy
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:686
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::IntToPtrInst::getAddressSpace
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
Definition: Instructions.h:5067
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:129
llvm::APInt::countPopulation
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1728
KnownBits.h
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::ConstantExpr::getSub
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2675
llvm::PtrToIntInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:5118
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:270
llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1313
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::KnownBits::isNonNegative
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition: KnownBits.h:99
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1251
llvm::SPF_UNKNOWN
@ SPF_UNKNOWN
Definition: ValueTracking.h:658
canEvaluateSExtd
static bool canEvaluateSExtd(Value *V, Type *Ty)
Return true if we can take the specified value and return it as type Ty without inserting any new cas...
Definition: InstCombineCasts.cpp:1403
llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition: Instructions.h:1746
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::AddrSpaceCastInst
This class represents a conversion between pointers from one address space to another.
Definition: Instructions.h:5176
InstCombineInternal.h
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1467
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1653
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2666
llvm::AllocaInst::getAllocatedType
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:112
getScalarSizeInBits
static unsigned getScalarSizeInBits(Type *Ty)
Definition: SystemZTargetTransformInfo.cpp:365
shrinkFPConstantVector
static Type * shrinkFPConstantVector(Value *V)
Definition: InstCombineCasts.cpp:1570
llvm::Type::getPPC_FP128Ty
static Type * getPPC_FP128Ty(LLVMContext &C)
Definition: Type.cpp:190
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::APFloatBase::IEEEhalf
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:157
llvm::User
Definition: User.h:44
llvm::Type::getDoubleTy
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:185
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:748
llvm::InstCombinerImpl::visitSIToFP
Instruction * visitSIToFP(CastInst &CI)
Definition: InstCombineCasts.cpp:1930
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::InstCombinerImpl::commonCastTransforms
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Definition: InstCombineCasts.cpp:278
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
decomposeSimpleLinearExpr
static Value * decomposeSimpleLinearExpr(Value *Val, unsigned &Scale, uint64_t &Offset)
Analyze 'Val', seeing if it is a simple linear expression.
Definition: InstCombineCasts.cpp:32
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1493
SI
@ SI
Definition: SIInstrInfo.cpp:7344
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::CastInst::CreateFPCast
static CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
Definition: Instructions.cpp:3139
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1634
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:782
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1058
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:405
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:395
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:147
llvm::PatternMatch::m_c_Or
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
Definition: PatternMatch.h:2236
optimizeVectorResizeWithIntegerBitCasts
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
Definition: InstCombineCasts.cpp:2024
llvm::AllocaInst::getArraySize
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:99
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1631
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:255
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2404
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:655
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
llvm::InstCombinerImpl
Definition: InstCombineInternal.h:60
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
PatternMatch.h
llvm::APInt::countTrailingZeros
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1700
llvm::CastInst::CreateTruncOrBitCast
static CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
Definition: Instructions.cpp:3021
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:644
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::CastInst::getSrcTy
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:684
optimizeIntegerToVectorInsertions
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
Definition: InstCombineCasts.cpp:2225
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2662
llvm::APInt::isAllOnesValue
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:401
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::ArrayRef::take_back
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:233
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1497
llvm::InstCombinerImpl::visitPtrToInt
Instruction * visitPtrToInt(PtrToIntInst &CI)
Definition: InstCombineCasts.cpp:1976
llvm::PatternMatch::m_Xor
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1130
llvm::PtrToIntInst::getPointerOperand
Value * getPointerOperand()
Gets the pointer operand.
Definition: Instructions.h:5111
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::PatternMatch::m_Shr
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
Definition: PatternMatch.h:1308
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:635
llvm::InstCombinerImpl::visitTrunc
Instruction * visitTrunc(TruncInst &CI)
Definition: InstCombineCasts.cpp:722
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::DataLayout::isBigEndian
bool isBigEndian() const
Definition: DataLayout.h:242
llvm::ConstantExpr::getTrunc
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2069
llvm::InstCombinerImpl::visitZExt
Instruction * visitZExt(ZExtInst &CI)
Definition: InstCombineCasts.cpp:1175
llvm::APFloat
Definition: APFloat.h:701
llvm::Type::getFPMantissaWidth
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition: Type.cpp:152
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2066
llvm::InstCombinerImpl::PromoteCastOfAllocation
Instruction * PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI)
If we find a cast of an allocation instruction, try to eliminate the cast by moving the type informat...
Definition: InstCombineCasts.cpp:85
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::Value::getPointerDereferenceableBytes
uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition: Value.cpp:804
llvm::UnaryOperator::CreateFNegFMF
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:166
llvm::Constant::isElementWiseEqual
bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
Definition: Constants.cpp:283
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1178
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
shrinkFPConstant
static Type * shrinkFPConstant(ConstantFP *CFP)
Definition: InstCombineCasts.cpp:1550
llvm::BinaryOperator::CreateFMulFMF
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name="")
Definition: InstrTypes.h:270
llvm::FPToSIInst
This class represents a cast from floating point to signed integer.
Definition: Instructions.h:5003
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:370
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
llvm::InstCombinerImpl::visitIntToPtr
Instruction * visitIntToPtr(IntToPtrInst &CI)
Definition: InstCombineCasts.cpp:1934
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:91
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4691
llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:478
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2720
llvm::PatternMatch::m_Or
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1124
llvm::PatternMatch::m_AllOnes
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:445
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::InstCombinerImpl::visitFPToUI
Instruction * visitFPToUI(FPToUIInst &FI)
Definition: InstCombineCasts.cpp:1912
llvm::UndefValue
'undef' values are things that do not have specified contents.
Definition: Constants.h:1347
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:905
llvm::ConstantExpr::getShl
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2740
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:581
llvm::ConstantExpr::getOr
static Constant * getOr(Constant *C1, Constant *C2)
Definition: Constants.cpp:2727
DIBuilder.h
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1118
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:634
llvm::KnownBits::getMaxValue
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:134
llvm::ICmpInst::isEquality
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1267
llvm::computeKnownBits
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Definition: ValueTracking.cpp:211
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1015
llvm::InstCombinerImpl::EvaluateInDifferentType
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
Definition: InstCombineCasts.cpp:180
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1715
llvm::is_splat
bool is_splat(R &&Range)
Wrapper function around std::equal to detect if all elements in a container are same.
Definition: STLExtras.h:1660
llvm::FPToUIInst
This class represents a cast from floating point to unsigned integer.
Definition: Instructions.h:4964
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:142
llvm::replaceAllDbgUsesWith
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:1965
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:649
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::ZExtInst
This class represents zero extension of integer types.
Definition: Instructions.h:4730
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:751
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:931
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1628
llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:880
foldBitCastBitwiseLogic
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
Definition: InstCombineCasts.cpp:2274
llvm::ArrayRef< int >
llvm::BinaryOperator
Definition: InstrTypes.h:190
llvm::InstCombinerImpl::ComputeNumSignBits
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombineInternal.h:482
DataLayout.h
llvm::PatternMatch::m_SpecificInt_ICMP
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
Definition: PatternMatch.h:626
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:166
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::ConstantExpr::getUMin
static Constant * getUMin(Constant *C1, Constant *C2)
Definition: Constants.cpp:2735
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:747
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
uint32_t
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:952
llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:367
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::PtrToIntInst
This class represents a cast from a pointer to an integer.
Definition: Instructions.h:5085
llvm::ConstantExpr::getIntegerCast
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2045
llvm::FPExtInst
This class represents an extension of floating point types.
Definition: Instructions.h:4847
trunc
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g trunc
Definition: README-FPStack.txt:63
llvm::Instruction::copyFastMathFlags
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
Definition: Instruction.cpp:214
llvm::OverflowingBinaryOperator
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:66
llvm::Type::isPtrOrPtrVectorTy
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:232
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:432
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1205
llvm::SExtInst
This class represents a sign extension of integer types.
Definition: Instructions.h:4769
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:299
llvm::InstCombinerImpl::visitSExt
Instruction * visitSExt(SExtInst &CI)
Definition: InstCombineCasts.cpp:1451
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:140
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:163
llvm::PatternMatch::m_FPExt
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:1692
llvm::InstCombinerImpl::commonPointerCastTransforms
Instruction * commonPointerCastTransforms(CastInst &CI)
Implement the transforms for cast of pointer (bitcast/ptrtoint)
Definition: InstCombineCasts.cpp:1954
j
return j(j<< 16)
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
canEvaluateTruncated
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, Instruction *CxtI)
Return true if we can evaluate the specified expression tree as type Ty instead of its larger type,...
Definition: InstCombineCasts.cpp:363
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:347
llvm::KnownBits
Definition: KnownBits.h:23
llvm::OverflowingBinaryOperator::hasNoSignedWrap
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition: Operator.h:96
llvm::Type::getHalfTy
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:182
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2612
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::ArrayRef::take_front
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:226
canEvaluateZExtd
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, InstCombinerImpl &IC, Instruction *CxtI)
Determine if the specified value can be computed in the specified wider type and produce the same low...
Definition: InstCombineCasts.cpp:1069
llvm::PatternMatch::m_FPToSI
CastClass_match< OpTy, Instruction::FPToSI > m_FPToSI(const OpTy &Op)
Definition: PatternMatch.h:1682
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:208
llvm::fltSemantics
Definition: APFloat.cpp:54
llvm::GetElementPtrInst::getTypeAtIndex
static Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
Definition: Instructions.cpp:1717
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:522
llvm::ConstantExpr::getLShr
static Constant * getLShr(Constant *C1, Constant *C2, bool isExact=false)
Definition: Constants.cpp:2747
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:184
llvm::IntToPtrInst
This class represents a cast from an integer to a pointer.
Definition: Instructions.h:5042
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:403
llvm::PatternMatch::m_LogicalShift
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
Definition: PatternMatch.h:1316
llvm::Constant::mergeUndefsWith
static Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
Definition: Constants.cpp:789
llvm::APInt::isNullValue
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:411
fitsInFPType
static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
Definition: InstCombineCasts.cpp:1543
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:768
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:1986
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
llvm::AllocaInst::isUsedWithInAlloca
bool isUsedWithInAlloca() const
Return true if this alloca is used as an inalloca argument to a call.
Definition: Instructions.h:137
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:802
getMinimumFPType
static Type * getMinimumFPType(Value *V)
Find the minimum FP type we can safely truncate to.
Definition: InstCombineCasts.cpp:1602
llvm::FPTruncInst
This class represents a truncation of floating point types.
Definition: Instructions.h:4808
llvm::CastInst::getOpcode
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:679
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:799
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2686
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
llvm::PHINode
Definition: Instructions.h:2572
Threshold
static cl::opt< unsigned > Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), cl::init(100), cl::Hidden)
getTypeSizeIndex
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
Definition: InstCombineCasts.cpp:2102
llvm::SmallVectorImpl< Value * >
llvm::ConstantFoldConstant
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
Definition: ConstantFolding.cpp:1228
llvm::PatternMatch::m_IntToPtr
CastClass_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
Definition: PatternMatch.h:1610
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::PatternMatch::m_Neg
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Definition: PatternMatch.h:2251
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:667
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:269
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::InstCombinerImpl::foldItoFPtoI
Instruction * foldItoFPtoI(CastInst &FI)
fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate ty...
Definition: InstCombineCasts.cpp:1872
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:377
llvm::APInt::getBitsSetFrom
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Get a value with upper bits starting at loBit set.
Definition: APInt.h:643
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:61
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
canNotEvaluateInType
static bool canNotEvaluateInType(Value *V, Type *Ty)
Filter out values that we can not evaluate in the destination type for free.
Definition: InstCombineCasts.cpp:340
llvm::PatternMatch::m_Trunc
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:1616
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2549
llvm::InstCombinerImpl::visitFPTrunc
Instruction * visitFPTrunc(FPTruncInst &CI)
Definition: InstCombineCasts.cpp:1670
llvm::InstCombinerImpl::visitUIToFP
Instruction * visitUIToFP(CastInst &CI)
Definition: InstCombineCasts.cpp:1926
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::PatternMatch::m_Shl
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1136
llvm::InstCombinerImpl::MaskedValueIsZero
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombineInternal.h:477
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:628
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:434
llvm::Type::getFloatTy
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:184
llvm::SrcOp
Definition: MachineIRBuilder.h:119
llvm::Type::isX86_AMXTy
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:187
isKnownExactCastIntToFP
static bool isKnownExactCastIntToFP(CastInst &I)
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
Definition: InstCombineCasts.cpp:1630
SetVector.h
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:122
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
foldVecTruncToExtElt
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
Definition: InstCombineCasts.cpp:480
llvm::SmallVectorImpl::insert
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:773