LLVM  15.0.0git
InstCombineCasts.cpp
Go to the documentation of this file.
1 //===- InstCombineCasts.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visit functions for cast operations.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/SetVector.h"
16 #include "llvm/IR/DataLayout.h"
17 #include "llvm/IR/PatternMatch.h"
18 #include "llvm/Support/KnownBits.h"
20 using namespace llvm;
21 using namespace PatternMatch;
22 
23 #define DEBUG_TYPE "instcombine"
24 
25 /// Analyze 'Val', seeing if it is a simple linear expression.
26 /// If so, decompose it, returning some value X, such that Val is
27 /// X*Scale+Offset.
28 ///
29 static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
30  uint64_t &Offset) {
31  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
32  Offset = CI->getZExtValue();
33  Scale = 0;
34  return ConstantInt::get(Val->getType(), 0);
35  }
36 
37  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
38  // Cannot look past anything that might overflow.
39  // We specifically require nuw because we store the Scale in an unsigned
40  // and perform an unsigned divide on it.
41  OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
42  if (OBI && !OBI->hasNoUnsignedWrap()) {
43  Scale = 1;
44  Offset = 0;
45  return Val;
46  }
47 
48  if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
49  if (I->getOpcode() == Instruction::Shl) {
50  // This is a value scaled by '1 << the shift amt'.
51  Scale = UINT64_C(1) << RHS->getZExtValue();
52  Offset = 0;
53  return I->getOperand(0);
54  }
55 
56  if (I->getOpcode() == Instruction::Mul) {
57  // This value is scaled by 'RHS'.
58  Scale = RHS->getZExtValue();
59  Offset = 0;
60  return I->getOperand(0);
61  }
62 
63  if (I->getOpcode() == Instruction::Add) {
64  // We have X+C. Check to see if we really have (X*C2)+C1,
65  // where C1 is divisible by C2.
66  unsigned SubScale;
67  Value *SubVal =
68  decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
69  Offset += RHS->getZExtValue();
70  Scale = SubScale;
71  return SubVal;
72  }
73  }
74  }
75 
76  // Otherwise, we can't look past this.
77  Scale = 1;
78  Offset = 0;
79  return Val;
80 }
81 
82 /// If we find a cast of an allocation instruction, try to eliminate the cast by
83 /// moving the type information into the alloc.
85  AllocaInst &AI) {
86  PointerType *PTy = cast<PointerType>(CI.getType());
87  // Opaque pointers don't have an element type we could replace with.
88  if (PTy->isOpaque())
89  return nullptr;
90 
92  Builder.SetInsertPoint(&AI);
93 
94  // Get the type really allocated and the type casted to.
95  Type *AllocElTy = AI.getAllocatedType();
96  Type *CastElTy = PTy->getNonOpaquePointerElementType();
97  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
98 
99  // This optimisation does not work for cases where the cast type
100  // is scalable and the allocated type is not. This because we need to
101  // know how many times the casted type fits into the allocated type.
102  // For the opposite case where the allocated type is scalable and the
103  // cast type is not this leads to poor code quality due to the
104  // introduction of 'vscale' into the calculations. It seems better to
105  // bail out for this case too until we've done a proper cost-benefit
106  // analysis.
107  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
108  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
109  if (AllocIsScalable != CastIsScalable) return nullptr;
110 
111  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
112  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
113  if (CastElTyAlign < AllocElTyAlign) return nullptr;
114 
115  // If the allocation has multiple uses, only promote it if we are strictly
116  // increasing the alignment of the resultant allocation. If we keep it the
117  // same, we open the door to infinite loops of various kinds.
118  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
119 
120  // The alloc and cast types should be either both fixed or both scalable.
121  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize();
122  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize();
123  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
124 
125  // If the allocation has multiple uses, only promote it if we're not
126  // shrinking the amount of memory being allocated.
127  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize();
128  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize();
129  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
130 
131  // See if we can satisfy the modulus by pulling a scale out of the array
132  // size argument.
133  unsigned ArraySizeScale;
134  uint64_t ArrayOffset;
135  Value *NumElements = // See if the array size is a decomposable linear expr.
136  decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
137 
138  // If we can now satisfy the modulus, by using a non-1 scale, we really can
139  // do the xform.
140  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
141  (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr;
142 
143  // We don't currently support arrays of scalable types.
144  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));
145 
146  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
147  Value *Amt = nullptr;
148  if (Scale == 1) {
149  Amt = NumElements;
150  } else {
151  Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
152  // Insert before the alloca, not before the cast.
153  Amt = Builder.CreateMul(Amt, NumElements);
154  }
155 
156  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
158  Offset, true);
159  Amt = Builder.CreateAdd(Amt, Off);
160  }
161 
162  AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
163  New->setAlignment(AI.getAlign());
164  New->takeName(&AI);
165  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
166 
167  // If the allocation has multiple real uses, insert a cast and change all
168  // things that used it to use the new cast. This will also hack on CI, but it
169  // will die soon.
170  if (!AI.hasOneUse()) {
171  // New is the allocation instruction, pointer typed. AI is the original
172  // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
173  Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
174  replaceInstUsesWith(AI, NewCast);
175  eraseInstFromFunction(AI);
176  }
177  return replaceInstUsesWith(CI, New);
178 }
179 
180 /// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
181 /// true for, actually insert the code to evaluate the expression.
183  bool isSigned) {
184  if (Constant *C = dyn_cast<Constant>(V)) {
185  C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
186  // If we got a constantexpr back, try to simplify it with DL info.
187  return ConstantFoldConstant(C, DL, &TLI);
188  }
189 
190  // Otherwise, it must be an instruction.
191  Instruction *I = cast<Instruction>(V);
192  Instruction *Res = nullptr;
193  unsigned Opc = I->getOpcode();
194  switch (Opc) {
195  case Instruction::Add:
196  case Instruction::Sub:
197  case Instruction::Mul:
198  case Instruction::And:
199  case Instruction::Or:
200  case Instruction::Xor:
201  case Instruction::AShr:
202  case Instruction::LShr:
203  case Instruction::Shl:
204  case Instruction::UDiv:
205  case Instruction::URem: {
206  Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
207  Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
209  break;
210  }
211  case Instruction::Trunc:
212  case Instruction::ZExt:
213  case Instruction::SExt:
214  // If the source type of the cast is the type we're trying for then we can
215  // just return the source. There's no need to insert it because it is not
216  // new.
217  if (I->getOperand(0)->getType() == Ty)
218  return I->getOperand(0);
219 
220  // Otherwise, must be the same type of cast, so just reinsert a new one.
221  // This also handles the case of zext(trunc(x)) -> zext(x).
222  Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
223  Opc == Instruction::SExt);
224  break;
225  case Instruction::Select: {
226  Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
227  Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
228  Res = SelectInst::Create(I->getOperand(0), True, False);
229  break;
230  }
231  case Instruction::PHI: {
232  PHINode *OPN = cast<PHINode>(I);
233  PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
234  for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
235  Value *V =
236  EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
237  NPN->addIncoming(V, OPN->getIncomingBlock(i));
238  }
239  Res = NPN;
240  break;
241  }
242  default:
243  // TODO: Can handle more cases here.
244  llvm_unreachable("Unreachable!");
245  }
246 
247  Res->takeName(I);
248  return InsertNewInstWith(Res, *I);
249 }
250 
252 InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
253  const CastInst *CI2) {
254  Type *SrcTy = CI1->getSrcTy();
255  Type *MidTy = CI1->getDestTy();
256  Type *DstTy = CI2->getDestTy();
257 
258  Instruction::CastOps firstOp = CI1->getOpcode();
259  Instruction::CastOps secondOp = CI2->getOpcode();
260  Type *SrcIntPtrTy =
261  SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
262  Type *MidIntPtrTy =
263  MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
264  Type *DstIntPtrTy =
265  DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
266  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
267  DstTy, SrcIntPtrTy, MidIntPtrTy,
268  DstIntPtrTy);
269 
270  // We don't want to form an inttoptr or ptrtoint that converts to an integer
271  // type that differs from the pointer size.
272  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
273  (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
274  Res = 0;
275 
276  return Instruction::CastOps(Res);
277 }
278 
279 /// Implement the transforms common to all CastInst visitors.
281  Value *Src = CI.getOperand(0);
282  Type *Ty = CI.getType();
283 
284  // Try to eliminate a cast of a cast.
285  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
286  if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
287  // The first cast (CSrc) is eliminable so we need to fix up or replace
288  // the second cast (CI). CSrc will then have a good chance of being dead.
289  auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
290  // Point debug users of the dying cast to the new one.
291  if (CSrc->hasOneUse())
292  replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
293  return Res;
294  }
295  }
296 
297  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
298  // We are casting a select. Try to fold the cast into the select if the
299  // select does not have a compare instruction with matching operand types
300  // or the select is likely better done in a narrow type.
301  // Creating a select with operands that are different sizes than its
302  // condition may inhibit other folds and lead to worse codegen.
303  auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
304  if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
305  (CI.getOpcode() == Instruction::Trunc &&
306  shouldChangeType(CI.getSrcTy(), CI.getType()))) {
307  if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
308  replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
309  return NV;
310  }
311  }
312  }
313 
314  // If we are casting a PHI, then fold the cast into the PHI.
315  if (auto *PN = dyn_cast<PHINode>(Src)) {
316  // Don't do this if it would create a PHI node with an illegal type from a
317  // legal type.
318  if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
319  shouldChangeType(CI.getSrcTy(), CI.getType()))
320  if (Instruction *NV = foldOpIntoPhi(CI, PN))
321  return NV;
322  }
323 
324  // Canonicalize a unary shuffle after the cast if neither operation changes
325  // the size or element size of the input vector.
326  // TODO: We could allow size-changing ops if that doesn't harm codegen.
327  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
328  Value *X;
330  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
331  // TODO: Allow scalable vectors?
332  auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
333  auto *DestTy = dyn_cast<FixedVectorType>(Ty);
334  if (SrcTy && DestTy &&
335  SrcTy->getNumElements() == DestTy->getNumElements() &&
336  SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
337  Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
338  return new ShuffleVectorInst(CastX, Mask);
339  }
340  }
341 
342  return nullptr;
343 }
344 
345 /// Constants and extensions/truncates from the destination type are always
346 /// free to be evaluated in that type. This is a helper for canEvaluate*.
347 static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
348  if (isa<Constant>(V))
349  return true;
350  Value *X;
351  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
352  X->getType() == Ty)
353  return true;
354 
355  return false;
356 }
357 
358 /// Filter out values that we can not evaluate in the destination type for free.
359 /// This is a helper for canEvaluate*.
360 static bool canNotEvaluateInType(Value *V, Type *Ty) {
361  assert(!isa<Constant>(V) && "Constant should already be handled.");
362  if (!isa<Instruction>(V))
363  return true;
364  // We don't extend or shrink something that has multiple uses -- doing so
365  // would require duplicating the instruction which isn't profitable.
366  if (!V->hasOneUse())
367  return true;
368 
369  return false;
370 }
371 
372 /// Return true if we can evaluate the specified expression tree as type Ty
373 /// instead of its larger type, and arrive with the same value.
374 /// This is used by code that tries to eliminate truncates.
375 ///
376 /// Ty will always be a type smaller than V. We should return true if trunc(V)
377 /// can be computed by computing V in the smaller type. If V is an instruction,
378 /// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
379 /// makes sense if x and y can be efficiently truncated.
380 ///
381 /// This function works on both vectors and scalars.
382 ///
384  Instruction *CxtI) {
385  if (canAlwaysEvaluateInType(V, Ty))
386  return true;
387  if (canNotEvaluateInType(V, Ty))
388  return false;
389 
390  auto *I = cast<Instruction>(V);
391  Type *OrigTy = V->getType();
392  switch (I->getOpcode()) {
393  case Instruction::Add:
394  case Instruction::Sub:
395  case Instruction::Mul:
396  case Instruction::And:
397  case Instruction::Or:
398  case Instruction::Xor:
399  // These operators can all arbitrarily be extended or truncated.
400  return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
401  canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
402 
403  case Instruction::UDiv:
404  case Instruction::URem: {
405  // UDiv and URem can be truncated if all the truncated bits are zero.
406  uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
408  assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
409  APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
410  if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
411  IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
412  return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
413  canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
414  }
415  break;
416  }
417  case Instruction::Shl: {
418  // If we are truncating the result of this SHL, and if it's a shift of an
419  // inrange amount, we can always perform a SHL in a smaller type.
421  KnownBits AmtKnownBits =
422  llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
423  if (AmtKnownBits.getMaxValue().ult(BitWidth))
424  return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
425  canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
426  break;
427  }
428  case Instruction::LShr: {
429  // If this is a truncate of a logical shr, we can truncate it to a smaller
430  // lshr iff we know that the bits we would otherwise be shifting in are
431  // already zeros.
432  // TODO: It is enough to check that the bits we would be shifting in are
433  // zero - use AmtKnownBits.getMaxValue().
434  uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
436  KnownBits AmtKnownBits =
437  llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
438  APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
439  if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
440  IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
441  return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
442  canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
443  }
444  break;
445  }
446  case Instruction::AShr: {
447  // If this is a truncate of an arithmetic shr, we can truncate it to a
448  // smaller ashr iff we know that all the bits from the sign bit of the
449  // original type and the sign bit of the truncate type are similar.
450  // TODO: It is enough to check that the bits we would be shifting in are
451  // similar to sign bit of the truncate type.
452  uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
454  KnownBits AmtKnownBits =
455  llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
456  unsigned ShiftedBits = OrigBitWidth - BitWidth;
457  if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
458  ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
459  return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
460  canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
461  break;
462  }
463  case Instruction::Trunc:
464  // trunc(trunc(x)) -> trunc(x)
465  return true;
466  case Instruction::ZExt:
467  case Instruction::SExt:
468  // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
469  // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
470  return true;
471  case Instruction::Select: {
472  SelectInst *SI = cast<SelectInst>(I);
473  return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
474  canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
475  }
476  case Instruction::PHI: {
477  // We can change a phi if we can change all operands. Note that we never
478  // get into trouble with cyclic PHIs here because we only consider
479  // instructions with a single use.
480  PHINode *PN = cast<PHINode>(I);
481  for (Value *IncValue : PN->incoming_values())
482  if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
483  return false;
484  return true;
485  }
486  default:
487  // TODO: Can handle more cases here.
488  break;
489  }
490 
491  return false;
492 }
493 
494 /// Given a vector that is bitcast to an integer, optionally logically
495 /// right-shifted, and truncated, convert it to an extractelement.
496 /// Example (big endian):
497 /// trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
498 /// --->
499 /// extractelement <4 x i32> %X, 1
501  InstCombinerImpl &IC) {
502  Value *TruncOp = Trunc.getOperand(0);
503  Type *DestType = Trunc.getType();
504  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
505  return nullptr;
506 
507  Value *VecInput = nullptr;
508  ConstantInt *ShiftVal = nullptr;
509  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
510  m_LShr(m_BitCast(m_Value(VecInput)),
511  m_ConstantInt(ShiftVal)))) ||
512  !isa<VectorType>(VecInput->getType()))
513  return nullptr;
514 
515  VectorType *VecType = cast<VectorType>(VecInput->getType());
516  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
517  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
518  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;
519 
520  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
521  return nullptr;
522 
523  // If the element type of the vector doesn't match the result type,
524  // bitcast it to a vector type that we can extract from.
525  unsigned NumVecElts = VecWidth / DestWidth;
526  if (VecType->getElementType() != DestType) {
527  VecType = FixedVectorType::get(DestType, NumVecElts);
528  VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
529  }
530 
531  unsigned Elt = ShiftAmount / DestWidth;
532  if (IC.getDataLayout().isBigEndian())
533  Elt = NumVecElts - 1 - Elt;
534 
535  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
536 }
537 
538 /// Funnel/Rotate left/right may occur in a wider type than necessary because of
539 /// type promotion rules. Try to narrow the inputs and convert to funnel shift.
540 Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
541  assert((isa<VectorType>(Trunc.getSrcTy()) ||
542  shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
543  "Don't narrow to an illegal scalar type");
544 
545  // Bail out on strange types. It is possible to handle some of these patterns
546  // even with non-power-of-2 sizes, but it is not a likely scenario.
547  Type *DestTy = Trunc.getType();
548  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
549  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
550  if (!isPowerOf2_32(NarrowWidth))
551  return nullptr;
552 
553  // First, find an or'd pair of opposite shifts:
554  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
555  BinaryOperator *Or0, *Or1;
556  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
557  return nullptr;
558 
559  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
560  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
561  !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
562  Or0->getOpcode() == Or1->getOpcode())
563  return nullptr;
564 
565  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
566  if (Or0->getOpcode() == BinaryOperator::LShr) {
567  std::swap(Or0, Or1);
568  std::swap(ShVal0, ShVal1);
569  std::swap(ShAmt0, ShAmt1);
570  }
571  assert(Or0->getOpcode() == BinaryOperator::Shl &&
572  Or1->getOpcode() == BinaryOperator::LShr &&
573  "Illegal or(shift,shift) pair");
574 
575  // Match the shift amount operands for a funnel/rotate pattern. This always
576  // matches a subtraction on the R operand.
577  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
578  // The shift amounts may add up to the narrow bit width:
579  // (shl ShVal0, L) | (lshr ShVal1, Width - L)
580  // If this is a funnel shift (different operands are shifted), then the
581  // shift amount can not over-shift (create poison) in the narrow type.
582  unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
583  APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
584  if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
586  return L;
587 
588  // The following patterns currently only work for rotation patterns.
589  // TODO: Add more general funnel-shift compatible patterns.
590  if (ShVal0 != ShVal1)
591  return nullptr;
592 
593  // The shift amount may be masked with negation:
594  // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
595  Value *X;
596  unsigned Mask = Width - 1;
597  if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
599  return X;
600 
601  // Same as above, but the shift amount may be extended after masking:
602  if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
604  return X;
605 
606  return nullptr;
607  };
608 
609  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
610  bool IsFshl = true; // Sub on LSHR.
611  if (!ShAmt) {
612  ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
613  IsFshl = false; // Sub on SHL.
614  }
615  if (!ShAmt)
616  return nullptr;
617 
618  // The right-shifted value must have high zeros in the wide type (for example
619  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
620  // truncated, so those do not matter.
621  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
622  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
623  return nullptr;
624 
625  // We have an unnecessarily wide rotate!
626  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
627  // Narrow the inputs and convert to funnel shift intrinsic:
628  // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
629  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
630  Value *X, *Y;
631  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
632  if (ShVal0 != ShVal1)
633  Y = Builder.CreateTrunc(ShVal1, DestTy);
634  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
635  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
636  return CallInst::Create(F, {X, Y, NarrowShAmt});
637 }
638 
639 /// Try to narrow the width of math or bitwise logic instructions by pulling a
640 /// truncate ahead of binary operators.
641 Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
642  Type *SrcTy = Trunc.getSrcTy();
643  Type *DestTy = Trunc.getType();
644  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
645  unsigned DestWidth = DestTy->getScalarSizeInBits();
646 
647  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
648  return nullptr;
649 
650  BinaryOperator *BinOp;
651  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
652  return nullptr;
653 
654  Value *BinOp0 = BinOp->getOperand(0);
655  Value *BinOp1 = BinOp->getOperand(1);
656  switch (BinOp->getOpcode()) {
657  case Instruction::And:
658  case Instruction::Or:
659  case Instruction::Xor:
660  case Instruction::Add:
661  case Instruction::Sub:
662  case Instruction::Mul: {
663  Constant *C;
664  if (match(BinOp0, m_Constant(C))) {
665  // trunc (binop C, X) --> binop (trunc C', X)
666  Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
667  Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
668  return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
669  }
670  if (match(BinOp1, m_Constant(C))) {
671  // trunc (binop X, C) --> binop (trunc X, C')
672  Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
673  Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
674  return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
675  }
676  Value *X;
677  if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
678  // trunc (binop (ext X), Y) --> binop X, (trunc Y)
679  Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
680  return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
681  }
682  if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
683  // trunc (binop Y, (ext X)) --> binop (trunc Y), X
684  Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
685  return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
686  }
687  break;
688  }
689  case Instruction::LShr:
690  case Instruction::AShr: {
691  // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
692  Value *A;
693  Constant *C;
694  if (match(BinOp0, m_Trunc(m_Value(A))) && match(BinOp1, m_Constant(C))) {
695  unsigned MaxShiftAmt = SrcWidth - DestWidth;
696  // If the shift is small enough, all zero/sign bits created by the shift
697  // are removed by the trunc.
699  APInt(SrcWidth, MaxShiftAmt)))) {
700  auto *OldShift = cast<Instruction>(Trunc.getOperand(0));
701  bool IsExact = OldShift->isExact();
702  auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
703  ShAmt = Constant::mergeUndefsWith(ShAmt, C);
704  Value *Shift =
705  OldShift->getOpcode() == Instruction::AShr
706  ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
707  : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
708  return CastInst::CreateTruncOrBitCast(Shift, DestTy);
709  }
710  }
711  break;
712  }
713  default: break;
714  }
715 
716  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
717  return NarrowOr;
718 
719  return nullptr;
720 }
721 
722 /// Try to narrow the width of a splat shuffle. This could be generalized to any
723 /// shuffle with a constant operand, but we limit the transform to avoid
724 /// creating a shuffle type that targets may not be able to lower effectively.
727  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
728  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
729  is_splat(Shuf->getShuffleMask()) &&
730  Shuf->getType() == Shuf->getOperand(0)->getType()) {
731  // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Poison, SplatMask
732  // trunc (shuf X, Poison, SplatMask) --> shuf (trunc X), Poison, SplatMask
733  Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
734  return new ShuffleVectorInst(NarrowOp, Shuf->getShuffleMask());
735  }
736 
737  return nullptr;
738 }
739 
740 /// Try to narrow the width of an insert element. This could be generalized for
741 /// any vector constant, but we limit the transform to insertion into undef to
742 /// avoid potential backend problems from unsupported insertion widths. This
743 /// could also be extended to handle the case of inserting a scalar constant
744 /// into a vector variable.
747  Instruction::CastOps Opcode = Trunc.getOpcode();
748  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
749  "Unexpected instruction for shrinking");
750 
751  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
752  if (!InsElt || !InsElt->hasOneUse())
753  return nullptr;
754 
755  Type *DestTy = Trunc.getType();
756  Type *DestScalarTy = DestTy->getScalarType();
757  Value *VecOp = InsElt->getOperand(0);
758  Value *ScalarOp = InsElt->getOperand(1);
759  Value *Index = InsElt->getOperand(2);
760 
761  if (match(VecOp, m_Undef())) {
762  // trunc (inselt undef, X, Index) --> inselt undef, (trunc X), Index
763  // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
764  UndefValue *NarrowUndef = UndefValue::get(DestTy);
765  Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
766  return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
767  }
768 
769  return nullptr;
770 }
771 
773  if (Instruction *Result = commonCastTransforms(Trunc))
774  return Result;
775 
776  Value *Src = Trunc.getOperand(0);
777  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
778  unsigned DestWidth = DestTy->getScalarSizeInBits();
779  unsigned SrcWidth = SrcTy->getScalarSizeInBits();
780 
781  // Attempt to truncate the entire input expression tree to the destination
782  // type. Only do this if the dest type is a simple type, don't convert the
783  // expression tree to something weird like i93 unless the source is also
784  // strange.
785  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
786  canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {
787 
788  // If this cast is a truncate, evaluting in a different type always
789  // eliminates the cast, so it is always a win.
790  LLVM_DEBUG(
791  dbgs() << "ICE: EvaluateInDifferentType converting expression type"
792  " to avoid cast: "
793  << Trunc << '\n');
794  Value *Res = EvaluateInDifferentType(Src, DestTy, false);
795  assert(Res->getType() == DestTy);
796  return replaceInstUsesWith(Trunc, Res);
797  }
798 
799  // For integer types, check if we can shorten the entire input expression to
800  // DestWidth * 2, which won't allow removing the truncate, but reducing the
801  // width may enable further optimizations, e.g. allowing for larger
802  // vectorization factors.
803  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
804  if (DestWidth * 2 < SrcWidth) {
805  auto *NewDestTy = DestITy->getExtendedType();
806  if (shouldChangeType(SrcTy, NewDestTy) &&
807  canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
808  LLVM_DEBUG(
809  dbgs() << "ICE: EvaluateInDifferentType converting expression type"
810  " to reduce the width of operand of"
811  << Trunc << '\n');
812  Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
813  return new TruncInst(Res, DestTy);
814  }
815  }
816  }
817 
818  // Test if the trunc is the user of a select which is part of a
819  // minimum or maximum operation. If so, don't do any more simplification.
820  // Even simplifying demanded bits can break the canonical form of a
821  // min/max.
822  Value *LHS, *RHS;
823  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
824  if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
825  return nullptr;
826 
827  // See if we can simplify any instructions used by the input whose sole
828  // purpose is to compute bits we don't care about.
829  if (SimplifyDemandedInstructionBits(Trunc))
830  return &Trunc;
831 
832  if (DestWidth == 1) {
833  Value *Zero = Constant::getNullValue(SrcTy);
834  if (DestTy->isIntegerTy()) {
835  // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
836  // TODO: We canonicalize to more instructions here because we are probably
837  // lacking equivalent analysis for trunc relative to icmp. There may also
838  // be codegen concerns. If those trunc limitations were removed, we could
839  // remove this transform.
840  Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
841  return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
842  }
843 
844  // For vectors, we do not canonicalize all truncs to icmp, so optimize
845  // patterns that would be covered within visitICmpInst.
846  Value *X;
847  Constant *C;
848  if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
849  // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
850  Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
851  Constant *MaskC = ConstantExpr::getShl(One, C);
852  Value *And = Builder.CreateAnd(X, MaskC);
853  return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
854  }
856  m_Deferred(X))))) {
857  // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
858  Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
859  Constant *MaskC = ConstantExpr::getShl(One, C);
860  MaskC = ConstantExpr::getOr(MaskC, One);
861  Value *And = Builder.CreateAnd(X, MaskC);
862  return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
863  }
864  }
865 
866  Value *A, *B;
867  Constant *C;
868  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
869  unsigned AWidth = A->getType()->getScalarSizeInBits();
870  unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
871  auto *OldSh = cast<Instruction>(Src);
872  bool IsExact = OldSh->isExact();
873 
874  // If the shift is small enough, all zero bits created by the shift are
875  // removed by the trunc.
877  APInt(SrcWidth, MaxShiftAmt)))) {
878  // trunc (lshr (sext A), C) --> ashr A, C
879  if (A->getType() == DestTy) {
880  Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
881  Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
882  ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
883  ShAmt = Constant::mergeUndefsWith(ShAmt, C);
884  return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
885  : BinaryOperator::CreateAShr(A, ShAmt);
886  }
887  // The types are mismatched, so create a cast after shifting:
888  // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
889  if (Src->hasOneUse()) {
890  Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
891  Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
892  ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
893  Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
894  return CastInst::CreateIntegerCast(Shift, DestTy, true);
895  }
896  }
897  // TODO: Mask high bits with 'and'.
898  }
899 
900  if (Instruction *I = narrowBinOp(Trunc))
901  return I;
902 
903  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
904  return I;
905 
906  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
907  return I;
908 
909  if (Src->hasOneUse() &&
910  (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
911  // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
912  // dest type is native and cst < dest size.
913  if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
914  !match(A, m_Shr(m_Value(), m_Constant()))) {
915  // Skip shifts of shift by constants. It undoes a combine in
916  // FoldShiftByConstant and is the extend in reg pattern.
917  APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
918  if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
919  Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
920  return BinaryOperator::Create(Instruction::Shl, NewTrunc,
921  ConstantExpr::getTrunc(C, DestTy));
922  }
923  }
924  }
925 
926  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
927  return I;
928 
929  // Whenever an element is extracted from a vector, and then truncated,
930  // canonicalize by converting it to a bitcast followed by an
931  // extractelement.
932  //
933  // Example (little endian):
934  // trunc (extractelement <4 x i64> %X, 0) to i32
935  // --->
936  // extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
937  Value *VecOp;
938  ConstantInt *Cst;
939  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
940  auto *VecOpTy = cast<VectorType>(VecOp->getType());
941  auto VecElts = VecOpTy->getElementCount();
942 
943  // A badly fit destination size would result in an invalid cast.
944  if (SrcWidth % DestWidth == 0) {
945  uint64_t TruncRatio = SrcWidth / DestWidth;
946  uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
947  uint64_t VecOpIdx = Cst->getZExtValue();
948  uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
949  : VecOpIdx * TruncRatio;
950  assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
951  "overflow 32-bits");
952 
953  auto *BitCastTo =
954  VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
955  Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
956  return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
957  }
958  }
959 
960  // trunc (ctlz_i32(zext(A), B) --> add(ctlz_i16(A, B), C)
961  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
962  m_Value(B))))) {
963  unsigned AWidth = A->getType()->getScalarSizeInBits();
964  if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
965  Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
966  Value *NarrowCtlz =
967  Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
968  return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
969  }
970  }
971 
972  if (match(Src, m_VScale(DL))) {
973  if (Trunc.getFunction() &&
974  Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
975  Attribute Attr =
976  Trunc.getFunction()->getFnAttribute(Attribute::VScaleRange);
977  if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
978  if (Log2_32(*MaxVScale) < DestWidth) {
979  Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
980  return replaceInstUsesWith(Trunc, VScale);
981  }
982  }
983  }
984  }
985 
986  return nullptr;
987 }
988 
989 Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext) {
990  // If we are just checking for a icmp eq of a single bit and zext'ing it
991  // to an integer, then shift the bit to the appropriate place and then
992  // cast to integer to avoid the comparison.
993 
994  // FIXME: This set of transforms does not check for extra uses and/or creates
995  // an extra instruction (an optional final cast is not included
996  // in the transform comments). We may also want to favor icmp over
997  // shifts in cases of equal instructions because icmp has better
998  // analysis in general (invert the transform).
999 
1000  const APInt *Op1CV;
1001  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {
1002 
1003  // zext (x <s 0) to i32 --> x>>u31 true if signbit set.
1004  if (Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isZero()) {
1005  Value *In = Cmp->getOperand(0);
1006  Value *Sh = ConstantInt::get(In->getType(),
1007  In->getType()->getScalarSizeInBits() - 1);
1008  In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
1009  if (In->getType() != Zext.getType())
1010  In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);
1011 
1012  return replaceInstUsesWith(Zext, In);
1013  }
1014 
1015  // zext (X == 0) to i32 --> X^1 iff X has only the low bit set.
1016  // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1017  // zext (X == 1) to i32 --> X iff X has only the low bit set.
1018  // zext (X == 2) to i32 --> X>>1 iff X has only the 2nd bit set.
1019  // zext (X != 0) to i32 --> X iff X has only the low bit set.
1020  // zext (X != 0) to i32 --> X>>1 iff X has only the 2nd bit set.
1021  // zext (X != 1) to i32 --> X^1 iff X has only the low bit set.
1022  // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
1023  if ((Op1CV->isZero() || Op1CV->isPowerOf2()) &&
1024  // This only works for EQ and NE
1025  Cmp->isEquality()) {
1026  // If Op1C some other power of two, convert:
1027  KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);
1028 
1029  APInt KnownZeroMask(~Known.Zero);
1030  if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
1031  bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
1032  if (!Op1CV->isZero() && (*Op1CV != KnownZeroMask)) {
1033  // (X&4) == 2 --> false
1034  // (X&4) != 2 --> true
1035  Constant *Res = ConstantInt::get(Zext.getType(), isNE);
1036  return replaceInstUsesWith(Zext, Res);
1037  }
1038 
1039  uint32_t ShAmt = KnownZeroMask.logBase2();
1040  Value *In = Cmp->getOperand(0);
1041  if (ShAmt) {
1042  // Perform a logical shr by shiftamt.
1043  // Insert the shift to put the result in the low bit.
1044  In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
1045  In->getName() + ".lobit");
1046  }
1047 
1048  if (!Op1CV->isZero() == isNE) { // Toggle the low bit.
1049  Constant *One = ConstantInt::get(In->getType(), 1);
1050  In = Builder.CreateXor(In, One);
1051  }
1052 
1053  if (Zext.getType() == In->getType())
1054  return replaceInstUsesWith(Zext, In);
1055 
1056  Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
1057  return replaceInstUsesWith(Zext, IntCast);
1058  }
1059  }
1060  }
1061 
1062  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
1063  // Test if a bit is clear/set using a shifted-one mask:
1064  // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
1065  // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
1066  Value *X, *ShAmt;
1067  if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
1068  match(Cmp->getOperand(0),
1069  m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
1070  if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1071  X = Builder.CreateNot(X);
1072  Value *Lshr = Builder.CreateLShr(X, ShAmt);
1073  Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
1074  return replaceInstUsesWith(Zext, And1);
1075  }
1076 
1077  // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
1078  // It is also profitable to transform icmp eq into not(xor(A, B)) because
1079  // that may lead to additional simplifications.
1080  if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
1081  Value *LHS = Cmp->getOperand(0);
1082  Value *RHS = Cmp->getOperand(1);
1083 
1084  KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
1085  KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);
1086 
1087  if (KnownLHS == KnownRHS) {
1088  APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
1089  APInt UnknownBit = ~KnownBits;
1090  if (UnknownBit.countPopulation() == 1) {
1091  Value *Result = Builder.CreateXor(LHS, RHS);
1092 
1093  // Mask off any bits that are set and won't be shifted away.
1094  if (KnownLHS.One.uge(UnknownBit))
1095  Result = Builder.CreateAnd(Result,
1096  ConstantInt::get(ITy, UnknownBit));
1097 
1098  // Shift the bit we're testing down to the lsb.
1099  Result = Builder.CreateLShr(
1100  Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
1101 
1102  if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
1103  Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
1104  Result->takeName(Cmp);
1105  return replaceInstUsesWith(Zext, Result);
1106  }
1107  }
1108  }
1109  }
1110 
1111  return nullptr;
1112 }
1113 
1114 /// Determine if the specified value can be computed in the specified wider type
1115 /// and produce the same low bits. If not, return false.
1116 ///
1117 /// If this function returns true, it can also return a non-zero number of bits
1118 /// (in BitsToClear) which indicates that the value it computes is correct for
1119 /// the zero extend, but that the additional BitsToClear bits need to be zero'd
1120 /// out. For example, to promote something like:
1121 ///
1122 /// %B = trunc i64 %A to i32
1123 /// %C = lshr i32 %B, 8
1124 /// %E = zext i32 %C to i64
1125 ///
1126 /// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
1127 /// set to 8 to indicate that the promoted value needs to have bits 24-31
1128 /// cleared in addition to bits 32-63. Since an 'and' will be generated to
1129 /// clear the top bits anyway, doing this has no extra cost.
1130 ///
1131 /// This function works on both vectors and scalars.
1132 static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
1133  InstCombinerImpl &IC, Instruction *CxtI) {
1134  BitsToClear = 0;
1135  if (canAlwaysEvaluateInType(V, Ty))
1136  return true;
1137  if (canNotEvaluateInType(V, Ty))
1138  return false;
1139 
1140  auto *I = cast<Instruction>(V);
1141  unsigned Tmp;
1142  switch (I->getOpcode()) {
1143  case Instruction::ZExt: // zext(zext(x)) -> zext(x).
1144  case Instruction::SExt: // zext(sext(x)) -> sext(x).
1145  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
1146  return true;
1147  case Instruction::And:
1148  case Instruction::Or:
1149  case Instruction::Xor:
1150  case Instruction::Add:
1151  case Instruction::Sub:
1152  case Instruction::Mul:
1153  if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
1154  !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
1155  return false;
1156  // These can all be promoted if neither operand has 'bits to clear'.
1157  if (BitsToClear == 0 && Tmp == 0)
1158  return true;
1159 
1160  // If the operation is an AND/OR/XOR and the bits to clear are zero in the
1161  // other side, BitsToClear is ok.
1162  if (Tmp == 0 && I->isBitwiseLogicOp()) {
1163  // We use MaskedValueIsZero here for generality, but the case we care
1164  // about the most is constant RHS.
1165  unsigned VSize = V->getType()->getScalarSizeInBits();
1166  if (IC.MaskedValueIsZero(I->getOperand(1),
1167  APInt::getHighBitsSet(VSize, BitsToClear),
1168  0, CxtI)) {
1169  // If this is an And instruction and all of the BitsToClear are
1170  // known to be zero we can reset BitsToClear.
1171  if (I->getOpcode() == Instruction::And)
1172  BitsToClear = 0;
1173  return true;
1174  }
1175  }
1176 
1177  // Otherwise, we don't know how to analyze this BitsToClear case yet.
1178  return false;
1179 
1180  case Instruction::Shl: {
1181  // We can promote shl(x, cst) if we can promote x. Since shl overwrites the
1182  // upper bits we can reduce BitsToClear by the shift amount.
1183  const APInt *Amt;
1184  if (match(I->getOperand(1), m_APInt(Amt))) {
1185  if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1186  return false;
1187  uint64_t ShiftAmt = Amt->getZExtValue();
1188  BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
1189  return true;
1190  }
1191  return false;
1192  }
1193  case Instruction::LShr: {
1194  // We can promote lshr(x, cst) if we can promote x. This requires the
1195  // ultimate 'and' to clear out the high zero bits we're clearing out though.
1196  const APInt *Amt;
1197  if (match(I->getOperand(1), m_APInt(Amt))) {
1198  if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
1199  return false;
1200  BitsToClear += Amt->getZExtValue();
1201  if (BitsToClear > V->getType()->getScalarSizeInBits())
1202  BitsToClear = V->getType()->getScalarSizeInBits();
1203  return true;
1204  }
1205  // Cannot promote variable LSHR.
1206  return false;
1207  }
1208  case Instruction::Select:
1209  if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
1210  !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
1211  // TODO: If important, we could handle the case when the BitsToClear are
1212  // known zero in the disagreeing side.
1213  Tmp != BitsToClear)
1214  return false;
1215  return true;
1216 
1217  case Instruction::PHI: {
1218  // We can change a phi if we can change all operands. Note that we never
1219  // get into trouble with cyclic PHIs here because we only consider
1220  // instructions with a single use.
1221  PHINode *PN = cast<PHINode>(I);
1222  if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
1223  return false;
1224  for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
1225  if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
1226  // TODO: If important, we could handle the case when the BitsToClear
1227  // are known zero in the disagreeing input.
1228  Tmp != BitsToClear)
1229  return false;
1230  return true;
1231  }
1232  default:
1233  // TODO: Can handle more cases here.
1234  return false;
1235  }
1236 }
1237 
1239  // If this zero extend is only used by a truncate, let the truncate be
1240  // eliminated before we try to optimize this zext.
1241  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
1242  return nullptr;
1243 
1244  // If one of the common conversion will work, do it.
1245  if (Instruction *Result = commonCastTransforms(CI))
1246  return Result;
1247 
1248  Value *Src = CI.getOperand(0);
1249  Type *SrcTy = Src->getType(), *DestTy = CI.getType();
1250 
1251  // Try to extend the entire expression tree to the wide destination type.
1252  unsigned BitsToClear;
1253  if (shouldChangeType(SrcTy, DestTy) &&
1254  canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
1255  assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
1256  "Can't clear more bits than in SrcTy");
1257 
1258  // Okay, we can transform this! Insert the new expression now.
1259  LLVM_DEBUG(
1260  dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1261  " to avoid zero extend: "
1262  << CI << '\n');
1263  Value *Res = EvaluateInDifferentType(Src, DestTy, false);
1264  assert(Res->getType() == DestTy);
1265 
1266  // Preserve debug values referring to Src if the zext is its last use.
1267  if (auto *SrcOp = dyn_cast<Instruction>(Src))
1268  if (SrcOp->hasOneUse())
1269  replaceAllDbgUsesWith(*SrcOp, *Res, CI, DT);
1270 
1271  uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
1272  uint32_t DestBitSize = DestTy->getScalarSizeInBits();
1273 
1274  // If the high bits are already filled with zeros, just replace this
1275  // cast with the result.
1276  if (MaskedValueIsZero(Res,
1277  APInt::getHighBitsSet(DestBitSize,
1278  DestBitSize-SrcBitsKept),
1279  0, &CI))
1280  return replaceInstUsesWith(CI, Res);
1281 
1282  // We need to emit an AND to clear the high bits.
1283  Constant *C = ConstantInt::get(Res->getType(),
1284  APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
1285  return BinaryOperator::CreateAnd(Res, C);
1286  }
1287 
1288  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
1289  // types and if the sizes are just right we can convert this into a logical
1290  // 'and' which will be much cheaper than the pair of casts.
1291  if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
1292  // TODO: Subsume this into EvaluateInDifferentType.
1293 
1294  // Get the sizes of the types involved. We know that the intermediate type
1295  // will be smaller than A or C, but don't know the relation between A and C.
1296  Value *A = CSrc->getOperand(0);
1297  unsigned SrcSize = A->getType()->getScalarSizeInBits();
1298  unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
1299  unsigned DstSize = CI.getType()->getScalarSizeInBits();
1300  // If we're actually extending zero bits, then if
1301  // SrcSize < DstSize: zext(a & mask)
1302  // SrcSize == DstSize: a & mask
1303  // SrcSize > DstSize: trunc(a) & mask
1304  if (SrcSize < DstSize) {
1305  APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1306  Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
1307  Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
1308  return new ZExtInst(And, CI.getType());
1309  }
1310 
1311  if (SrcSize == DstSize) {
1312  APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
1313  return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
1314  AndValue));
1315  }
1316  if (SrcSize > DstSize) {
1317  Value *Trunc = Builder.CreateTrunc(A, CI.getType());
1318  APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
1319  return BinaryOperator::CreateAnd(Trunc,
1320  ConstantInt::get(Trunc->getType(),
1321  AndValue));
1322  }
1323  }
1324 
1325  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
1326  return transformZExtICmp(Cmp, CI);
1327 
1328  // zext(trunc(X) & C) -> (X & zext(C)).
1329  Constant *C;
1330  Value *X;
1331  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
1332  X->getType() == CI.getType())
1333  return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));
1334 
1335  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
1336  Value *And;
1337  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
1339  X->getType() == CI.getType()) {
1340  Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
1341  return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
1342  }
1343 
1344  if (match(Src, m_VScale(DL))) {
1345  if (CI.getFunction() &&
1346  CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1347  Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
1348  if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1349  unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
1350  if (Log2_32(*MaxVScale) < TypeWidth) {
1351  Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1352  return replaceInstUsesWith(CI, VScale);
1353  }
1354  }
1355  }
1356  }
1357 
1358  return nullptr;
1359 }
1360 
1361 /// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
1362 Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI,
1363  Instruction &CI) {
1364  Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
1365  ICmpInst::Predicate Pred = ICI->getPredicate();
1366 
1367  // Don't bother if Op1 isn't of vector or integer type.
1368  if (!Op1->getType()->isIntOrIntVectorTy())
1369  return nullptr;
1370 
1371  if ((Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) ||
1372  (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))) {
1373  // (x <s 0) ? -1 : 0 -> ashr x, 31 -> all ones if negative
1374  // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive
1375  Value *Sh = ConstantInt::get(Op0->getType(),
1376  Op0->getType()->getScalarSizeInBits() - 1);
1377  Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
1378  if (In->getType() != CI.getType())
1379  In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/);
1380 
1381  if (Pred == ICmpInst::ICMP_SGT)
1382  In = Builder.CreateNot(In, In->getName() + ".not");
1383  return replaceInstUsesWith(CI, In);
1384  }
1385 
1386  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
1387  // If we know that only one bit of the LHS of the icmp can be set and we
1388  // have an equality comparison with zero or a power of 2, we can transform
1389  // the icmp and sext into bitwise/integer operations.
1390  if (ICI->hasOneUse() &&
1391  ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
1392  KnownBits Known = computeKnownBits(Op0, 0, &CI);
1393 
1394  APInt KnownZeroMask(~Known.Zero);
1395  if (KnownZeroMask.isPowerOf2()) {
1396  Value *In = ICI->getOperand(0);
1397 
1398  // If the icmp tests for a known zero bit we can constant fold it.
1399  if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
1400  Value *V = Pred == ICmpInst::ICMP_NE ?
1402  ConstantInt::getNullValue(CI.getType());
1403  return replaceInstUsesWith(CI, V);
1404  }
1405 
1406  if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1407  // sext ((x & 2^n) == 0) -> (x >> n) - 1
1408  // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1409  unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
1410  // Perform a right shift to place the desired bit in the LSB.
1411  if (ShiftAmt)
1412  In = Builder.CreateLShr(In,
1413  ConstantInt::get(In->getType(), ShiftAmt));
1414 
1415  // At this point "In" is either 1 or 0. Subtract 1 to turn
1416  // {1, 0} -> {0, -1}.
1417  In = Builder.CreateAdd(In,
1418  ConstantInt::getAllOnesValue(In->getType()),
1419  "sext");
1420  } else {
1421  // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1422  // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1423  unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
1424  // Perform a left shift to place the desired bit in the MSB.
1425  if (ShiftAmt)
1426  In = Builder.CreateShl(In,
1427  ConstantInt::get(In->getType(), ShiftAmt));
1428 
1429  // Distribute the bit over the whole bit width.
1430  In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1431  KnownZeroMask.getBitWidth() - 1), "sext");
1432  }
1433 
1434  if (CI.getType() == In->getType())
1435  return replaceInstUsesWith(CI, In);
1436  return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
1437  }
1438  }
1439  }
1440 
1441  return nullptr;
1442 }
1443 
1444 /// Return true if we can take the specified value and return it as type Ty
1445 /// without inserting any new casts and without changing the value of the common
1446 /// low bits. This is used by code that tries to promote integer operations to
1447 /// a wider types will allow us to eliminate the extension.
1448 ///
1449 /// This function works on both vectors and scalars.
1450 ///
1451 static bool canEvaluateSExtd(Value *V, Type *Ty) {
1453  "Can't sign extend type to a smaller type");
1454  if (canAlwaysEvaluateInType(V, Ty))
1455  return true;
1456  if (canNotEvaluateInType(V, Ty))
1457  return false;
1458 
1459  auto *I = cast<Instruction>(V);
1460  switch (I->getOpcode()) {
1461  case Instruction::SExt: // sext(sext(x)) -> sext(x)
1462  case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1463  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1464  return true;
1465  case Instruction::And:
1466  case Instruction::Or:
1467  case Instruction::Xor:
1468  case Instruction::Add:
1469  case Instruction::Sub:
1470  case Instruction::Mul:
1471  // These operators can all arbitrarily be extended if their inputs can.
1472  return canEvaluateSExtd(I->getOperand(0), Ty) &&
1473  canEvaluateSExtd(I->getOperand(1), Ty);
1474 
1475  //case Instruction::Shl: TODO
1476  //case Instruction::LShr: TODO
1477 
1478  case Instruction::Select:
1479  return canEvaluateSExtd(I->getOperand(1), Ty) &&
1480  canEvaluateSExtd(I->getOperand(2), Ty);
1481 
1482  case Instruction::PHI: {
1483  // We can change a phi if we can change all operands. Note that we never
1484  // get into trouble with cyclic PHIs here because we only consider
1485  // instructions with a single use.
1486  PHINode *PN = cast<PHINode>(I);
1487  for (Value *IncValue : PN->incoming_values())
1488  if (!canEvaluateSExtd(IncValue, Ty)) return false;
1489  return true;
1490  }
1491  default:
1492  // TODO: Can handle more cases here.
1493  break;
1494  }
1495 
1496  return false;
1497 }
1498 
1500  // If this sign extend is only used by a truncate, let the truncate be
1501  // eliminated before we try to optimize this sext.
1502  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
1503  return nullptr;
1504 
1505  if (Instruction *I = commonCastTransforms(CI))
1506  return I;
1507 
1508  Value *Src = CI.getOperand(0);
1509  Type *SrcTy = Src->getType(), *DestTy = CI.getType();
1510  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1511  unsigned DestBitSize = DestTy->getScalarSizeInBits();
1512 
1513  // If the value being extended is zero or positive, use a zext instead.
1514  if (isKnownNonNegative(Src, DL, 0, &AC, &CI, &DT))
1515  return CastInst::Create(Instruction::ZExt, Src, DestTy);
1516 
1517  // Try to extend the entire expression tree to the wide destination type.
1518  if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1519  // Okay, we can transform this! Insert the new expression now.
1520  LLVM_DEBUG(
1521  dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1522  " to avoid sign extend: "
1523  << CI << '\n');
1524  Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1525  assert(Res->getType() == DestTy);
1526 
1527  // If the high bits are already filled with sign bit, just replace this
1528  // cast with the result.
1529  if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
1530  return replaceInstUsesWith(CI, Res);
1531 
1532  // We need to emit a shl + ashr to do the sign extend.
1533  Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1534  return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1535  ShAmt);
1536  }
1537 
1538  Value *X;
1539  if (match(Src, m_Trunc(m_Value(X)))) {
1540  // If the input has more sign bits than bits truncated, then convert
1541  // directly to final type.
1542  unsigned XBitSize = X->getType()->getScalarSizeInBits();
1543  if (ComputeNumSignBits(X, 0, &CI) > XBitSize - SrcBitSize)
1544  return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1545 
1546  // If input is a trunc from the destination type, then convert into shifts.
1547  if (Src->hasOneUse() && X->getType() == DestTy) {
1548  // sext (trunc X) --> ashr (shl X, C), C
1549  Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1550  return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1551  }
1552 
1553  // If we are replacing shifted-in high zero bits with sign bits, convert
1554  // the logic shift to arithmetic shift and eliminate the cast to
1555  // intermediate type:
1556  // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1557  Value *Y;
1558  if (Src->hasOneUse() &&
1559  match(X, m_LShr(m_Value(Y),
1560  m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
1561  Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1562  return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1563  }
1564  }
1565 
1566  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
1567  return transformSExtICmp(ICI, CI);
1568 
1569  // If the input is a shl/ashr pair of a same constant, then this is a sign
1570  // extension from a smaller value. If we could trust arbitrary bitwidth
1571  // integers, we could turn this into a truncate to the smaller bit and then
1572  // use a sext for the whole extension. Since we don't, look deeper and check
1573  // for a truncate. If the source and dest are the same type, eliminate the
1574  // trunc and extend and just do shifts. For example, turn:
1575  // %a = trunc i32 %i to i8
1576  // %b = shl i8 %a, C
1577  // %c = ashr i8 %b, C
1578  // %d = sext i8 %c to i32
1579  // into:
1580  // %a = shl i32 %i, 32-(8-C)
1581  // %d = ashr i32 %a, 32-(8-C)
1582  Value *A = nullptr;
1583  // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1584  Constant *BA = nullptr, *CA = nullptr;
1585  if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1586  m_Constant(CA))) &&
1587  BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1588  Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
1589  Constant *NumLowbitsLeft = ConstantExpr::getSub(
1590  ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1591  Constant *NewShAmt = ConstantExpr::getSub(
1592  ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1593  NumLowbitsLeft);
1594  NewShAmt =
1596  A = Builder.CreateShl(A, NewShAmt, CI.getName());
1597  return BinaryOperator::CreateAShr(A, NewShAmt);
1598  }
1599 
1600  // Splatting a bit of constant-index across a value:
1601  // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1602  // If the dest type is different, use a cast (adjust use check).
1603  if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1604  m_SpecificInt(SrcBitSize - 1))))) {
1605  Type *XTy = X->getType();
1606  unsigned XBitSize = XTy->getScalarSizeInBits();
1607  Constant *ShlAmtC = ConstantInt::get(XTy, XBitSize - SrcBitSize);
1608  Constant *AshrAmtC = ConstantInt::get(XTy, XBitSize - 1);
1609  if (XTy == DestTy)
1610  return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShlAmtC),
1611  AshrAmtC);
1612  if (cast<BinaryOperator>(Src)->getOperand(0)->hasOneUse()) {
1613  Value *Ashr = Builder.CreateAShr(Builder.CreateShl(X, ShlAmtC), AshrAmtC);
1614  return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1615  }
1616  }
1617 
1618  if (match(Src, m_VScale(DL))) {
1619  if (CI.getFunction() &&
1620  CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1621  Attribute Attr = CI.getFunction()->getFnAttribute(Attribute::VScaleRange);
1622  if (Optional<unsigned> MaxVScale = Attr.getVScaleRangeMax()) {
1623  if (Log2_32(*MaxVScale) < (SrcBitSize - 1)) {
1624  Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1625  return replaceInstUsesWith(CI, VScale);
1626  }
1627  }
1628  }
1629  }
1630 
1631  return nullptr;
1632 }
1633 
1634 /// Return true if the specified floating-point constant fits in the
1635 /// specified FP type without changing its value.
1636 static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1637  bool losesInfo;
1638  APFloat F = CFP->getValueAPF();
1639  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1640  return !losesInfo;
1641 }
1642 
1644  if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1645  return nullptr; // No constant folding of this.
1646  // See if the value can be truncated to half and then reextended.
1647  if (fitsInFPType(CFP, APFloat::IEEEhalf()))
1648  return Type::getHalfTy(CFP->getContext());
1649  // See if the value can be truncated to float and then reextended.
1650  if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1651  return Type::getFloatTy(CFP->getContext());
1652  if (CFP->getType()->isDoubleTy())
1653  return nullptr; // Won't shrink.
1654  if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1655  return Type::getDoubleTy(CFP->getContext());
1656  // Don't try to shrink to various long double types.
1657  return nullptr;
1658 }
1659 
1660 // Determine if this is a vector of ConstantFPs and if so, return the minimal
1661 // type we can safely truncate all elements to.
1662 // TODO: Make these support undef elements.
1664  auto *CV = dyn_cast<Constant>(V);
1665  auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1666  if (!CV || !CVVTy)
1667  return nullptr;
1668 
1669  Type *MinType = nullptr;
1670 
1671  unsigned NumElts = CVVTy->getNumElements();
1672 
1673  // For fixed-width vectors we find the minimal type by looking
1674  // through the constant values of the vector.
1675  for (unsigned i = 0; i != NumElts; ++i) {
1676  auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1677  if (!CFP)
1678  return nullptr;
1679 
1680  Type *T = shrinkFPConstant(CFP);
1681  if (!T)
1682  return nullptr;
1683 
1684  // If we haven't found a type yet or this type has a larger mantissa than
1685  // our previous type, this is our new minimal type.
1686  if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1687  MinType = T;
1688  }
1689 
1690  // Make a vector type from the minimal type.
1691  return FixedVectorType::get(MinType, NumElts);
1692 }
1693 
1694 /// Find the minimum FP type we can safely truncate to.
1696  if (auto *FPExt = dyn_cast<FPExtInst>(V))
1697  return FPExt->getOperand(0)->getType();
1698 
1699  // If this value is a constant, return the constant in the smallest FP type
1700  // that can accurately represent it. This allows us to turn
1701  // (float)((double)X+2.0) into x+2.0f.
1702  if (auto *CFP = dyn_cast<ConstantFP>(V))
1703  if (Type *T = shrinkFPConstant(CFP))
1704  return T;
1705 
1706  // We can only correctly find a minimum type for a scalable vector when it is
1707  // a splat. For splats of constant values the fpext is wrapped up as a
1708  // ConstantExpr.
1709  if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1710  if (FPCExt->getOpcode() == Instruction::FPExt)
1711  return FPCExt->getOperand(0)->getType();
1712 
1713  // Try to shrink a vector of FP constants. This returns nullptr on scalable
1714  // vectors
1715  if (Type *T = shrinkFPConstantVector(V))
1716  return T;
1717 
1718  return V->getType();
1719 }
1720 
1721 /// Return true if the cast from integer to FP can be proven to be exact for all
1722 /// possible inputs (the conversion does not lose any precision).
1724  CastInst::CastOps Opcode = I.getOpcode();
1725  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1726  "Unexpected cast");
1727  Value *Src = I.getOperand(0);
1728  Type *SrcTy = Src->getType();
1729  Type *FPTy = I.getType();
1730  bool IsSigned = Opcode == Instruction::SIToFP;
1731  int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1732 
1733  // Easy case - if the source integer type has less bits than the FP mantissa,
1734  // then the cast must be exact.
1735  int DestNumSigBits = FPTy->getFPMantissaWidth();
1736  if (SrcSize <= DestNumSigBits)
1737  return true;
1738 
1739  // Cast from FP to integer and back to FP is independent of the intermediate
1740  // integer width because of poison on overflow.
1741  Value *F;
1742  if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1743  // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1744  // potential rounding of negative FP input values.
1745  int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1746  if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1747  SrcNumSigBits++;
1748 
1749  // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1750  // significant bits than the destination (and make sure neither type is
1751  // weird -- ppc_fp128).
1752  if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1753  SrcNumSigBits <= DestNumSigBits)
1754  return true;
1755  }
1756 
1757  // TODO:
1758  // Try harder to find if the source integer type has less significant bits.
1759  // For example, compute number of sign bits or compute low bit mask.
1760  return false;
1761 }
1762 
1764  if (Instruction *I = commonCastTransforms(FPT))
1765  return I;
1766 
1767  // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1768  // simplify this expression to avoid one or more of the trunc/extend
1769  // operations if we can do so without changing the numerical results.
1770  //
1771  // The exact manner in which the widths of the operands interact to limit
1772  // what we can and cannot do safely varies from operation to operation, and
1773  // is explained below in the various case statements.
1774  Type *Ty = FPT.getType();
1775  auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1776  if (BO && BO->hasOneUse()) {
1777  Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
1778  Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
1779  unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1780  unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1781  unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1782  unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1783  unsigned DstWidth = Ty->getFPMantissaWidth();
1784  switch (BO->getOpcode()) {
1785  default: break;
1786  case Instruction::FAdd:
1787  case Instruction::FSub:
1788  // For addition and subtraction, the infinitely precise result can
1789  // essentially be arbitrarily wide; proving that double rounding
1790  // will not occur because the result of OpI is exact (as we will for
1791  // FMul, for example) is hopeless. However, we *can* nonetheless
1792  // frequently know that double rounding cannot occur (or that it is
1793  // innocuous) by taking advantage of the specific structure of
1794  // infinitely-precise results that admit double rounding.
1795  //
 1796  // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1797  // to represent both sources, we can guarantee that the double
1798  // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1799  // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1800  // for proof of this fact).
1801  //
1802  // Note: Figueroa does not consider the case where DstFormat !=
1803  // SrcFormat. It's possible (likely even!) that this analysis
1804  // could be tightened for those cases, but they are rare (the main
1805  // case of interest here is (float)((double)float + float)).
1806  if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1807  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1808  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1809  Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1810  RI->copyFastMathFlags(BO);
1811  return RI;
1812  }
1813  break;
1814  case Instruction::FMul:
1815  // For multiplication, the infinitely precise result has at most
1816  // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1817  // that such a value can be exactly represented, then no double
1818  // rounding can possibly occur; we can safely perform the operation
1819  // in the destination format if it can represent both sources.
1820  if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1821  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1822  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1823  return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1824  }
1825  break;
1826  case Instruction::FDiv:
 1827  // For division, we again use the bound from Figueroa's
1828  // dissertation. I am entirely certain that this bound can be
1829  // tightened in the unbalanced operand case by an analysis based on
1830  // the diophantine rational approximation bound, but the well-known
1831  // condition used here is a good conservative first pass.
1832  // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1833  if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1834  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1835  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1836  return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1837  }
1838  break;
1839  case Instruction::FRem: {
1840  // Remainder is straightforward. Remainder is always exact, so the
1841  // type of OpI doesn't enter into things at all. We simply evaluate
1842  // in whichever source type is larger, then convert to the
1843  // destination type.
1844  if (SrcWidth == OpWidth)
1845  break;
1846  Value *LHS, *RHS;
1847  if (LHSWidth == SrcWidth) {
1848  LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1849  RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1850  } else {
1851  LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1852  RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1853  }
1854 
1855  Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1856  return CastInst::CreateFPCast(ExactResult, Ty);
1857  }
1858  }
1859  }
1860 
1861  // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1862  Value *X;
1863  Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1864  if (Op && Op->hasOneUse()) {
1865  // FIXME: The FMF should propagate from the fptrunc, not the source op.
1867  if (isa<FPMathOperator>(Op))
1868  Builder.setFastMathFlags(Op->getFastMathFlags());
1869 
1870  if (match(Op, m_FNeg(m_Value(X)))) {
1871  Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1872 
1873  return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1874  }
1875 
1876  // If we are truncating a select that has an extended operand, we can
1877  // narrow the other operand and do the select as a narrow op.
1878  Value *Cond, *X, *Y;
1879  if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1880  X->getType() == Ty) {
1881  // fptrunc (select Cond, (fpext X), Y --> select Cond, X, (fptrunc Y)
1882  Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1883  Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1884  return replaceInstUsesWith(FPT, Sel);
1885  }
1886  if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1887  X->getType() == Ty) {
1888  // fptrunc (select Cond, Y, (fpext X) --> select Cond, (fptrunc Y), X
1889  Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1890  Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1891  return replaceInstUsesWith(FPT, Sel);
1892  }
1893  }
1894 
1895  if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1896  switch (II->getIntrinsicID()) {
1897  default: break;
1898  case Intrinsic::ceil:
1899  case Intrinsic::fabs:
1900  case Intrinsic::floor:
1901  case Intrinsic::nearbyint:
1902  case Intrinsic::rint:
1903  case Intrinsic::round:
1904  case Intrinsic::roundeven:
1905  case Intrinsic::trunc: {
1906  Value *Src = II->getArgOperand(0);
1907  if (!Src->hasOneUse())
1908  break;
1909 
1910  // Except for fabs, this transformation requires the input of the unary FP
1911  // operation to be itself an fpext from the type to which we're
1912  // truncating.
1913  if (II->getIntrinsicID() != Intrinsic::fabs) {
1914  FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1915  if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1916  break;
1917  }
1918 
1919  // Do unary FP operation on smaller type.
1920  // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1921  Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1922  Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
1923  II->getIntrinsicID(), Ty);
1925  II->getOperandBundlesAsDefs(OpBundles);
1926  CallInst *NewCI =
1927  CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1928  NewCI->copyFastMathFlags(II);
1929  return NewCI;
1930  }
1931  }
1932  }
1933 
1934  if (Instruction *I = shrinkInsertElt(FPT, Builder))
1935  return I;
1936 
1937  Value *Src = FPT.getOperand(0);
1938  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1939  auto *FPCast = cast<CastInst>(Src);
1940  if (isKnownExactCastIntToFP(*FPCast))
1941  return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1942  }
1943 
1944  return nullptr;
1945 }
1946 
1948  // If the source operand is a cast from integer to FP and known exact, then
1949  // cast the integer operand directly to the destination type.
1950  Type *Ty = FPExt.getType();
1951  Value *Src = FPExt.getOperand(0);
1952  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1953  auto *FPCast = cast<CastInst>(Src);
1954  if (isKnownExactCastIntToFP(*FPCast))
1955  return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1956  }
1957 
1958  return commonCastTransforms(FPExt);
1959 }
1960 
1961 /// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1962 /// This is safe if the intermediate type has enough bits in its mantissa to
1963 /// accurately represent all values of X. For example, this won't work with
1964 /// i64 -> float -> i64.
1966  if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1967  return nullptr;
1968 
1969  auto *OpI = cast<CastInst>(FI.getOperand(0));
1970  Value *X = OpI->getOperand(0);
1971  Type *XType = X->getType();
1972  Type *DestType = FI.getType();
1973  bool IsOutputSigned = isa<FPToSIInst>(FI);
1974 
1975  // Since we can assume the conversion won't overflow, our decision as to
1976  // whether the input will fit in the float should depend on the minimum
1977  // of the input range and output range.
1978 
1979  // This means this is also safe for a signed input and unsigned output, since
1980  // a negative input would lead to undefined behavior.
1981  if (!isKnownExactCastIntToFP(*OpI)) {
1982  // The first cast may not round exactly based on the source integer width
1983  // and FP width, but the overflow UB rules can still allow this to fold.
1984  // If the destination type is narrow, that means the intermediate FP value
1985  // must be large enough to hold the source value exactly.
1986  // For example, (uint8_t)((float)(uint32_t 16777217) is undefined behavior.
1987  int OutputSize = (int)DestType->getScalarSizeInBits();
1988  if (OutputSize > OpI->getType()->getFPMantissaWidth())
1989  return nullptr;
1990  }
1991 
1992  if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1993  bool IsInputSigned = isa<SIToFPInst>(OpI);
1994  if (IsInputSigned && IsOutputSigned)
1995  return new SExtInst(X, DestType);
1996  return new ZExtInst(X, DestType);
1997  }
1998  if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1999  return new TruncInst(X, DestType);
2000 
2001  assert(XType == DestType && "Unexpected types for int to FP to int casts");
2002  return replaceInstUsesWith(FI, X);
2003 }
2004 
2006  if (Instruction *I = foldItoFPtoI(FI))
2007  return I;
2008 
2009  return commonCastTransforms(FI);
2010 }
2011 
2013  if (Instruction *I = foldItoFPtoI(FI))
2014  return I;
2015 
2016  return commonCastTransforms(FI);
2017 }
2018 
2020  return commonCastTransforms(CI);
2021 }
2022 
2024  return commonCastTransforms(CI);
2025 }
2026 
2028  // If the source integer type is not the intptr_t type for this target, do a
2029  // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2030  // cast to be exposed to other transforms.
2031  unsigned AS = CI.getAddressSpace();
2032  if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2033  DL.getPointerSizeInBits(AS)) {
2034  Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2035  DL.getIntPtrType(CI.getContext(), AS));
2036  Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2037  return new IntToPtrInst(P, CI.getType());
2038  }
2039 
2040  if (Instruction *I = commonCastTransforms(CI))
2041  return I;
2042 
2043  return nullptr;
2044 }
2045 
2046 /// Implement the transforms for cast of pointer (bitcast/ptrtoint)
2048  Value *Src = CI.getOperand(0);
2049 
2050  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
2051  // If casting the result of a getelementptr instruction with no offset, turn
2052  // this into a cast of the original pointer!
2053  if (GEP->hasAllZeroIndices() &&
 2054  // If CI is an addrspacecast and GEP changes the pointer type, merging
2055  // GEP into CI would undo canonicalizing addrspacecast with different
2056  // pointer types, causing infinite loops.
2057  (!isa<AddrSpaceCastInst>(CI) ||
2058  GEP->getType() == GEP->getPointerOperandType())) {
2059  // Changing the cast operand is usually not a good idea but it is safe
2060  // here because the pointer operand is being replaced with another
2061  // pointer operand so the opcode doesn't need to change.
2062  return replaceOperand(CI, 0, GEP->getOperand(0));
2063  }
2064  }
2065 
2066  return commonCastTransforms(CI);
2067 }
2068 
2070  // If the destination integer type is not the intptr_t type for this target,
2071  // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2072  // to be exposed to other transforms.
2073  Value *SrcOp = CI.getPointerOperand();
2074  Type *SrcTy = SrcOp->getType();
2075  Type *Ty = CI.getType();
2076  unsigned AS = CI.getPointerAddressSpace();
2077  unsigned TySize = Ty->getScalarSizeInBits();
2078  unsigned PtrSize = DL.getPointerSizeInBits(AS);
2079  if (TySize != PtrSize) {
2080  Type *IntPtrTy =
2081  SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2082  Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2083  return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2084  }
2085 
2086  if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
2087  // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
2088  // While this can increase the number of instructions it doesn't actually
2089  // increase the overall complexity since the arithmetic is just part of
2090  // the GEP otherwise.
2091  if (GEP->hasOneUse() &&
2092  isa<ConstantPointerNull>(GEP->getPointerOperand())) {
2093  return replaceInstUsesWith(CI,
2094  Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
2095  /*isSigned=*/false));
2096  }
2097  }
2098 
2099  Value *Vec, *Scalar, *Index;
2101  m_Value(Scalar), m_Value(Index)))) &&
2102  Vec->getType() == Ty) {
2103  assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2104  // Convert the scalar to int followed by insert to eliminate one cast:
2105  // p2i (ins (i2p Vec), Scalar, Index --> ins Vec, (p2i Scalar), Index
2106  Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2107  return InsertElementInst::Create(Vec, NewCast, Index);
2108  }
2109 
2110  return commonPointerCastTransforms(CI);
2111 }
2112 
2113 /// This input value (which is known to have vector type) is being zero extended
2114 /// or truncated to the specified vector type. Since the zext/trunc is done
2115 /// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2116 /// endianness will impact which end of the vector that is extended or
2117 /// truncated.
2118 ///
2119 /// A vector is always stored with index 0 at the lowest address, which
2120 /// corresponds to the most significant bits for a big endian stored integer and
2121 /// the least significant bits for little endian. A trunc/zext of an integer
2122 /// impacts the big end of the integer. Thus, we need to add/remove elements at
2123 /// the front of the vector for big endian targets, and the back of the vector
2124 /// for little endian targets.
2125 ///
2126 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2127 ///
2128 /// The source and destination vector types may have different element types.
2129 static Instruction *
2131  InstCombinerImpl &IC) {
2132  // We can only do this optimization if the output is a multiple of the input
2133  // element size, or the input is a multiple of the output element size.
2134  // Convert the input type to have the same element type as the output.
2135  VectorType *SrcTy = cast<VectorType>(InVal->getType());
2136 
2137  if (SrcTy->getElementType() != DestTy->getElementType()) {
2138  // The input types don't need to be identical, but for now they must be the
2139  // same size. There is no specific reason we couldn't handle things like
2140  // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2141  // there yet.
2142  if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2144  return nullptr;
2145 
2146  SrcTy =
2148  cast<FixedVectorType>(SrcTy)->getNumElements());
2149  InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2150  }
2151 
2152  bool IsBigEndian = IC.getDataLayout().isBigEndian();
2153  unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2154  unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2155 
2156  assert(SrcElts != DestElts && "Element counts should be different.");
2157 
2158  // Now that the element types match, get the shuffle mask and RHS of the
2159  // shuffle to use, which depends on whether we're increasing or decreasing the
2160  // size of the input.
2161  auto ShuffleMaskStorage = llvm::to_vector<16>(llvm::seq<int>(0, SrcElts));
2162  ArrayRef<int> ShuffleMask;
2163  Value *V2;
2164 
2165  if (SrcElts > DestElts) {
2166  // If we're shrinking the number of elements (rewriting an integer
2167  // truncate), just shuffle in the elements corresponding to the least
2168  // significant bits from the input and use poison as the second shuffle
2169  // input.
2170  V2 = PoisonValue::get(SrcTy);
2171  // Make sure the shuffle mask selects the "least significant bits" by
2172  // keeping elements from back of the src vector for big endian, and from the
2173  // front for little endian.
2174  ShuffleMask = ShuffleMaskStorage;
2175  if (IsBigEndian)
2176  ShuffleMask = ShuffleMask.take_back(DestElts);
2177  else
2178  ShuffleMask = ShuffleMask.take_front(DestElts);
2179  } else {
2180  // If we're increasing the number of elements (rewriting an integer zext),
2181  // shuffle in all of the elements from InVal. Fill the rest of the result
2182  // elements with zeros from a constant zero.
2183  V2 = Constant::getNullValue(SrcTy);
2184  // Use first elt from V2 when indicating zero in the shuffle mask.
2185  uint32_t NullElt = SrcElts;
2186  // Extend with null values in the "most significant bits" by adding elements
2187  // in front of the src vector for big endian, and at the back for little
2188  // endian.
2189  unsigned DeltaElts = DestElts - SrcElts;
2190  if (IsBigEndian)
2191  ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2192  else
2193  ShuffleMaskStorage.append(DeltaElts, NullElt);
2194  ShuffleMask = ShuffleMaskStorage;
2195  }
2196 
2197  return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2198 }
2199 
2200 static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2201  return Value % Ty->getPrimitiveSizeInBits() == 0;
2202 }
2203 
2204 static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2205  return Value / Ty->getPrimitiveSizeInBits();
2206 }
2207 
2208 /// V is a value which is inserted into a vector of VecEltTy.
2209 /// Look through the value to see if we can decompose it into
2210 /// insertions into the vector. See the example in the comment for
2211 /// OptimizeIntegerToVectorInsertions for the pattern this handles.
2212 /// The type of V is always a non-zero multiple of VecEltTy's size.
2213 /// Shift is the number of bits between the lsb of V and the lsb of
2214 /// the vector.
2215 ///
2216 /// This returns false if the pattern can't be matched or true if it can,
2217 /// filling in Elements with the elements found here.
2218 static bool collectInsertionElements(Value *V, unsigned Shift,
2219  SmallVectorImpl<Value *> &Elements,
2220  Type *VecEltTy, bool isBigEndian) {
2221  assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2222  "Shift should be a multiple of the element type size");
2223 
2224  // Undef values never contribute useful bits to the result.
2225  if (isa<UndefValue>(V)) return true;
2226 
2227  // If we got down to a value of the right type, we win, try inserting into the
2228  // right element.
2229  if (V->getType() == VecEltTy) {
2230  // Inserting null doesn't actually insert any elements.
2231  if (Constant *C = dyn_cast<Constant>(V))
2232  if (C->isNullValue())
2233  return true;
2234 
2235  unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2236  if (isBigEndian)
2237  ElementIndex = Elements.size() - ElementIndex - 1;
2238 
2239  // Fail if multiple elements are inserted into this slot.
2240  if (Elements[ElementIndex])
2241  return false;
2242 
2243  Elements[ElementIndex] = V;
2244  return true;
2245  }
2246 
2247  if (Constant *C = dyn_cast<Constant>(V)) {
2248  // Figure out the # elements this provides, and bitcast it or slice it up
2249  // as required.
2250  unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2251  VecEltTy);
2252  // If the constant is the size of a vector element, we just need to bitcast
2253  // it to the right type so it gets properly inserted.
2254  if (NumElts == 1)
2256  Shift, Elements, VecEltTy, isBigEndian);
2257 
2258  // Okay, this is a constant that covers multiple elements. Slice it up into
2259  // pieces and insert each element-sized piece into the vector.
2260  if (!isa<IntegerType>(C->getType()))
2262  C->getType()->getPrimitiveSizeInBits()));
2263  unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2264  Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2265 
2266  for (unsigned i = 0; i != NumElts; ++i) {
2267  unsigned ShiftI = Shift+i*ElementSize;
2268  Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
2269  ShiftI));
2270  Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2271  if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
2272  isBigEndian))
2273  return false;
2274  }
2275  return true;
2276  }
2277 
2278  if (!V->hasOneUse()) return false;
2279 
2280  Instruction *I = dyn_cast<Instruction>(V);
2281  if (!I) return false;
2282  switch (I->getOpcode()) {
2283  default: return false; // Unhandled case.
2284  case Instruction::BitCast:
2285  if (I->getOperand(0)->getType()->isVectorTy())
2286  return false;
2287  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2288  isBigEndian);
2289  case Instruction::ZExt:
2290  if (!isMultipleOfTypeSize(
2291  I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2292  VecEltTy))
2293  return false;
2294  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2295  isBigEndian);
2296  case Instruction::Or:
2297  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2298  isBigEndian) &&
2299  collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2300  isBigEndian);
2301  case Instruction::Shl: {
2302  // Must be shifting by a constant that is a multiple of the element size.
2303  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2304  if (!CI) return false;
2305  Shift += CI->getZExtValue();
2306  if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2307  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2308  isBigEndian);
2309  }
2310 
2311  }
2312 }
2313 
2314 
2315 /// If the input is an 'or' instruction, we may be doing shifts and ors to
2316 /// assemble the elements of the vector manually.
2317 /// Try to rip the code out and replace it with insertelements. This is to
2318 /// optimize code like this:
2319 ///
2320 /// %tmp37 = bitcast float %inc to i32
2321 /// %tmp38 = zext i32 %tmp37 to i64
2322 /// %tmp31 = bitcast float %inc5 to i32
2323 /// %tmp32 = zext i32 %tmp31 to i64
2324 /// %tmp33 = shl i64 %tmp32, 32
2325 /// %ins35 = or i64 %tmp33, %tmp38
2326 /// %tmp43 = bitcast i64 %ins35 to <2 x float>
2327 ///
2328 /// Into two insertelements that do "buildvector{%inc, %inc5}".
// NOTE(review): the first line of the signature (original line 2329) is
// missing from this extraction (Doxygen hyperlink loss). From the call site
// in visitBitCast ("optimizeIntegerToVectorInsertions(CI, *this)") it is
// presumably "static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,"
// -- confirm against upstream before compiling.
2330  InstCombinerImpl &IC) {
2331  auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2332  Value *IntInput = CI.getOperand(0);
2333 
// Elements[i] receives the scalar value that supplies vector lane i; a null
// entry means that lane is zero (see comment below). collectInsertionElements
// returns false when the integer cannot be decomposed this way.
2334  SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2335  if (!collectInsertionElements(IntInput, 0, Elements,
2336  DestVecTy->getElementType(),
2337  IC.getDataLayout().isBigEndian()))
2338  return nullptr;
2339 
2340  // If we succeeded, we know that all of the element are specified by Elements
2341  // or are zero if Elements has a null entry. Recast this as a set of
2342  // insertions.
2343  Value *Result = Constant::getNullValue(CI.getType());
2344  for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2345  if (!Elements[i]) continue; // Unset element.
2346 
2347  Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2348  IC.Builder.getInt32(i));
2349  }
2350 
2351  return Result;
2352 }
2353 
2354 /// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2355 /// vector followed by extract element. The backend tends to handle bitcasts of
2356 /// vectors better than bitcasts of scalars because vector registers are
2357 /// usually not type-specific like scalar integer or scalar floating-point.
// NOTE(review): the first line of the signature (original line 2358) is
// missing from this extraction. Per this page's own symbol index it is
// "static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast," --
// confirm against upstream.
2359  InstCombinerImpl &IC) {
2360  Value *VecOp, *Index;
2361  if (!match(BitCast.getOperand(0),
// NOTE(review): the pattern argument (original line 2362) is missing here;
// given the bound values it is presumably a one-use m_ExtractElt pattern
// capturing VecOp and Index -- confirm against upstream.
2363  return nullptr;
2364 
2365  // The bitcast must be to a vectorizable type, otherwise we can't make a new
2366  // type to extract from.
2367  Type *DestType = BitCast.getType();
2368  VectorType *VecType = cast<VectorType>(VecOp->getType());
2369  if (VectorType::isValidElementType(DestType)) {
2370  auto *NewVecType = VectorType::get(DestType, VecType);
2371  auto *NewBC = IC.Builder.CreateBitCast(VecOp, NewVecType, "bc");
2372  return ExtractElementInst::Create(NewBC, Index);
2373  }
2374 
2375  // Only solve DestType is vector to avoid inverse transform in visitBitCast.
2376  // bitcast (extractelement <1 x elt>, dest) -> bitcast(<1 x elt>, dest)
2377  auto *FixedVType = dyn_cast<FixedVectorType>(VecType);
2378  if (DestType->isVectorTy() && FixedVType && FixedVType->getNumElements() == 1)
2379  return CastInst::Create(Instruction::BitCast, VecOp, DestType);
2380 
2381  return nullptr;
2382 }
2383 
2384 /// Change the type of a bitwise logic operation if we can eliminate a bitcast.
// NOTE(review): the two signature lines (original lines 2385-2386) are missing
// from this extraction. From the body's use of BitCast and Builder, the
// signature is presumably
// "static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
//                                              InstCombiner::BuilderTy &Builder) {"
// -- confirm against upstream before compiling.
2387  Type *DestTy = BitCast.getType();
2388  BinaryOperator *BO;
2389 
// Only fire on a single-use and/or/xor feeding the bitcast.
2390  if (!match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2391  !BO->isBitwiseLogicOp())
2392  return nullptr;
2393 
2394  // FIXME: This transform is restricted to vector types to avoid backend
2395  // problems caused by creating potentially illegal operations. If a fix-up is
2396  // added to handle that situation, we can remove this check.
2397  if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2398  return nullptr;
2399 
2400  if (DestTy->isFPOrFPVectorTy()) {
2401  Value *X, *Y;
2402  // bitcast(logic(bitcast(X), bitcast(Y))) -> bitcast'(logic(bitcast'(X), Y))
2403  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2404  match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(Y))))) {
2405  if (X->getType()->isFPOrFPVectorTy() &&
2406  Y->getType()->isIntOrIntVectorTy()) {
2407  Value *CastedOp =
2408  Builder.CreateBitCast(BO->getOperand(0), Y->getType());
2409  Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, Y);
2410  return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2411  }
2412  if (X->getType()->isIntOrIntVectorTy() &&
2413  Y->getType()->isFPOrFPVectorTy()) {
2414  Value *CastedOp =
2415  Builder.CreateBitCast(BO->getOperand(1), X->getType());
2416  Value *NewBO = Builder.CreateBinOp(BO->getOpcode(), CastedOp, X);
2417  return CastInst::CreateBitOrPointerCast(NewBO, DestTy);
2418  }
2419  }
2420  return nullptr;
2421  }
2422 
2423  if (!DestTy->isIntOrIntVectorTy())
2424  return nullptr;
2425 
2426  Value *X;
2427  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2428  X->getType() == DestTy && !isa<Constant>(X)) {
2429  // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2430  Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2431  return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2432  }
2433 
2434  if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2435  X->getType() == DestTy && !isa<Constant>(X)) {
2436  // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2437  Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2438  return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2439  }
2440 
2441  // Canonicalize vector bitcasts to come before vector bitwise logic with a
2442  // constant. This eases recognition of special constants for later ops.
2443  // Example:
2444  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2445  Constant *C;
2446  if (match(BO->getOperand(1), m_Constant(C))) {
2447  // bitcast (logic X, C) --> logic (bitcast X, C')
2448  Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2449  Value *CastedC = Builder.CreateBitCast(C, DestTy);
2450  return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2451  }
2452 
2453  return nullptr;
2454 }
2455 
2456 /// Change the type of a select if we can eliminate a bitcast.
// NOTE(review): the two signature lines (original lines 2457-2458) are missing
// from this extraction. Per this page's own symbol index the signature is
// "static Instruction *foldBitCastSelect(BitCastInst &BitCast,
//                                        InstCombiner::BuilderTy &Builder)"
// -- confirm against upstream before compiling.
2459  Value *Cond, *TVal, *FVal;
2460  if (!match(BitCast.getOperand(0),
2461  m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2462  return nullptr;
2463 
2464  // A vector select must maintain the same number of elements in its operands.
2465  Type *CondTy = Cond->getType();
2466  Type *DestTy = BitCast.getType();
2467  if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2468  if (!DestTy->isVectorTy() ||
2469  CondVTy->getElementCount() !=
2470  cast<VectorType>(DestTy)->getElementCount())
2471  return nullptr;
2472 
2473  // FIXME: This transform is restricted from changing the select between
2474  // scalars and vectors to avoid backend problems caused by creating
2475  // potentially illegal operations. If a fix-up is added to handle that
2476  // situation, we can remove this check.
2477  if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2478  return nullptr;
2479 
// The new select is created at the position of the original select (passed as
// the insert-before instruction) so metadata/ordering stays sensible.
2480  auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2481  Value *X;
2482  if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2483  !isa<Constant>(X)) {
2484  // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2485  Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2486  return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2487  }
2488 
2489  if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2490  !isa<Constant>(X)) {
2491  // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2492  Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2493  return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2494  }
2495 
2496  return nullptr;
2497 }
2498 
2499 /// Check if all users of CI are StoreInsts.
2500 static bool hasStoreUsersOnly(CastInst &CI) {
2501  for (User *U : CI.users()) {
2502  if (!isa<StoreInst>(U))
2503  return false;
2504  }
2505  return true;
2506 }
2507 
2508 /// This function handles following case
2509 ///
2510 /// A -> B cast
2511 /// PHI
2512 /// B -> A cast
2513 ///
2514 /// All the related PHI nodes can be replaced by new PHI nodes with type A.
2515 /// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2516 Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2517  PHINode *PN) {
2518  // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2519  if (hasStoreUsersOnly(CI))
2520  return nullptr;
2521 
2522  Value *Src = CI.getOperand(0);
2523  Type *SrcTy = Src->getType(); // Type B
2524  Type *DestTy = CI.getType(); // Type A
2525 
2526  SmallVector<PHINode *, 4> PhiWorklist;
2527  SmallSetVector<PHINode *, 4> OldPhiNodes;
2528 
2529  // Find all of the A->B casts and PHI nodes.
2530  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2531  // OldPhiNodes is used to track all known PHI nodes, before adding a new
2532  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2533  PhiWorklist.push_back(PN);
2534  OldPhiNodes.insert(PN);
2535  while (!PhiWorklist.empty()) {
2536  auto *OldPN = PhiWorklist.pop_back_val();
2537  for (Value *IncValue : OldPN->incoming_values()) {
2538  if (isa<Constant>(IncValue))
2539  continue;
2540 
2541  if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2542  // If there is a sequence of one or more load instructions, each loaded
2543  // value is used as address of later load instruction, bitcast is
2544  // necessary to change the value type, don't optimize it. For
2545  // simplicity we give up if the load address comes from another load.
2546  Value *Addr = LI->getOperand(0);
2547  if (Addr == &CI || isa<LoadInst>(Addr))
2548  return nullptr;
2549  // Don't tranform "load <256 x i32>, <256 x i32>*" to
2550  // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2551  // TODO: Remove this check when bitcast between vector and x86_amx
2552  // is replaced with a specific intrinsic.
2553  if (DestTy->isX86_AMXTy())
2554  return nullptr;
2555  if (LI->hasOneUse() && LI->isSimple())
2556  continue;
2557  // If a LoadInst has more than one use, changing the type of loaded
2558  // value may create another bitcast.
2559  return nullptr;
2560  }
2561 
2562  if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2563  if (OldPhiNodes.insert(PNode))
2564  PhiWorklist.push_back(PNode)
2565  continue;
2566  }
2567 
2568  auto *BCI = dyn_cast<BitCastInst>(IncValue);
2569  // We can't handle other instructions.
2570  if (!BCI)
2571  return nullptr;
2572 
2573  // Verify it's a A->B cast.
2574  Type *TyA = BCI->getOperand(0)->getType();
2575  Type *TyB = BCI->getType();
2576  if (TyA != DestTy || TyB != SrcTy)
2577  return nullptr;
2578  }
2579  }
2580 
2581  // Check that each user of each old PHI node is something that we can
2582  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2583  for (auto *OldPN : OldPhiNodes) {
2584  for (User *V : OldPN->users()) {
2585  if (auto *SI = dyn_cast<StoreInst>(V)) {
2586  if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2587  return nullptr;
2588  } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2589  // Verify it's a B->A cast.
2590  Type *TyB = BCI->getOperand(0)->getType();
2591  Type *TyA = BCI->getType();
2592  if (TyA != DestTy || TyB != SrcTy)
2593  return nullptr;
2594  } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2595  // As long as the user is another old PHI node, then even if we don't
2596  // rewrite it, the PHI web we're considering won't have any users
2597  // outside itself, so it'll be dead.
2598  if (!OldPhiNodes.contains(PHI))
2599  return nullptr;
2600  } else {
2601  return nullptr;
2602  }
2603  }
2604  }
2605 
2606  // For each old PHI node, create a corresponding new PHI node with a type A.
// NOTE(review): the declaration on original line 2607 is missing from this
// extraction; given the uses of NewPNodes below it is presumably a
// "SmallDenseMap<PHINode *, PHINode *, ...> NewPNodes;" mapping old PHIs to
// their type-A replacements -- confirm against upstream.
2608  for (auto *OldPN : OldPhiNodes) {
2609  Builder.SetInsertPoint(OldPN);
2610  PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2611  NewPNodes[OldPN] = NewPN;
2612  }
2613 
2614  // Fill in the operands of new PHI nodes.
2615  for (auto *OldPN : OldPhiNodes) {
2616  PHINode *NewPN = NewPNodes[OldPN];
2617  for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2618  Value *V = OldPN->getOperand(j);
2619  Value *NewV = nullptr;
2620  if (auto *C = dyn_cast<Constant>(V)) {
2621  NewV = ConstantExpr::getBitCast(C, DestTy);
2622  } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2623  // Explicitly perform load combine to make sure no opposing transform
2624  // can remove the bitcast in the meantime and trigger an infinite loop.
2625  Builder.SetInsertPoint(LI);
2626  NewV = combineLoadToNewType(*LI, DestTy);
2627  // Remove the old load and its use in the old phi, which itself becomes
2628  // dead once the whole transform finishes.
2629  replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2630  eraseInstFromFunction(*LI);
2631  } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2632  NewV = BCI->getOperand(0);
2633  } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2634  NewV = NewPNodes[PrevPN];
2635  }
2636  assert(NewV);
2637  NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2638  }
2639  }
2640 
2641  // Traverse all accumulated PHI nodes and process its users,
2642  // which are Stores and BitcCasts. Without this processing
2643  // NewPHI nodes could be replicated and could lead to extra
2644  // moves generated after DeSSA.
2645  // If there is a store with type B, change it to type A.
2646 
2647 
2648  // Replace users of BitCast B->A with NewPHI. These will help
2649  // later to get rid off a closure formed by OldPHI nodes.
2650  Instruction *RetVal = nullptr;
2651  for (auto *OldPN : OldPhiNodes) {
2652  PHINode *NewPN = NewPNodes[OldPN];
2653  for (User *V : make_early_inc_range(OldPN->users())) {
2654  if (auto *SI = dyn_cast<StoreInst>(V)) {
2655  assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2656  Builder.SetInsertPoint(SI);
2657  auto *NewBC =
2658  cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2659  SI->setOperand(0, NewBC);
2660  Worklist.push(SI);
2661  assert(hasStoreUsersOnly(*NewBC));
2662  }
2663  else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2664  Type *TyB = BCI->getOperand(0)->getType();
2665  Type *TyA = BCI->getType();
2666  assert(TyA == DestTy && TyB == SrcTy);
2667  (void) TyA;
2668  (void) TyB;
2669  Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2670  if (BCI == &CI)
2671  RetVal = I;
2672  } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2673  assert(OldPhiNodes.contains(PHI));
2674  (void) PHI;
2675  } else {
2676  llvm_unreachable("all uses should be handled");
2677  }
2678  }
2679  }
2680 
2681  return RetVal;
2682 }
2683 
// NOTE(review): the first line of the signature (original line 2684) is
// missing from this extraction. Per this page's own symbol index the
// signature is "static Instruction *convertBitCastToGEP(BitCastInst &CI,
// IRBuilderBase &Builder, const DataLayout &DL)" -- confirm against upstream.
2685  const DataLayout &DL) {
2686  Value *Src = CI.getOperand(0);
2687  PointerType *SrcPTy = cast<PointerType>(Src->getType());
2688  PointerType *DstPTy = cast<PointerType>(CI.getType());
2689 
2690  // Bitcasts involving opaque pointers cannot be converted into a GEP.
2691  if (SrcPTy->isOpaque() || DstPTy->isOpaque())
2692  return nullptr;
2693 
2694  Type *DstElTy = DstPTy->getNonOpaquePointerElementType();
2695  Type *SrcElTy = SrcPTy->getNonOpaquePointerElementType();
2696 
2697  // When the type pointed to is not sized the cast cannot be
2698  // turned into a gep.
2699  if (!SrcElTy->isSized())
2700  return nullptr;
2701 
2702  // If the source and destination are pointers, and this cast is equivalent
2703  // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
2704  // This can enhance SROA and other transforms that want type-safe pointers.
2705  unsigned NumZeros = 0;
2706  while (SrcElTy && SrcElTy != DstElTy) {
2707  SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
2708  ++NumZeros;
2709  }
2710 
2711  // If we found a path from the src to dest, create the getelementptr now.
2712  if (SrcElTy == DstElTy) {
2713  SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
// NOTE(review): original line 2714 is missing from this extraction; given
// line 2715's arguments and the later "GEP->setIsInBounds()" / "return GEP;",
// it presumably creates the GEP instruction, e.g.
// "GetElementPtrInst *GEP = GetElementPtrInst::Create(" -- confirm upstream.
2715  SrcPTy->getNonOpaquePointerElementType(), Src, Idxs);
2716 
2717  // If the source pointer is dereferenceable, then assume it points to an
2718  // allocated object and apply "inbounds" to the GEP.
2719  bool CanBeNull, CanBeFreed;
2720  if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
2721  // In a non-default address space (not 0), a null pointer can not be
2722  // assumed inbounds, so ignore that case (dereferenceable_or_null).
2723  // The reason is that 'null' is not treated differently in these address
2724  // spaces, and we consequently ignore the 'gep inbounds' special case
2725  // for 'null' which allows 'inbounds' on 'null' if the indices are
2726  // zeros.
2727  if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
2728  GEP->setIsInBounds();
2729  }
2730  return GEP;
2731  }
2732  return nullptr;
2733 }
2734 
// NOTE(review): the signature (original line 2735) is missing from this
// extraction. Per this page's own symbol index it is
// "Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI)" -- confirm
// against upstream before compiling.
2736  // If the operands are integer typed then apply the integer transforms,
2737  // otherwise just apply the common ones.
2738  Value *Src = CI.getOperand(0);
2739  Type *SrcTy = Src->getType();
2740  Type *DestTy = CI.getType();
2741 
2742  // Get rid of casts from one type to the same type. These are useless and can
2743  // be replaced by the operand.
2744  if (DestTy == Src->getType())
2745  return replaceInstUsesWith(CI, Src);
2746 
2747  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
2748  // If we are casting a alloca to a pointer to a type of the same
2749  // size, rewrite the allocation instruction to allocate the "right" type.
2750  // There is no need to modify malloc calls because it is their bitcast that
2751  // needs to be cleaned up.
2752  if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
2753  if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
2754  return V;
2755 
// NOTE(review): original line 2756 is missing; from the dangling "return I;"
// it is presumably "if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))"
// (the helper defined above) -- confirm against upstream.
2757  return I;
2758  }
2759 
2760  if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
2761  // Beware: messing with this target-specific oddity may cause trouble.
2762  if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
2763  Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
2764  return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
// NOTE(review): original line 2765 (the insertion-index argument closing this
// call) is missing; presumably a zero i32 index constant -- confirm upstream.
2766  }
2767 
2768  if (isa<IntegerType>(SrcTy)) {
2769  // If this is a cast from an integer to vector, check to see if the input
2770  // is a trunc or zext of a bitcast from vector. If so, we can replace all
2771  // the casts with a shuffle and (potentially) a bitcast.
2772  if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2773  CastInst *SrcCast = cast<CastInst>(Src);
2774  if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2775  if (isa<VectorType>(BCIn->getOperand(0)->getType()))
// NOTE(review): original line 2776 is missing; from line 2777's arguments it
// presumably begins "if (Instruction *I = <vector-resize helper>(" -- confirm
// against upstream.
2777  BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2778  return I;
2779  }
2780 
2781  // If the input is an 'or' instruction, we may be doing shifts and ors to
2782  // assemble the elements of the vector manually. Try to rip the code out
2783  // and replace it with insertelements.
2784  if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2785  return replaceInstUsesWith(CI, V);
2786  }
2787  }
2788 
2789  if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2790  if (SrcVTy->getNumElements() == 1) {
2791  // If our destination is not a vector, then make this a straight
2792  // scalar-scalar cast.
2793  if (!DestTy->isVectorTy()) {
2794  Value *Elem =
2795  Builder.CreateExtractElement(Src,
// NOTE(review): original line 2796 (the extraction-index argument closing
// this call) is missing; presumably a zero index constant -- confirm upstream.
2797  return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2798  }
2799 
2800  // Otherwise, see if our source is an insert. If so, then use the scalar
2801  // component directly:
2802  // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2803  if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2804  return new BitCastInst(InsElt->getOperand(1), DestTy);
2805  }
2806 
2807  // Convert an artificial vector insert into more analyzable bitwise logic.
2808  unsigned BitWidth = DestTy->getScalarSizeInBits();
2809  Value *X, *Y;
2810  uint64_t IndexC;
// NOTE(review): original line 2811 is missing; per the transform comment
// below ("bitcast (inselt (bitcast X), Y, 0) ...") it presumably begins a
// match of m_InsertElt(m_BitCast(m_Value(X)), ...) on Src -- confirm upstream.
2812  m_Value(Y), m_ConstantInt(IndexC)))) &&
2813  DestTy->isIntegerTy() && X->getType() == DestTy &&
2814  Y->getType()->isIntegerTy() && isDesirableIntType(BitWidth)) {
2815  // Adjust for big endian - the LSBs are at the high index.
2816  if (DL.isBigEndian())
2817  IndexC = SrcVTy->getNumElements() - 1 - IndexC;
2818 
2819  // We only handle (endian-normalized) insert to index 0. Any other insert
2820  // would require a left-shift, so that is an extra instruction.
2821  if (IndexC == 0) {
2822  // bitcast (inselt (bitcast X), Y, 0) --> or (and X, MaskC), (zext Y)
2823  unsigned EltWidth = Y->getType()->getScalarSizeInBits();
2824  APInt MaskC = APInt::getHighBitsSet(BitWidth, BitWidth - EltWidth);
2825  Value *AndX = Builder.CreateAnd(X, MaskC);
2826  Value *ZextY = Builder.CreateZExt(Y, DestTy);
2827  return BinaryOperator::CreateOr(AndX, ZextY);
2828  }
2829  }
2830  }
2831 
2832  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2833  // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2834  // a bitcast to a vector with the same # elts.
2835  Value *ShufOp0 = Shuf->getOperand(0);
2836  Value *ShufOp1 = Shuf->getOperand(1);
2837  auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2838  auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2839  if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2840  cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2841  ShufElts == SrcVecElts) {
2842  BitCastInst *Tmp;
2843  // If either of the operands is a cast from CI.getType(), then
2844  // evaluating the shuffle in the casted destination's type will allow
2845  // us to eliminate at least one cast.
2846  if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2847  Tmp->getOperand(0)->getType() == DestTy) ||
2848  ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2849  Tmp->getOperand(0)->getType() == DestTy)) {
2850  Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2851  Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2852  // Return a new shuffle vector. Use the same element ID's, as we
2853  // know the vector types match #elts.
2854  return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2855  }
2856  }
2857 
2858  // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
2859  // a byte-swap:
2860  // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
2861  // TODO: We should match the related pattern for bitreverse.
2862  if (DestTy->isIntegerTy() &&
2863  DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2864  SrcTy->getScalarSizeInBits() == 8 &&
2865  ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() &&
2866  Shuf->isReverse()) {
2867  assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2868  assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2869  Function *Bswap =
2870  Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy);
2871  Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2872  return CallInst::Create(Bswap, { ScalarX });
2873  }
2874  }
2875 
2876  // Handle the A->B->A cast, and there is an intervening PHI node.
2877  if (PHINode *PN = dyn_cast<PHINode>(Src))
2878  if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2879  return I;
2880 
2881  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2882  return I;
2883 
// NOTE(review): original line 2884 is missing; from the dangling "return I;"
// and the parallel foldBitCastSelect call below, it is presumably
// "if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))" -- confirm.
2885  return I;
2886 
2887  if (Instruction *I = foldBitCastSelect(CI, Builder))
2888  return I;
2889 
2890  if (SrcTy->isPointerTy())
2891  return commonPointerCastTransforms(CI);
2892  return commonCastTransforms(CI);
2893 }
2894 
// NOTE(review): the signature (original line 2895) is missing from this
// extraction. Per this page's own symbol index it is
// "Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI)"
// -- confirm against upstream before compiling.
2896  // If the destination pointer element type is not the same as the source's
2897  // first do a bitcast to the destination type, and then the addrspacecast.
2898  // This allows the cast to be exposed to other transforms.
2899  Value *Src = CI.getOperand(0);
2900  PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
2901  PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
2902 
2903  if (!SrcTy->hasSameElementTypeAs(DestTy)) {
2904  Type *MidTy =
// NOTE(review): the initializer of MidTy (original line 2905) is missing;
// given the header comment it presumably builds a pointer type with the
// destination's pointee in the source address space (e.g. via
// PointerType::getWithSamePointeeType) -- confirm against upstream.
2906  // Handle vectors of pointers.
2907  if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
2908  MidTy = VectorType::get(MidTy, VT->getElementCount());
2909 
2910  Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
2911  return new AddrSpaceCastInst(NewBitCast, CI.getType());
2912  }
2913 
2914  return commonPointerCastTransforms(CI);
2915 }
i
i
Definition: README.txt:29
shrinkInsertElt
static Instruction * shrinkInsertElt(CastInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of an insert element.
Definition: InstCombineCasts.cpp:745
llvm::Type::isSized
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:264
llvm::InstCombinerImpl::visitFPToSI
Instruction * visitFPToSI(FPToSIInst &FI)
Definition: InstCombineCasts.cpp:2012
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::isKnownNonNegative
bool isKnownNonNegative(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Returns true if the give value is known to be non-negative.
Definition: ValueTracking.cpp:347
llvm::MaskedValueIsZero
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if 'V & Mask' is known to be zero.
Definition: ValueTracking.cpp:391
llvm::PatternMatch::m_Mask
Definition: PatternMatch.h:1508
llvm::SPIRV::FunctionParameterAttribute::Zext
@ Zext
llvm::PHINode::incoming_values
op_range incoming_values()
Definition: Instructions.h:2737
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Instruction::isBitwiseLogicOp
static bool isBitwiseLogicOp(unsigned Opcode)
Determine if the Opcode is and/or/xor.
Definition: Instruction.h:212
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
InstCombiner.h
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1418
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::Type::isX86_MMXTy
bool isX86_MMXTy() const
Return true if this is X86 MMX.
Definition: Type.h:173
llvm::AllocaInst::getAlign
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:121
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:218
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::APFloatBase::IEEEsingle
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:170
llvm::ConstantExpr::getZExt
static Constant * getZExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2156
llvm::Function
Definition: Function.h:60
llvm::Attribute
Definition: Attributes.h:65
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
convertBitCastToGEP
static Instruction * convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder, const DataLayout &DL)
Definition: InstCombineCasts.cpp:2684
llvm::PatternMatch::m_LShr
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1111
llvm::APInt::isPowerOf2
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:425
canonicalizeBitCastExtElt
static Instruction * canonicalizeBitCastExtElt(BitCastInst &BitCast, InstCombinerImpl &IC)
Canonicalize scalar bitcasts of extracted elements into a bitcast of the vector followed by extract e...
Definition: InstCombineCasts.cpp:2358
collectInsertionElements
static bool collectInsertionElements(Value *V, unsigned Shift, SmallVectorImpl< Value * > &Elements, Type *VecEltTy, bool isBigEndian)
V is a value which is inserted into a vector of VecEltTy.
Definition: InstCombineCasts.cpp:2218
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5212
llvm::KnownBits::Zero
APInt Zero
Definition: KnownBits.h:24
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:309
llvm::PatternMatch::m_InsertElt
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
Definition: PatternMatch.h:1478
llvm::ConstantExpr::getSExt
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2142
llvm::AllocaInst::getType
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:100
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::InstCombiner::Builder
BuilderTy & Builder
Definition: InstCombiner.h:58
isMultipleOfTypeSize
static bool isMultipleOfTypeSize(unsigned Value, Type *Ty)
Definition: InstCombineCasts.cpp:2200
llvm::BinaryOperator::CreateFDivFMF
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name="")
Definition: InstrTypes.h:272
llvm::PatternMatch::m_CombineOr
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:212
llvm::IRBuilder< TargetFolder, IRBuilderCallbackInserter >
hasStoreUsersOnly
static bool hasStoreUsersOnly(CastInst &CI)
Check if all users of CI are StoreInsts.
Definition: InstCombineCasts.cpp:2500
llvm::CastInst::Create
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Definition: Instructions.cpp:3182
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
llvm::SmallDenseMap
Definition: DenseMap.h:882
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::ConstantExpr::getBitCast
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2266
llvm::CastInst::isEliminableCastPair
static unsigned isEliminableCastPair(Instruction::CastOps firstOpcode, Instruction::CastOps secondOpcode, Type *SrcTy, Type *MidTy, Type *DstTy, Type *SrcIntPtrTy, Type *MidIntPtrTy, Type *DstIntPtrTy)
Determine how a pair of casts can be eliminated, if they can be at all.
Definition: Instructions.cpp:2957
foldBitCastSelect
static Instruction * foldBitCastSelect(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a select if we can eliminate a bitcast.
Definition: InstCombineCasts.cpp:2457
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:179
llvm::InsertElementInst::Create
static InsertElementInst * Create(Value *Vec, Value *NewElt, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1937
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:746
Shift
bool Shift
Definition: README.txt:468
llvm::ExtractElementInst::Create
static ExtractElementInst * Create(Value *Vec, Value *Idx, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1872
llvm::InstCombinerImpl::visitAddrSpaceCast
Instruction * visitAddrSpaceCast(AddrSpaceCastInst &CI)
Definition: InstCombineCasts.cpp:2895
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::PatternMatch::m_FPToUI
CastClass_match< OpTy, Instruction::FPToUI > m_FPToUI(const OpTy &Op)
Definition: PatternMatch.h:1666
canAlwaysEvaluateInType
static bool canAlwaysEvaluateInType(Value *V, Type *Ty)
Constants and extensions/truncates from the destination type are always free to be evaluated in that ...
Definition: InstCombineCasts.cpp:347
llvm::InstCombinerImpl::visitBitCast
Instruction * visitBitCast(BitCastInst &CI)
Definition: InstCombineCasts.cpp:2735
isBigEndian
static Optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
Definition: CombinerHelper.cpp:108
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:298
llvm::Optional< unsigned >
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::PatternMatch::m_BinOp
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:84
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::PatternMatch::m_AShr
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1117
llvm::ComputeNumSignBits
unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return the number of times the sign bit of the register is replicated into the other bits.
Definition: ValueTracking.cpp:415
llvm::IRBuilderBase::CreateInsertElement
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2295
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
shrinkSplatShuffle
static Instruction * shrinkSplatShuffle(TruncInst &Trunc, InstCombiner::BuilderTy &Builder)
Try to narrow the width of a splat shuffle.
Definition: InstCombineCasts.cpp:725
llvm::CastInst::CreateIntegerCast
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
Definition: Instructions.cpp:3348
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::OverflowingBinaryOperator::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition: Operator.h:96
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::matchSelectPattern
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
Definition: ValueTracking.cpp:6350
llvm::PatternMatch::m_c_And
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
Definition: PatternMatch.h:2245
llvm::AllocaInst::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the allocation.
Definition: Instructions.h:105
llvm::PatternMatch::m_BitCast
CastClass_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
Definition: PatternMatch.h:1587
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
llvm::PatternMatch::m_VScale
VScaleVal_match m_VScale(const DataLayout &DL)
Definition: PatternMatch.h:2483
llvm::Type::getNonOpaquePointerElementType
Type * getNonOpaquePointerElementType() const
Only use this method in code that is not reachable with opaque pointers, or part of deprecated method...
Definition: Type.h:382
ConstantFolding.h
llvm::EmitGEPOffset
Value * EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP, bool NoAssumptions=false)
Given a getelementptr instruction/constantexpr, emit the code necessary to compute the offset from th...
Definition: Local.h:29
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::PatternMatch::m_Deferred
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:784
llvm::InstCombinerImpl::visitFPExt
Instruction * visitFPExt(CastInst &CI)
Definition: InstCombineCasts.cpp:1947
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::Type::getWithNewType
Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
Definition: DerivedTypes.h:716
llvm::NVPTX::PTXLdStInstCode::VecType
VecType
Definition: NVPTX.h:121
llvm::CastInst::getDestTy
Type * getDestTy() const
Return the destination type, as a convenience.
Definition: InstrTypes.h:683
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::IntToPtrInst::getAddressSpace
unsigned getAddressSpace() const
Returns the address space of this instruction's pointer type.
Definition: Instructions.h:5143
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::APInt::countPopulation
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1571
KnownBits.h
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::ConstantExpr::getSub
static Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2734
llvm::PtrToIntInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:5194
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:272
llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1171
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1374
llvm::SPF_UNKNOWN
@ SPF_UNKNOWN
Definition: ValueTracking.h:699
canEvaluateSExtd
static bool canEvaluateSExtd(Value *V, Type *Ty)
Return true if we can take the specified value and return it as type Ty without inserting any new cas...
Definition: InstCombineCasts.cpp:1451
llvm::SelectInst::Create
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
Definition: Instructions.h:1755
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::AddrSpaceCastInst
This class represents a conversion between pointers from one address space to another.
Definition: Instructions.h:5252
InstCombineInternal.h
llvm::PatternMatch::m_Select
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
Definition: PatternMatch.h:1456
llvm::PatternMatch::m_ZExtOrSExt
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
Definition: PatternMatch.h:1642
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::PHINode::getIncomingValue
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
Definition: Instructions.h:2747
llvm::AllocaInst::getAllocatedType
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:114
getScalarSizeInBits
static unsigned getScalarSizeInBits(Type *Ty)
Definition: SystemZTargetTransformInfo.cpp:403
llvm::APInt::isZero
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:359
shrinkFPConstantVector
static Type * shrinkFPConstantVector(Value *V)
Definition: InstCombineCasts.cpp:1663
llvm::Type::getPPC_FP128Ty
static Type * getPPC_FP128Ty(LLVMContext &C)
Definition: Type.cpp:232
llvm::APFloatBase::IEEEhalf
static const fltSemantics & IEEEhalf() LLVM_READNONE
Definition: APFloat.cpp:164
llvm::User
Definition: User.h:44
llvm::PatternMatch::m_SpecificIntAllowUndef
specific_intval< true > m_SpecificIntAllowUndef(APInt V)
Definition: PatternMatch.h:856
llvm::Type::getDoubleTy
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:227
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::CmpInst::ICMP_ULE
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:745
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::InstCombinerImpl::visitSIToFP
Instruction * visitSIToFP(CastInst &CI)
Definition: InstCombineCasts.cpp:2023
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:56
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::InstCombinerImpl::commonCastTransforms
Instruction * commonCastTransforms(CastInst &CI)
Implement the transforms common to all CastInst visitors.
Definition: InstCombineCasts.cpp:280
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
decomposeSimpleLinearExpr
static Value * decomposeSimpleLinearExpr(Value *Val, unsigned &Scale, uint64_t &Offset)
Analyze 'Val', seeing if it is a simple linear expression.
Definition: InstCombineCasts.cpp:29
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1504
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::CastInst::CreateFPCast
static CastInst * CreateFPCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create an FPExt, BitCast, or FPTrunc for fp -> fp casts.
Definition: Instructions.cpp:3376
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1623
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:623
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Function::getFnAttribute
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:651
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:800
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1027
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:395
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:392
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::PatternMatch::m_c_Or
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
Definition: PatternMatch.h:2252
optimizeVectorResizeWithIntegerBitCasts
static Instruction * optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy, InstCombinerImpl &IC)
This input value (which is known to have vector type) is being zero extended or truncated to the spec...
Definition: InstCombineCasts.cpp:2130
llvm::AllocaInst::getArraySize
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:96
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1466
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::APInt::getHighBitsSet
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:279
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1777
llvm::InstCombinerImpl
Definition: InstCombineInternal.h:61
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
PatternMatch.h
llvm::APInt::countTrailingZeros
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1543
llvm::CastInst::CreateTruncOrBitCast
static CastInst * CreateTruncOrBitCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a Trunc or BitCast cast instruction.
Definition: Instructions.cpp:3258
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::PointerType::isOpaque
bool isOpaque() const
Definition: DerivedTypes.h:673
llvm::CastInst::getSrcTy
Type * getSrcTy() const
Return the source type, as a convenience.
Definition: InstrTypes.h:681
optimizeIntegerToVectorInsertions
static Value * optimizeIntegerToVectorInsertions(BitCastInst &CI, InstCombinerImpl &IC)
If the input is an 'or' instruction, we may be doing shifts and ors to assemble the elements of the v...
Definition: InstCombineCasts.cpp:2329
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2743
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:511
llvm::ArrayRef::take_back
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:234
llvm::PatternMatch::m_ExtractElt
TwoOps_match< Val_t, Idx_t, Instruction::ExtractElement > m_ExtractElt(const Val_t &Val, const Idx_t &Idx)
Matches ExtractElementInst.
Definition: PatternMatch.h:1486
llvm::InstCombinerImpl::visitPtrToInt
Instruction * visitPtrToInt(PtrToIntInst &CI)
Definition: InstCombineCasts.cpp:2069
llvm::PatternMatch::m_Xor
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1099
llvm::PtrToIntInst::getPointerOperand
Value * getPointerOperand()
Gets the pointer operand.
Definition: Instructions.h:5187
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::PatternMatch::m_Shr
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
Definition: PatternMatch.h:1297
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
Definition: Type.cpp:675
llvm::InstCombinerImpl::visitTrunc
Instruction * visitTrunc(TruncInst &CI)
Definition: InstCombineCasts.cpp:772
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::DataLayout::isBigEndian
bool isBigEndian() const
Definition: DataLayout.h:245
llvm::ConstantExpr::getTrunc
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2128
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:625
llvm::InstCombinerImpl::visitZExt
Instruction * visitZExt(ZExtInst &CI)
Definition: InstCombineCasts.cpp:1238
llvm::APFloat
Definition: APFloat.h:701
llvm::Type::getFPMantissaWidth
int getFPMantissaWidth() const
Return the width of the mantissa of this type.
Definition: Type.cpp:194
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1958
llvm::InstCombinerImpl::PromoteCastOfAllocation
Instruction * PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI)
If we find a cast of an allocation instruction, try to eliminate the cast by moving the type informat...
Definition: InstCombineCasts.cpp:84
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::UnaryOperator::CreateFNegFMF
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:164
llvm::Constant::isElementWiseEqual
bool isElementWiseEqual(Value *Y) const
Return true if this constant and a constant 'Y' are element-wise equal.
Definition: Constants.cpp:284
llvm::ICmpInst
This instruction compares its operands according to the predicate given to the constructor.
Definition: Instructions.h:1173
shrinkFPConstant
static Type * shrinkFPConstant(ConstantFP *CFP)
Definition: InstCombineCasts.cpp:1643
llvm::Attribute::getVScaleRangeMax
Optional< unsigned > getVScaleRangeMax() const
Returns the maximum value for the vscale_range attribute or None when unknown.
Definition: Attributes.cpp:366
llvm::BinaryOperator::CreateFMulFMF
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name="")
Definition: InstrTypes.h:267
uint64_t
llvm::FPToSIInst
This class represents a cast from floating point to signed integer.
Definition: Instructions.h:5079
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:371
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:78
llvm::InstCombinerImpl::visitIntToPtr
Instruction * visitIntToPtr(IntToPtrInst &CI)
Definition: InstCombineCasts.cpp:2027
llvm::Instruction::user_back
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:88
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4767
llvm::IRBuilderBase::getInt32
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:461
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2801
llvm::PatternMatch::m_Or
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1093
llvm::PatternMatch::m_AllOnes
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:447
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::InstCombinerImpl::visitFPToUI
Instruction * visitFPToUI(FPToUIInst &FI)
Definition: InstCombineCasts.cpp:2005
llvm::UndefValue
'undef' values are things that do not have specified contents.
Definition: Constants.h:1386
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:916
llvm::ConstantExpr::getShl
static Constant * getShl(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2799
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:618
llvm::ConstantExpr::getOr
static Constant * getOr(Constant *C1, Constant *C2)
Definition: Constants.cpp:2786
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1087
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::KnownBits::getMaxValue
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:136
llvm::ICmpInst::isEquality
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Definition: Instructions.h:1262
llvm::computeKnownBits
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Definition: ValueTracking.cpp:222
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:985
llvm::InstCombinerImpl::EvaluateInDifferentType
Value * EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned)
Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns true for,...
Definition: InstCombineCasts.cpp:182
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::SelectInst
This class represents the LLVM 'select' instruction.
Definition: Instructions.h:1724
llvm::is_splat
bool is_splat(R &&Range)
Wrapper function around std::equal to detect if all elements in a container are same.
Definition: STLExtras.h:1793
llvm::FPToUIInst
This class represents a cast from floating point to unsigned integer.
Definition: Instructions.h:5040
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
llvm::replaceAllDbgUsesWith
bool replaceAllDbgUsesWith(Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT)
Point debug users of From to To or salvage them.
Definition: Local.cpp:2043
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::ZExtInst
This class represents zero extension of integer types.
Definition: Instructions.h:4806
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:748
llvm::GetElementPtrInst::Create
static GetElementPtrInst * Create(Type *PointeeType, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:942
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1617
llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:848
foldBitCastBitwiseLogic
static Instruction * foldBitCastBitwiseLogic(BitCastInst &BitCast, InstCombiner::BuilderTy &Builder)
Change the type of a bitwise logic operation if we can eliminate a bitcast.
Definition: InstCombineCasts.cpp:2385
llvm::ArrayRef< int >
llvm::BinaryOperator
Definition: InstrTypes.h:188
llvm::InstCombinerImpl::ComputeNumSignBits
unsigned ComputeNumSignBits(const Value *Op, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombineInternal.h:491
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
DataLayout.h
llvm::PatternMatch::m_SpecificInt_ICMP
cst_pred_ty< icmp_pred_with_threshold > m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold)
Match an integer or vector with every element comparing 'pred' (eg/ne/...) to Threshold.
Definition: PatternMatch.h:592
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:173
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::ConstantExpr::getUMin
static Constant * getUMin(Constant *C1, Constant *C2)
Definition: Constants.cpp:2794
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:744
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:69
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1545
uint32_t
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:991
llvm::IRBuilderBase::InsertPointGuard
Definition: IRBuilder.h:350
llvm::IRBuilderBase
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:93
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::PtrToIntInst
This class represents a cast from a pointer to an integer.
Definition: Instructions.h:5161
llvm::ConstantExpr::getIntegerCast
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2104
llvm::FPExtInst
This class represents an extension of floating point types.
Definition: Instructions.h:4923
trunc
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g trunc
Definition: README-FPStack.txt:63
llvm::Instruction::copyFastMathFlags
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
Definition: Instruction.cpp:244
llvm::OverflowingBinaryOperator
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition: Operator.h:72
llvm::Type::isPtrOrPtrVectorTy
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:224
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:429
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1061
llvm::SExtInst
This class represents a sign extension of integer types.
Definition: Instructions.h:4845
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::InstCombinerImpl::visitSExt
Instruction * visitSExt(SExtInst &CI)
Definition: InstCombineCasts.cpp:1499
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
llvm::PatternMatch::m_FPExt
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:1681
llvm::InstCombinerImpl::commonPointerCastTransforms
Instruction * commonPointerCastTransforms(CastInst &CI)
Implement the transforms for cast of pointer (bitcast/ptrtoint)
Definition: InstCombineCasts.cpp:2047
j
return j(j<< 16)
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
canEvaluateTruncated
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC, Instruction *CxtI)
Return true if we can evaluate the specified expression tree as type Ty instead of its larger type,...
Definition: InstCombineCasts.cpp:383
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::KnownBits
Definition: KnownBits.h:23
llvm::PointerType::hasSameElementTypeAs
bool hasSameElementTypeAs(PointerType *Other)
Return true if both pointer types have the same element type.
Definition: DerivedTypes.h:696
llvm::Type::getHalfTy
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::CastInst::CreateBitOrPointerCast
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
Definition: Instructions.cpp:3337
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2693
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::ArrayRef::take_front
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:227
canEvaluateZExtd
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear, InstCombinerImpl &IC, Instruction *CxtI)
Determine if the specified value can be computed in the specified wider type and produce the same low...
Definition: InstCombineCasts.cpp:1132
llvm::PatternMatch::m_FPToSI
CastClass_match< OpTy, Instruction::FPToSI > m_FPToSI(const OpTy &Op)
Definition: PatternMatch.h:1671
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:197
llvm::fltSemantics
Definition: APFloat.cpp:54
llvm::GetElementPtrInst::getTypeAtIndex
static Type * getTypeAtIndex(Type *Ty, Value *Idx)
Return the type of the element at the given index of an indexable type.
Definition: Instructions.cpp:1750
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:518
llvm::ConstantExpr::getLShr
static Constant * getLShr(Constant *C1, Constant *C2, bool isExact=false)
Definition: Constants.cpp:2806
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::IntToPtrInst
This class represents a cast from an integer to a pointer.
Definition: Instructions.h:5118
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:436
llvm::PatternMatch::m_LogicalShift
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
Definition: PatternMatch.h:1305
llvm::Constant::mergeUndefsWith
static Constant * mergeUndefsWith(Constant *C, Constant *Other)
Merges undefs of a Constant with another Constant, along with the undefs already present.
Definition: Constants.cpp:820
fitsInFPType
static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem)
Return a Constant* for the specified floating-point constant if it fits in the specified FP type with...
Definition: InstCombineCasts.cpp:1636
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:786
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:1995
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
llvm::AllocaInst::isUsedWithInAlloca
bool isUsedWithInAlloca() const
Return true if this alloca is used as an inalloca argument to a call.
Definition: Instructions.h:136
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:766
getMinimumFPType
static Type * getMinimumFPType(Value *V)
Find the minimum FP type we can safely truncate to.
Definition: InstCombineCasts.cpp:1695
llvm::PointerType::getWithSamePointeeType
static PointerType * getWithSamePointeeType(PointerType *PT, unsigned AddressSpace)
This constructs a pointer type with the same pointee type as input PointerType (or opaque pointer if ...
Definition: DerivedTypes.h:666
llvm::FPTruncInst
This class represents a truncation of floating point types.
Definition: Instructions.h:4884
llvm::CastInst::getOpcode
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:676
llvm::CmpInst::getPredicate
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:809
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2767
llvm::PHINode
Definition: Instructions.h:2651
getTypeSizeIndex
static unsigned getTypeSizeIndex(unsigned Value, Type *Ty)
Definition: InstCombineCasts.cpp:2204
llvm::SmallVectorImpl< Value * >
llvm::ConstantFoldConstant
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
Definition: ConstantFolding.cpp:1206
llvm::PatternMatch::m_IntToPtr
CastClass_match< OpTy, Instruction::IntToPtr > m_IntToPtr(const OpTy &Op)
Matches IntToPtr.
Definition: PatternMatch.h:1599
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::PatternMatch::m_Neg
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Definition: PatternMatch.h:2267
llvm::APInt::getLowBitsSet
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:289
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::InstCombinerImpl::foldItoFPtoI
Instruction * foldItoFPtoI(CastInst &FI)
fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate ty...
Definition: InstCombineCasts.cpp:1965
CreateAdd
static BinaryOperator * CreateAdd(Value *S1, Value *S2, const Twine &Name, Instruction *InsertBefore, Value *FlagsOp)
Definition: Reassociate.cpp:231
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:378
llvm::APInt::getBitsSetFrom
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:269
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:58
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
canNotEvaluateInType
static bool canNotEvaluateInType(Value *V, Type *Ty)
Filter out values that we can not evaluate in the destination type for free.
Definition: InstCombineCasts.cpp:360
llvm::PatternMatch::m_Trunc
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
Definition: PatternMatch.h:1605
llvm::BinaryOperator::Create
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Definition: Instructions.cpp:2778
hasOneUse
static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI, MachineDominatorTree &MDT, LiveIntervals &LIS)
Definition: WebAssemblyRegStackify.cpp:284
llvm::InstCombinerImpl::visitFPTrunc
Instruction * visitFPTrunc(FPTruncInst &CI)
Definition: InstCombineCasts.cpp:1763
llvm::InstCombinerImpl::visitUIToFP
Instruction * visitUIToFP(CastInst &CI)
Definition: InstCombineCasts.cpp:2019
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::PatternMatch::m_Shl
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1105
llvm::InstCombinerImpl::MaskedValueIsZero
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth=0, const Instruction *CxtI=nullptr) const
Definition: InstCombineInternal.h:486
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:668
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::Type::getFloatTy
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:226
llvm::SrcOp
Definition: MachineIRBuilder.h:126
llvm::Type::isX86_AMXTy
bool isX86_AMXTy() const
Return true if this is X86 AMX.
Definition: Type.h:176
isKnownExactCastIntToFP
static bool isKnownExactCastIntToFP(CastInst &I)
Return true if the cast from integer to FP can be proven to be exact for all possible inputs (the con...
Definition: InstCombineCasts.cpp:1723
SetVector.h
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:164
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
foldVecTruncToExtElt
static Instruction * foldVecTruncToExtElt(TruncInst &Trunc, InstCombinerImpl &IC)
Given a vector that is bitcast to an integer, optionally logically right-shifted, and truncated,...
Definition: InstCombineCasts.cpp:500
llvm::PoisonValue::get
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1796