//===- InstCombineCasts.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for cast operations.
//
//===----------------------------------------------------------------------===//

#include "InstCombineInternal.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <numeric>
using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "instcombine"

/// Analyze 'Val', seeing if it is a simple linear expression.
/// If so, decompose it, returning some value X, such that Val is
/// X*Scale+Offset.
///
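/// For example (an illustrative case): (X << 2) + 12 decomposes to X with
/// Scale = 4 and Offset = 12, since (X << 2) + 12 == X*4 + 12.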
static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
                                        uint64_t &Offset) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    Offset = CI->getZExtValue();
    Scale = 0;
    return ConstantInt::get(Val->getType(), 0);
  }

  if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
    // Cannot look past anything that might overflow.
    OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
    if (OBI && !OBI->hasNoUnsignedWrap() && !OBI->hasNoSignedWrap()) {
      Scale = 1;
      Offset = 0;
      return Val;
    }

    if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
      if (I->getOpcode() == Instruction::Shl) {
        // This is a value scaled by '1 << the shift amt'.
        Scale = UINT64_C(1) << RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Mul) {
        // This value is scaled by 'RHS'.
        Scale = RHS->getZExtValue();
        Offset = 0;
        return I->getOperand(0);
      }

      if (I->getOpcode() == Instruction::Add) {
        // We have X+C.  Check to see if we really have (X*C2)+C1,
        // where C1 is divisible by C2.
        unsigned SubScale;
        Value *SubVal =
            decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
        Offset += RHS->getZExtValue();
        Scale = SubScale;
        return SubVal;
      }
    }
  }

  // Otherwise, we can't look past this.
  Scale = 1;
  Offset = 0;
  return Val;
}

/// If we find a cast of an allocation instruction, try to eliminate the cast by
/// moving the type information into the alloc.
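/// For example (illustrative IR), a one-use bitcast of an alloca such as:
///   %a = alloca [2 x i32]
///   %b = bitcast [2 x i32]* %a to i64*
/// can be rewritten so the alloca produces the cast type directly:
///   %a = alloca i64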
Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
                                                       AllocaInst &AI) {
  PointerType *PTy = cast<PointerType>(CI.getType());

  IRBuilderBase::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(&AI);

  // Get the type really allocated and the type casted to.
  Type *AllocElTy = AI.getAllocatedType();
  Type *CastElTy = PTy->getElementType();
  if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;

  // This optimisation does not work for cases where the cast type
  // is scalable and the allocated type is not. This is because we need to
  // know how many times the casted type fits into the allocated type.
  // For the opposite case where the allocated type is scalable and the
  // cast type is not, this leads to poor code quality due to the
  // introduction of 'vscale' into the calculations. It seems better to
  // bail out for this case too until we've done a proper cost-benefit
  // analysis.
  bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
  bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
  if (AllocIsScalable != CastIsScalable) return nullptr;

  Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
  Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
  if (CastElTyAlign < AllocElTyAlign) return nullptr;

  // If the allocation has multiple uses, only promote it if we are strictly
  // increasing the alignment of the resultant allocation.  If we keep it the
  // same, we open the door to infinite loops of various kinds.
  if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;

  // The alloc and cast types should be either both fixed or both scalable.
  uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinSize();
  if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;

  // If the allocation has multiple uses, only promote it if we're not
  // shrinking the amount of memory being allocated.
  uint64_t AllocElTyStoreSize = DL.getTypeStoreSize(AllocElTy).getKnownMinSize();
  uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinSize();
  if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;

  // See if we can satisfy the modulus by pulling a scale out of the array
  // size argument.
  unsigned ArraySizeScale;
  uint64_t ArrayOffset;
  Value *NumElements = // See if the array size is a decomposable linear expr.
      decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);

  // If we can now satisfy the modulus, by using a non-1 scale, we really can
  // do the xform.
  if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
      (AllocElTySize*ArrayOffset   ) % CastElTySize != 0) return nullptr;

  // We don't currently support arrays of scalable types.
  assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));

  unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
  Value *Amt = nullptr;
  if (Scale == 1) {
    Amt = NumElements;
  } else {
    Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
    // Insert before the alloca, not before the cast.
    Amt = Builder.CreateMul(Amt, NumElements);
  }

  if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
    Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                  Offset, true);
    Amt = Builder.CreateAdd(Amt, Off);
  }

  AllocaInst *New = Builder.CreateAlloca(CastElTy, Amt);
  New->setAlignment(AI.getAlign());
  New->takeName(&AI);
  New->setUsedWithInAlloca(AI.isUsedWithInAlloca());

  // If the allocation has multiple real uses, insert a cast and change all
  // things that used it to use the new cast.  This will also hack on CI, but it
  // will die soon.
  if (!AI.hasOneUse()) {
    // New is the allocation instruction, pointer typed. AI is the original
    // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
    Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
    replaceInstUsesWith(AI, NewCast);
    eraseInstFromFunction(AI);
  }
  return replaceInstUsesWith(CI, New);
}

/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
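/// For example (illustrative), evaluating (add i32 (zext i8 %x to i32), 42)
/// in i8 yields (add i8 %x, 42): the zext collapses to its source and the
/// constant is recast to the narrow type.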
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
                                                 bool isSigned) {
  if (Constant *C = dyn_cast<Constant>(V)) {
    C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
    // If we got a constantexpr back, try to simplify it with DL info.
    return ConstantFoldConstant(C, DL, &TLI);
  }

  // Otherwise, it must be an instruction.
  Instruction *I = cast<Instruction>(V);
  Instruction *Res = nullptr;
  unsigned Opc = I->getOpcode();
  switch (Opc) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::AShr:
  case Instruction::LShr:
  case Instruction::Shl:
  case Instruction::UDiv:
  case Instruction::URem: {
    Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
    Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
    break;
  }
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
    // If the source type of the cast is the type we're trying for then we can
    // just return the source.  There's no need to insert it because it is not
    // new.
    if (I->getOperand(0)->getType() == Ty)
      return I->getOperand(0);

    // Otherwise, must be the same type of cast, so just reinsert a new one.
    // This also handles the case of zext(trunc(x)) -> zext(x).
    Res = CastInst::CreateIntegerCast(I->getOperand(0), Ty,
                                      Opc == Instruction::SExt);
    break;
  case Instruction::Select: {
    Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
    Value *False = EvaluateInDifferentType(I->getOperand(2), Ty, isSigned);
    Res = SelectInst::Create(I->getOperand(0), True, False);
    break;
  }
  case Instruction::PHI: {
    PHINode *OPN = cast<PHINode>(I);
    PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues());
    for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) {
      Value *V =
          EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned);
      NPN->addIncoming(V, OPN->getIncomingBlock(i));
    }
    Res = NPN;
    break;
  }
  default:
    // TODO: Can handle more cases here.
    llvm_unreachable("Unreachable!");
  }

  Res->takeName(I);
  return InsertNewInstWith(Res, *I);
}

Instruction::CastOps
InstCombinerImpl::isEliminableCastPair(const CastInst *CI1,
                                       const CastInst *CI2) {
  Type *SrcTy = CI1->getSrcTy();
  Type *MidTy = CI1->getDestTy();
  Type *DstTy = CI2->getDestTy();

  Instruction::CastOps firstOp = CI1->getOpcode();
  Instruction::CastOps secondOp = CI2->getOpcode();
  Type *SrcIntPtrTy =
      SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
  Type *MidIntPtrTy =
      MidTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(MidTy) : nullptr;
  Type *DstIntPtrTy =
      DstTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(DstTy) : nullptr;
  unsigned Res = CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy,
                                                DstTy, SrcIntPtrTy, MidIntPtrTy,
                                                DstIntPtrTy);

  // We don't want to form an inttoptr or ptrtoint that converts to an integer
  // type that differs from the pointer size.
  if ((Res == Instruction::IntToPtr && SrcTy != DstIntPtrTy) ||
      (Res == Instruction::PtrToInt && DstTy != SrcIntPtrTy))
    Res = 0;

  return Instruction::CastOps(Res);
}

/// Implement the transforms common to all CastInst visitors.
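/// For example (illustrative), a cast-of-cast pair such as:
///   %t = zext i8 %x to i16
///   %r = zext i16 %t to i32
/// is folded here into a single zext i8 %x to i32.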
Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
  Value *Src = CI.getOperand(0);
  Type *Ty = CI.getType();

  // Try to eliminate a cast of a cast.
  if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
    if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
      // The first cast (CSrc) is eliminable so we need to fix up or replace
      // the second cast (CI). CSrc will then have a good chance of being dead.
      auto *Res = CastInst::Create(NewOpc, CSrc->getOperand(0), Ty);
      // Point debug users of the dying cast to the new one.
      if (CSrc->hasOneUse())
        replaceAllDbgUsesWith(*CSrc, *Res, CI, DT);
      return Res;
    }
  }

  if (auto *Sel = dyn_cast<SelectInst>(Src)) {
    // We are casting a select. Try to fold the cast into the select if the
    // select does not have a compare instruction with matching operand types
    // or the select is likely better done in a narrow type.
    // Creating a select with operands that are different sizes than its
    // condition may inhibit other folds and lead to worse codegen.
    auto *Cmp = dyn_cast<CmpInst>(Sel->getCondition());
    if (!Cmp || Cmp->getOperand(0)->getType() != Sel->getType() ||
        (CI.getOpcode() == Instruction::Trunc &&
         shouldChangeType(CI.getSrcTy(), CI.getType()))) {
      if (Instruction *NV = FoldOpIntoSelect(CI, Sel)) {
        replaceAllDbgUsesWith(*Sel, *NV, CI, DT);
        return NV;
      }
    }
  }

  // If we are casting a PHI, then fold the cast into the PHI.
  if (auto *PN = dyn_cast<PHINode>(Src)) {
    // Don't do this if it would create a PHI node with an illegal type from a
    // legal type.
    if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
        shouldChangeType(CI.getSrcTy(), CI.getType()))
      if (Instruction *NV = foldOpIntoPhi(CI, PN))
        return NV;
  }

  // Canonicalize a unary shuffle after the cast if neither operation changes
  // the size or element size of the input vector.
  // TODO: We could allow size-changing ops if that doesn't harm codegen.
  // cast (shuffle X, Mask) --> shuffle (cast X), Mask
  Value *X;
  ArrayRef<int> Mask;
  if (match(Src, m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))) {
    // TODO: Allow scalable vectors?
    auto *SrcTy = dyn_cast<FixedVectorType>(X->getType());
    auto *DestTy = dyn_cast<FixedVectorType>(Ty);
    if (SrcTy && DestTy &&
        SrcTy->getNumElements() == DestTy->getNumElements() &&
        SrcTy->getPrimitiveSizeInBits() == DestTy->getPrimitiveSizeInBits()) {
      Value *CastX = Builder.CreateCast(CI.getOpcode(), X, DestTy);
      return new ShuffleVectorInst(CastX, UndefValue::get(DestTy), Mask);
    }
  }

  return nullptr;
}

/// Constants and extensions/truncates from the destination type are always
/// free to be evaluated in that type. This is a helper for canEvaluate*.
static bool canAlwaysEvaluateInType(Value *V, Type *Ty) {
  if (isa<Constant>(V))
    return true;
  Value *X;
  if ((match(V, m_ZExtOrSExt(m_Value(X))) || match(V, m_Trunc(m_Value(X)))) &&
      X->getType() == Ty)
    return true;

  return false;
}

/// Filter out values that we can not evaluate in the destination type for free.
/// This is a helper for canEvaluate*.
static bool canNotEvaluateInType(Value *V, Type *Ty) {
  assert(!isa<Constant>(V) && "Constant should already be handled.");
  if (!isa<Instruction>(V))
    return true;
  // We don't extend or shrink something that has multiple uses -- doing so
  // would require duplicating the instruction which isn't profitable.
  if (!V->hasOneUse())
    return true;

  return false;
}

/// Return true if we can evaluate the specified expression tree as type Ty
/// instead of its larger type, and arrive with the same value.
/// This is used by code that tries to eliminate truncates.
///
/// Ty will always be a type smaller than V.  We should return true if trunc(V)
/// can be computed by computing V in the smaller type.  If V is an instruction,
/// then trunc(inst(x,y)) can be computed as inst(trunc(x),trunc(y)), which only
/// makes sense if x and y can be efficiently truncated.
///
/// This function works on both vectors and scalars.
///
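/// For example (illustrative), trunc (and i32 %x, 255) to i8 can instead be
/// computed as and i8 (trunc %x), 255, eliminating the truncate of the 'and'.
///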
static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
                                 Instruction *CxtI) {
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  Type *OrigTy = V->getType();
  switch (I->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    // These operators can all arbitrarily be extended or truncated.
    return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
           canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);

  case Instruction::UDiv:
  case Instruction::URem: {
    // UDiv and URem can be truncated if all the truncated bits are zero.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    assert(BitWidth < OrigBitWidth && "Unexpected bitwidths!");
    APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (IC.MaskedValueIsZero(I->getOperand(0), Mask, 0, CxtI) &&
        IC.MaskedValueIsZero(I->getOperand(1), Mask, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::Shl: {
    // If we are truncating the result of this SHL, and if it's a shift of an
    // in-range amount, we can always perform a SHL in a smaller type.
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    if (AmtKnownBits.getMaxValue().ult(BitWidth))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::LShr: {
    // If this is a truncate of a logical shr, we can truncate it to a smaller
    // lshr iff we know that the bits we would otherwise be shifting in are
    // already zeros.
    // TODO: It is enough to check that the bits we would be shifting in are
    //       zero - use AmtKnownBits.getMaxValue().
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    }
    break;
  }
  case Instruction::AShr: {
    // If this is a truncate of an arithmetic shr, we can truncate it to a
    // smaller ashr iff we know that all the bits from the sign bit of the
    // original type and the sign bit of the truncate type are similar.
    // TODO: It is enough to check that the bits we would be shifting in are
    //       similar to sign bit of the truncate type.
    uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
    uint32_t BitWidth = Ty->getScalarSizeInBits();
    KnownBits AmtKnownBits =
        llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
    unsigned ShiftedBits = OrigBitWidth - BitWidth;
    if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
        ShiftedBits < IC.ComputeNumSignBits(I->getOperand(0), 0, CxtI))
      return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
             canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
    break;
  }
  case Instruction::Trunc:
    // trunc(trunc(x)) -> trunc(x)
    return true;
  case Instruction::ZExt:
  case Instruction::SExt:
    // trunc(ext(x)) -> ext(x) if the source type is smaller than the new dest
    // trunc(ext(x)) -> trunc(x) if the source type is larger than the new dest
    return true;
  case Instruction::Select: {
    SelectInst *SI = cast<SelectInst>(I);
    return canEvaluateTruncated(SI->getTrueValue(), Ty, IC, CxtI) &&
           canEvaluateTruncated(SI->getFalseValue(), Ty, IC, CxtI);
  }
  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    for (Value *IncValue : PN->incoming_values())
      if (!canEvaluateTruncated(IncValue, Ty, IC, CxtI))
        return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    break;
  }

  return false;
}

/// Given a vector that is bitcast to an integer, optionally logically
/// right-shifted, and truncated, convert it to an extractelement.
/// Example (big endian):
///   trunc (lshr (bitcast <4 x i32> %X to i128), 32) to i32
///   --->
///   extractelement <4 x i32> %X, 1
static Instruction *foldVecTruncToExtElt(TruncInst &Trunc,
                                         InstCombinerImpl &IC) {
  Value *TruncOp = Trunc.getOperand(0);
  Type *DestType = Trunc.getType();
  if (!TruncOp->hasOneUse() || !isa<IntegerType>(DestType))
    return nullptr;

  Value *VecInput = nullptr;
  ConstantInt *ShiftVal = nullptr;
  if (!match(TruncOp, m_CombineOr(m_BitCast(m_Value(VecInput)),
                                  m_LShr(m_BitCast(m_Value(VecInput)),
                                         m_ConstantInt(ShiftVal)))) ||
      !isa<VectorType>(VecInput->getType()))
    return nullptr;

  VectorType *VecType = cast<VectorType>(VecInput->getType());
  unsigned VecWidth = VecType->getPrimitiveSizeInBits();
  unsigned DestWidth = DestType->getPrimitiveSizeInBits();
  unsigned ShiftAmount = ShiftVal ? ShiftVal->getZExtValue() : 0;

  if ((VecWidth % DestWidth != 0) || (ShiftAmount % DestWidth != 0))
    return nullptr;

  // If the element type of the vector doesn't match the result type,
  // bitcast it to a vector type that we can extract from.
  unsigned NumVecElts = VecWidth / DestWidth;
  if (VecType->getElementType() != DestType) {
    VecType = FixedVectorType::get(DestType, NumVecElts);
    VecInput = IC.Builder.CreateBitCast(VecInput, VecType, "bc");
  }

  unsigned Elt = ShiftAmount / DestWidth;
  if (IC.getDataLayout().isBigEndian())
    Elt = NumVecElts - 1 - Elt;

  return ExtractElementInst::Create(VecInput, IC.Builder.getInt32(Elt));
}

/// Funnel/Rotate left/right may occur in a wider type than necessary because of
/// type promotion rules. Try to narrow the inputs and convert to funnel shift.
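/// For example (illustrative IR), an i8 rotate-left performed in i32:
///   trunc (or (shl (zext i8 %x to i32), %amt),
///             (lshr (zext i8 %x to i32), (sub 8, %amt))) to i8
/// can become a direct call to llvm.fshl.i8(%x, %x, trunc %amt).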
Instruction *InstCombinerImpl::narrowFunnelShift(TruncInst &Trunc) {
  assert((isa<VectorType>(Trunc.getSrcTy()) ||
          shouldChangeType(Trunc.getSrcTy(), Trunc.getType())) &&
         "Don't narrow to an illegal scalar type");

  // Bail out on strange types. It is possible to handle some of these patterns
  // even with non-power-of-2 sizes, but it is not a likely scenario.
  Type *DestTy = Trunc.getType();
  unsigned NarrowWidth = DestTy->getScalarSizeInBits();
  unsigned WideWidth = Trunc.getSrcTy()->getScalarSizeInBits();
  if (!isPowerOf2_32(NarrowWidth))
    return nullptr;

  // First, find an or'd pair of opposite shifts:
  // trunc (or (lshr ShVal0, ShAmt0), (shl ShVal1, ShAmt1))
  BinaryOperator *Or0, *Or1;
  if (!match(Trunc.getOperand(0), m_OneUse(m_Or(m_BinOp(Or0), m_BinOp(Or1)))))
    return nullptr;

  Value *ShVal0, *ShVal1, *ShAmt0, *ShAmt1;
  if (!match(Or0, m_OneUse(m_LogicalShift(m_Value(ShVal0), m_Value(ShAmt0)))) ||
      !match(Or1, m_OneUse(m_LogicalShift(m_Value(ShVal1), m_Value(ShAmt1)))) ||
      Or0->getOpcode() == Or1->getOpcode())
    return nullptr;

  // Canonicalize to or(shl(ShVal0, ShAmt0), lshr(ShVal1, ShAmt1)).
  if (Or0->getOpcode() == BinaryOperator::LShr) {
    std::swap(Or0, Or1);
    std::swap(ShVal0, ShVal1);
    std::swap(ShAmt0, ShAmt1);
  }
  assert(Or0->getOpcode() == BinaryOperator::Shl &&
         Or1->getOpcode() == BinaryOperator::LShr &&
         "Illegal or(shift,shift) pair");

  // Match the shift amount operands for a funnel/rotate pattern. This always
  // matches a subtraction on the R operand.
  auto matchShiftAmount = [&](Value *L, Value *R, unsigned Width) -> Value * {
    // The shift amounts may add up to the narrow bit width:
    // (shl ShVal0, L) | (lshr ShVal1, Width - L)
    // If this is a funnel shift (different operands are shifted), then the
    // shift amount can not over-shift (create poison) in the narrow type.
    unsigned MaxShiftAmountWidth = Log2_32(NarrowWidth);
    APInt HiBitMask = ~APInt::getLowBitsSet(WideWidth, MaxShiftAmountWidth);
    if (ShVal0 == ShVal1 || MaskedValueIsZero(L, HiBitMask))
      if (match(R, m_OneUse(m_Sub(m_SpecificInt(Width), m_Specific(L)))))
        return L;

    // The following patterns currently only work for rotation patterns.
    // TODO: Add more general funnel-shift compatible patterns.
    if (ShVal0 != ShVal1)
      return nullptr;

    // The shift amount may be masked with negation:
    // (shl ShVal0, (X & (Width - 1))) | (lshr ShVal1, ((-X) & (Width - 1)))
    Value *X;
    unsigned Mask = Width - 1;
    if (match(L, m_And(m_Value(X), m_SpecificInt(Mask))) &&
        match(R, m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask))))
      return X;

    // Same as above, but the shift amount may be extended after masking:
    if (match(L, m_ZExt(m_And(m_Value(X), m_SpecificInt(Mask)))) &&
        match(R, m_ZExt(m_And(m_Neg(m_Specific(X)), m_SpecificInt(Mask)))))
      return X;

    return nullptr;
  };

  Value *ShAmt = matchShiftAmount(ShAmt0, ShAmt1, NarrowWidth);
  bool IsFshl = true; // Sub on LSHR.
  if (!ShAmt) {
    ShAmt = matchShiftAmount(ShAmt1, ShAmt0, NarrowWidth);
    IsFshl = false; // Sub on SHL.
  }
  if (!ShAmt)
    return nullptr;

  // The right-shifted value must have high zeros in the wide type (for example
  // from 'zext', 'and' or 'shift'). High bits of the left-shifted value are
  // truncated, so those do not matter.
  APInt HiBitMask = APInt::getHighBitsSet(WideWidth, WideWidth - NarrowWidth);
  if (!MaskedValueIsZero(ShVal1, HiBitMask, 0, &Trunc))
    return nullptr;

  // We have an unnecessarily wide rotate!
  // trunc (or (shl ShVal0, ShAmt), (lshr ShVal1, BitWidth - ShAmt))
  // Narrow the inputs and convert to funnel shift intrinsic:
  // llvm.fshl.i8(trunc(ShVal), trunc(ShVal), trunc(ShAmt))
  Value *NarrowShAmt = Builder.CreateTrunc(ShAmt, DestTy);
  Value *X, *Y;
  X = Y = Builder.CreateTrunc(ShVal0, DestTy);
  if (ShVal0 != ShVal1)
    Y = Builder.CreateTrunc(ShVal1, DestTy);
  Intrinsic::ID IID = IsFshl ? Intrinsic::fshl : Intrinsic::fshr;
  Function *F = Intrinsic::getDeclaration(Trunc.getModule(), IID, DestTy);
  return CallInst::Create(F, {X, Y, NarrowShAmt});
}

/// Try to narrow the width of math or bitwise logic instructions by pulling a
/// truncate ahead of binary operators.
/// TODO: Transforms for truncated shifts should be moved into here.
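/// For example (illustrative), trunc (add i64 %x, 15) to i32 becomes
/// add i32 (trunc %x), 15, so the arithmetic happens in the narrow type.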
Instruction *InstCombinerImpl::narrowBinOp(TruncInst &Trunc) {
  Type *SrcTy = Trunc.getSrcTy();
  Type *DestTy = Trunc.getType();
  if (!isa<VectorType>(SrcTy) && !shouldChangeType(SrcTy, DestTy))
    return nullptr;

  BinaryOperator *BinOp;
  if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(BinOp))))
    return nullptr;

  Value *BinOp0 = BinOp->getOperand(0);
  Value *BinOp1 = BinOp->getOperand(1);
  switch (BinOp->getOpcode()) {
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul: {
    Constant *C;
    if (match(BinOp0, m_Constant(C))) {
      // trunc (binop C, X) --> binop (trunc C', X)
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowC, TruncX);
    }
    if (match(BinOp1, m_Constant(C))) {
      // trunc (binop X, C) --> binop (trunc X, C')
      Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
      Value *TruncX = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), TruncX, NarrowC);
    }
    Value *X;
    if (match(BinOp0, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop (ext X), Y) --> binop X, (trunc Y)
      Value *NarrowOp1 = Builder.CreateTrunc(BinOp1, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), X, NarrowOp1);
    }
    if (match(BinOp1, m_ZExtOrSExt(m_Value(X))) && X->getType() == DestTy) {
      // trunc (binop Y, (ext X)) --> binop (trunc Y), X
      Value *NarrowOp0 = Builder.CreateTrunc(BinOp0, DestTy);
      return BinaryOperator::Create(BinOp->getOpcode(), NarrowOp0, X);
    }
    break;
  }

  default: break;
  }

  if (Instruction *NarrowOr = narrowFunnelShift(Trunc))
    return NarrowOr;

  return nullptr;
}

/// Try to narrow the width of a splat shuffle. This could be generalized to any
/// shuffle with a constant operand, but we limit the transform to avoid
/// creating a shuffle type that targets may not be able to lower effectively.
static Instruction *shrinkSplatShuffle(TruncInst &Trunc,
                                       InstCombiner::BuilderTy &Builder) {
  auto *Shuf = dyn_cast<ShuffleVectorInst>(Trunc.getOperand(0));
  if (Shuf && Shuf->hasOneUse() && match(Shuf->getOperand(1), m_Undef()) &&
      is_splat(Shuf->getShuffleMask()) &&
      Shuf->getType() == Shuf->getOperand(0)->getType()) {
    // trunc (shuf X, Undef, SplatMask) --> shuf (trunc X), Undef, SplatMask
    Constant *NarrowUndef = UndefValue::get(Trunc.getType());
    Value *NarrowOp = Builder.CreateTrunc(Shuf->getOperand(0), Trunc.getType());
    return new ShuffleVectorInst(NarrowOp, NarrowUndef, Shuf->getShuffleMask());
  }

  return nullptr;
}

/// Try to narrow the width of an insert element. This could be generalized for
/// any vector constant, but we limit the transform to insertion into undef to
/// avoid potential backend problems from unsupported insertion widths. This
/// could also be extended to handle the case of inserting a scalar constant
/// into a vector variable.
static Instruction *shrinkInsertElt(CastInst &Trunc,
                                    InstCombiner::BuilderTy &Builder) {
  Instruction::CastOps Opcode = Trunc.getOpcode();
  assert((Opcode == Instruction::Trunc || Opcode == Instruction::FPTrunc) &&
         "Unexpected instruction for shrinking");

  auto *InsElt = dyn_cast<InsertElementInst>(Trunc.getOperand(0));
  if (!InsElt || !InsElt->hasOneUse())
    return nullptr;

  Type *DestTy = Trunc.getType();
  Type *DestScalarTy = DestTy->getScalarType();
  Value *VecOp = InsElt->getOperand(0);
  Value *ScalarOp = InsElt->getOperand(1);
  Value *Index = InsElt->getOperand(2);

  if (match(VecOp, m_Undef())) {
    // trunc   (inselt undef, X, Index) --> inselt undef,   (trunc X), Index
    // fptrunc (inselt undef, X, Index) --> inselt undef, (fptrunc X), Index
    UndefValue *NarrowUndef = UndefValue::get(DestTy);
    Value *NarrowOp = Builder.CreateCast(Opcode, ScalarOp, DestScalarTy);
    return InsertElementInst::Create(NarrowUndef, NarrowOp, Index);
  }

  return nullptr;
}

Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
  if (Instruction *Result = commonCastTransforms(Trunc))
    return Result;

  Value *Src = Trunc.getOperand(0);
  Type *DestTy = Trunc.getType(), *SrcTy = Src->getType();
  unsigned DestWidth = DestTy->getScalarSizeInBits();
  unsigned SrcWidth = SrcTy->getScalarSizeInBits();

  // Attempt to truncate the entire input expression tree to the destination
  // type.  Only do this if the dest type is a simple type, don't convert the
  // expression tree to something weird like i93 unless the source is also
  // strange.
  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
      canEvaluateTruncated(Src, DestTy, *this, &Trunc)) {

    // If this cast is a truncate, evaluating in a different type always
    // eliminates the cast, so it is always a win.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid cast: "
               << Trunc << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);
    return replaceInstUsesWith(Trunc, Res);
  }

  // For integer types, check if we can shorten the entire input expression to
  // DestWidth * 2, which won't allow removing the truncate, but reducing the
  // width may enable further optimizations, e.g. allowing for larger
  // vectorization factors.
  if (auto *DestITy = dyn_cast<IntegerType>(DestTy)) {
    if (DestWidth * 2 < SrcWidth) {
      auto *NewDestTy = DestITy->getExtendedType();
      if (shouldChangeType(SrcTy, NewDestTy) &&
          canEvaluateTruncated(Src, NewDestTy, *this, &Trunc)) {
        LLVM_DEBUG(
            dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                      " to reduce the width of operand of"
                   << Trunc << '\n');
        Value *Res = EvaluateInDifferentType(Src, NewDestTy, false);
        return new TruncInst(Res, DestTy);
      }
    }
  }

  // Test if the trunc is the user of a select which is part of a
  // minimum or maximum operation. If so, don't do any more simplification.
  // Even simplifying demanded bits can break the canonical form of a
  // min/max.
  Value *LHS, *RHS;
  if (SelectInst *Sel = dyn_cast<SelectInst>(Src))
    if (matchSelectPattern(Sel, LHS, RHS).Flavor != SPF_UNKNOWN)
      return nullptr;

  // See if we can simplify any instructions used by the input whose sole
  // purpose is to compute bits we don't care about.
  if (SimplifyDemandedInstructionBits(Trunc))
    return &Trunc;

  if (DestWidth == 1) {
    Value *Zero = Constant::getNullValue(SrcTy);
    if (DestTy->isIntegerTy()) {
      // Canonicalize trunc x to i1 -> icmp ne (and x, 1), 0 (scalar only).
      // TODO: We canonicalize to more instructions here because we are probably
      // lacking equivalent analysis for trunc relative to icmp. There may also
      // be codegen concerns. If those trunc limitations were removed, we could
      // remove this transform.
      Value *And = Builder.CreateAnd(Src, ConstantInt::get(SrcTy, 1));
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }

    // For vectors, we do not canonicalize all truncs to icmp, so optimize
    // patterns that would be covered within visitICmpInst.
    Value *X;
    Constant *C;
    if (match(Src, m_OneUse(m_LShr(m_Value(X), m_Constant(C))))) {
      // trunc (lshr X, C) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
    if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_Constant(C)),
                                   m_Deferred(X))))) {
      // trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
      Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
      Constant *MaskC = ConstantExpr::getShl(One, C);
      MaskC = ConstantExpr::getOr(MaskC, One);
      Value *And = Builder.CreateAnd(X, MaskC);
      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
    }
  }

  Value *A, *B;
  Constant *C;
  if (match(Src, m_LShr(m_SExt(m_Value(A)), m_Constant(C)))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    unsigned MaxShiftAmt = SrcWidth - std::max(DestWidth, AWidth);
    auto *OldSh = cast<Instruction>(Src);
    bool IsExact = OldSh->isExact();

    // If the shift is small enough, all zero bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      // trunc (lshr (sext A), C) --> ashr A, C
      if (A->getType() == DestTy) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        ShAmt = Constant::mergeUndefsWith(ShAmt, C);
        return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
                       : BinaryOperator::CreateAShr(A, ShAmt);
      }
      // The types are mismatched, so create a cast after shifting:
      // trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
      if (Src->hasOneUse()) {
        Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
        Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
        ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
        Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
        return CastInst::CreateIntegerCast(Shift, DestTy, true);
      }
    }
    // TODO: Mask high bits with 'and'.
  }

  // trunc (*shr (trunc A), C) --> trunc(*shr A, C)
  if (match(Src, m_OneUse(m_Shr(m_Trunc(m_Value(A)), m_Constant(C))))) {
    unsigned MaxShiftAmt = SrcWidth - DestWidth;

    // If the shift is small enough, all zero/sign bits created by the shift are
    // removed by the trunc.
    if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
                                    APInt(SrcWidth, MaxShiftAmt)))) {
      auto *OldShift = cast<Instruction>(Src);
      bool IsExact = OldShift->isExact();
      auto *ShAmt = ConstantExpr::getIntegerCast(C, A->getType(), true);
      ShAmt = Constant::mergeUndefsWith(ShAmt, C);
      Value *Shift =
          OldShift->getOpcode() == Instruction::AShr
              ? Builder.CreateAShr(A, ShAmt, OldShift->getName(), IsExact)
              : Builder.CreateLShr(A, ShAmt, OldShift->getName(), IsExact);
      return CastInst::CreateTruncOrBitCast(Shift, DestTy);
    }
  }

  if (Instruction *I = narrowBinOp(Trunc))
    return I;

  if (Instruction *I = shrinkSplatShuffle(Trunc, Builder))
    return I;

  if (Instruction *I = shrinkInsertElt(Trunc, Builder))
    return I;

  if (Src->hasOneUse() &&
      (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
    // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
    // dest type is native and cst < dest size.
    if (match(Src, m_Shl(m_Value(A), m_Constant(C))) &&
        !match(A, m_Shr(m_Value(), m_Constant()))) {
      // Skip shifts of shift by constants. It undoes a combine in
      // FoldShiftByConstant and is the extend in reg pattern.
      APInt Threshold = APInt(C->getType()->getScalarSizeInBits(), DestWidth);
      if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold))) {
        Value *NewTrunc = Builder.CreateTrunc(A, DestTy, A->getName() + ".tr");
        return BinaryOperator::Create(Instruction::Shl, NewTrunc,
                                      ConstantExpr::getTrunc(C, DestTy));
      }
    }
  }

  if (Instruction *I = foldVecTruncToExtElt(Trunc, *this))
    return I;

  // Whenever an element is extracted from a vector, and then truncated,
  // canonicalize by converting it to a bitcast followed by an
  // extractelement.
  //
  // Example (little endian):
  //   trunc (extractelement <4 x i64> %X, 0) to i32
  //   --->
  //   extractelement <8 x i32> (bitcast <4 x i64> %X to <8 x i32>), i32 0
  Value *VecOp;
  ConstantInt *Cst;
  if (match(Src, m_OneUse(m_ExtractElt(m_Value(VecOp), m_ConstantInt(Cst))))) {
    auto *VecOpTy = cast<VectorType>(VecOp->getType());
    auto VecElts = VecOpTy->getElementCount();

    // A badly fit destination size would result in an invalid cast.
    if (SrcWidth % DestWidth == 0) {
      uint64_t TruncRatio = SrcWidth / DestWidth;
      uint64_t BitCastNumElts = VecElts.getKnownMinValue() * TruncRatio;
      uint64_t VecOpIdx = Cst->getZExtValue();
      uint64_t NewIdx = DL.isBigEndian() ? (VecOpIdx + 1) * TruncRatio - 1
                                         : VecOpIdx * TruncRatio;
      assert(BitCastNumElts <= std::numeric_limits<uint32_t>::max() &&
             "overflow 32-bits");

      auto *BitCastTo =
          VectorType::get(DestTy, BitCastNumElts, VecElts.isScalable());
      Value *BitCast = Builder.CreateBitCast(VecOp, BitCastTo);
      return ExtractElementInst::Create(BitCast, Builder.getInt32(NewIdx));
    }
  }

  // trunc (ctlz_i32(zext(A), B)) --> add(ctlz_i16(A, B), C)
  if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ctlz>(m_ZExt(m_Value(A)),
                                                       m_Value(B))))) {
    unsigned AWidth = A->getType()->getScalarSizeInBits();
    if (AWidth == DestWidth && AWidth > Log2_32(SrcWidth)) {
      Value *WidthDiff = ConstantInt::get(A->getType(), SrcWidth - AWidth);
      Value *NarrowCtlz =
          Builder.CreateIntrinsic(Intrinsic::ctlz, {Trunc.getType()}, {A, B});
      return BinaryOperator::CreateAdd(NarrowCtlz, WidthDiff);
    }
  }

  if (match(Src, m_VScale(DL))) {
    if (Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      unsigned MaxVScale = Trunc.getFunction()
                               ->getFnAttribute(Attribute::VScaleRange)
                               .getVScaleRangeArgs()
                               .second;
      if (MaxVScale > 0 && Log2_32(MaxVScale) < DestWidth) {
        Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
        return replaceInstUsesWith(Trunc, VScale);
      }
    }
  }

  return nullptr;
}

Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext) {
  // If we are just checking for a icmp eq of a single bit and zext'ing it
  // to an integer, then shift the bit to the appropriate place and then
  // cast to integer to avoid the comparison.
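  // For example (illustrative), zext (icmp ne (and i32 %x, 1), 0) to i32
  // folds to (and i32 %x, 1) directly, since the 'and' already yields 0 or 1.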
  const APInt *Op1CV;
  if (match(Cmp->getOperand(1), m_APInt(Op1CV))) {

    // zext (x <s  0) to i32 --> x>>u31      true if signbit set.
    // zext (x >s -1) to i32 --> (x>>u31)^1  true if signbit clear.
    if ((Cmp->getPredicate() == ICmpInst::ICMP_SLT && Op1CV->isNullValue()) ||
        (Cmp->getPredicate() == ICmpInst::ICMP_SGT && Op1CV->isAllOnesValue())) {
      Value *In = Cmp->getOperand(0);
      Value *Sh = ConstantInt::get(In->getType(),
                                   In->getType()->getScalarSizeInBits() - 1);
      In = Builder.CreateLShr(In, Sh, In->getName() + ".lobit");
      if (In->getType() != Zext.getType())
        In = Builder.CreateIntCast(In, Zext.getType(), false /*ZExt*/);

      if (Cmp->getPredicate() == ICmpInst::ICMP_SGT) {
        Constant *One = ConstantInt::get(In->getType(), 1);
        In = Builder.CreateXor(In, One, In->getName() + ".not");
      }

      return replaceInstUsesWith(Zext, In);
    }

    // zext (X == 0) to i32 --> X^1      iff X has only the low bit set.
    // zext (X == 0) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    // zext (X == 1) to i32 --> X        iff X has only the low bit set.
    // zext (X == 2) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 0) to i32 --> X        iff X has only the low bit set.
    // zext (X != 0) to i32 --> X>>1     iff X has only the 2nd bit set.
    // zext (X != 1) to i32 --> X^1      iff X has only the low bit set.
    // zext (X != 2) to i32 --> (X>>1)^1 iff X has only the 2nd bit set.
    if ((Op1CV->isNullValue() || Op1CV->isPowerOf2()) &&
        // This only works for EQ and NE
        Cmp->isEquality()) {
      // If Op1C some other power of two, convert:
      KnownBits Known = computeKnownBits(Cmp->getOperand(0), 0, &Zext);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
        bool isNE = Cmp->getPredicate() == ICmpInst::ICMP_NE;
        if (!Op1CV->isNullValue() && (*Op1CV != KnownZeroMask)) {
          // (X&4) == 2 --> false
          // (X&4) != 2 --> true
          Constant *Res = ConstantInt::get(Zext.getType(), isNE);
          return replaceInstUsesWith(Zext, Res);
        }

        uint32_t ShAmt = KnownZeroMask.logBase2();
        Value *In = Cmp->getOperand(0);
        if (ShAmt) {
          // Perform a logical shr by shiftamt.
          // Insert the shift to put the result in the low bit.
          In = Builder.CreateLShr(In, ConstantInt::get(In->getType(), ShAmt),
                                  In->getName() + ".lobit");
        }

        if (!Op1CV->isNullValue() == isNE) { // Toggle the low bit.
          Constant *One = ConstantInt::get(In->getType(), 1);
          In = Builder.CreateXor(In, One);
        }

        if (Zext.getType() == In->getType())
          return replaceInstUsesWith(Zext, In);

        Value *IntCast = Builder.CreateIntCast(In, Zext.getType(), false);
        return replaceInstUsesWith(Zext, IntCast);
      }
    }
  }

  if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
    // Test if a bit is clear/set using a shifted-one mask:
    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
    Value *X, *ShAmt;
    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
        match(Cmp->getOperand(0),
              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
      if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
        X = Builder.CreateNot(X);
      Value *Lshr = Builder.CreateLShr(X, ShAmt);
      Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
      return replaceInstUsesWith(Zext, And1);
    }

    // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
    // It is also profitable to transform icmp eq into not(xor(A, B)) because
    // that may lead to additional simplifications.
    if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
      Value *LHS = Cmp->getOperand(0);
      Value *RHS = Cmp->getOperand(1);

      KnownBits KnownLHS = computeKnownBits(LHS, 0, &Zext);
      KnownBits KnownRHS = computeKnownBits(RHS, 0, &Zext);

      if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) {
        APInt KnownBits = KnownLHS.Zero | KnownLHS.One;
        APInt UnknownBit = ~KnownBits;
        if (UnknownBit.countPopulation() == 1) {
          Value *Result = Builder.CreateXor(LHS, RHS);

          // Mask off any bits that are set and won't be shifted away.
          if (KnownLHS.One.uge(UnknownBit))
            Result = Builder.CreateAnd(Result,
                                       ConstantInt::get(ITy, UnknownBit));

          // Shift the bit we're testing down to the lsb.
          Result = Builder.CreateLShr(
              Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));

          if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
            Result = Builder.CreateXor(Result, ConstantInt::get(ITy, 1));
          Result->takeName(Cmp);
          return replaceInstUsesWith(Zext, Result);
        }
      }
    }
  }

  return nullptr;
}

/// Determine if the specified value can be computed in the specified wider type
/// and produce the same low bits. If not, return false.
///
/// If this function returns true, it can also return a non-zero number of bits
/// (in BitsToClear) which indicates that the value it computes is correct for
/// the zero extend, but that the additional BitsToClear bits need to be zero'd
/// out.  For example, to promote something like:
///
///   %B = trunc i64 %A to i32
///   %C = lshr i32 %B, 8
///   %E = zext i32 %C to i64
///
/// CanEvaluateZExtd for the 'lshr' will return true, and BitsToClear will be
/// set to 8 to indicate that the promoted value needs to have bits 24-31
/// cleared in addition to bits 32-63.  Since an 'and' will be generated to
/// clear the top bits anyway, doing this has no extra cost.
///
/// This function works on both vectors and scalars.
static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
                             InstCombinerImpl &IC, Instruction *CxtI) {
  BitsToClear = 0;
  if (canAlwaysEvaluateInType(V, Ty))
    return true;
  if (canNotEvaluateInType(V, Ty))
    return false;

  auto *I = cast<Instruction>(V);
  unsigned Tmp;
  switch (I->getOpcode()) {
  case Instruction::ZExt:  // zext(zext(x)) -> zext(x).
  case Instruction::SExt:  // zext(sext(x)) -> sext(x).
  case Instruction::Trunc: // zext(trunc(x)) -> trunc(x) or zext(x)
    return true;
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
    if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI))
      return false;
    // These can all be promoted if neither operand has 'bits to clear'.
    if (BitsToClear == 0 && Tmp == 0)
      return true;

    // If the operation is an AND/OR/XOR and the bits to clear are zero in the
    // other side, BitsToClear is ok.
    if (Tmp == 0 && I->isBitwiseLogicOp()) {
      // We use MaskedValueIsZero here for generality, but the case we care
      // about the most is constant RHS.
      unsigned VSize = V->getType()->getScalarSizeInBits();
      if (IC.MaskedValueIsZero(I->getOperand(1),
                               APInt::getHighBitsSet(VSize, BitsToClear),
                               0, CxtI)) {
        // If this is an And instruction and all of the BitsToClear are
        // known to be zero we can reset BitsToClear.
        if (I->getOpcode() == Instruction::And)
          BitsToClear = 0;
        return true;
      }
    }

    // Otherwise, we don't know how to analyze this BitsToClear case yet.
    return false;

  case Instruction::Shl: {
    // We can promote shl(x, cst) if we can promote x.  Since shl overwrites the
    // upper bits we can reduce BitsToClear by the shift amount.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      uint64_t ShiftAmt = Amt->getZExtValue();
      BitsToClear = ShiftAmt < BitsToClear ? BitsToClear - ShiftAmt : 0;
      return true;
    }
    return false;
  }
  case Instruction::LShr: {
    // We can promote lshr(x, cst) if we can promote x.  This requires the
    // ultimate 'and' to clear out the high zero bits we're clearing out though.
    const APInt *Amt;
    if (match(I->getOperand(1), m_APInt(Amt))) {
      if (!canEvaluateZExtd(I->getOperand(0), Ty, BitsToClear, IC, CxtI))
        return false;
      BitsToClear += Amt->getZExtValue();
      if (BitsToClear > V->getType()->getScalarSizeInBits())
        BitsToClear = V->getType()->getScalarSizeInBits();
      return true;
    }
    // Cannot promote variable LSHR.
    return false;
  }
  case Instruction::Select:
    if (!canEvaluateZExtd(I->getOperand(1), Ty, Tmp, IC, CxtI) ||
        !canEvaluateZExtd(I->getOperand(2), Ty, BitsToClear, IC, CxtI) ||
        // TODO: If important, we could handle the case when the BitsToClear are
        // known zero in the disagreeing side.
        Tmp != BitsToClear)
      return false;
    return true;

  case Instruction::PHI: {
    // We can change a phi if we can change all operands.  Note that we never
    // get into trouble with cyclic PHIs here because we only consider
    // instructions with a single use.
    PHINode *PN = cast<PHINode>(I);
    if (!canEvaluateZExtd(PN->getIncomingValue(0), Ty, BitsToClear, IC, CxtI))
      return false;
    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i)
      if (!canEvaluateZExtd(PN->getIncomingValue(i), Ty, Tmp, IC, CxtI) ||
          // TODO: If important, we could handle the case when the BitsToClear
          // are known zero in the disagreeing input.
          Tmp != BitsToClear)
        return false;
    return true;
  }
  default:
    // TODO: Can handle more cases here.
    return false;
  }
}

Instruction *InstCombinerImpl::visitZExt(ZExtInst &CI) {
  // If this zero extend is only used by a truncate, let the truncate be
  // eliminated before we try to optimize this zext.
  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
    return nullptr;

  // If one of the common conversion will work, do it.
  if (Instruction *Result = commonCastTransforms(CI))
    return Result;

  Value *Src = CI.getOperand(0);
  Type *SrcTy = Src->getType(), *DestTy = CI.getType();

  // Try to extend the entire expression tree to the wide destination type.
  unsigned BitsToClear;
  if (shouldChangeType(SrcTy, DestTy) &&
      canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
    assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
           "Can't clear more bits than in SrcTy");

    // Okay, we can transform this!  Insert the new expression now.
    LLVM_DEBUG(
        dbgs() << "ICE: EvaluateInDifferentType converting expression type"
                  " to avoid zero extend: "
               << CI << '\n');
    Value *Res = EvaluateInDifferentType(Src, DestTy, false);
    assert(Res->getType() == DestTy);

    // Preserve debug values referring to Src if the zext is its last use.
    if (auto *SrcOp = dyn_cast<Instruction>(Src))
      if (SrcOp->hasOneUse())
        replaceAllDbgUsesWith(*SrcOp, *Res, CI, DT);

    uint32_t SrcBitsKept = SrcTy->getScalarSizeInBits()-BitsToClear;
    uint32_t DestBitSize = DestTy->getScalarSizeInBits();

    // If the high bits are already filled with zeros, just replace this
    // cast with the result.
    if (MaskedValueIsZero(Res,
                          APInt::getHighBitsSet(DestBitSize,
                                                DestBitSize-SrcBitsKept),
                          0, &CI))
      return replaceInstUsesWith(CI, Res);

    // We need to emit an AND to clear the high bits.
    Constant *C = ConstantInt::get(Res->getType(),
                               APInt::getLowBitsSet(DestBitSize, SrcBitsKept));
    return BinaryOperator::CreateAnd(Res, C);
  }

  // If this is a TRUNC followed by a ZEXT then we are dealing with integral
  // types and if the sizes are just right we can convert this into a logical
  // 'and' which will be much cheaper than the pair of casts.
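  // For example (illustrative), with i32 -> i16 -> i32:
  //   zext (trunc i32 %a to i16) to i32 --> and i32 %a, 65535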
  if (TruncInst *CSrc = dyn_cast<TruncInst>(Src)) { // A->B->C cast
    // TODO: Subsume this into EvaluateInDifferentType.

    // Get the sizes of the types involved.  We know that the intermediate type
    // will be smaller than A or C, but don't know the relation between A and C.
    Value *A = CSrc->getOperand(0);
    unsigned SrcSize = A->getType()->getScalarSizeInBits();
    unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
    unsigned DstSize = CI.getType()->getScalarSizeInBits();
    // If we're actually extending zero bits, then if
    // SrcSize <  DstSize: zext(a & mask)
    // SrcSize == DstSize: a & mask
    // SrcSize  > DstSize: trunc(a) & mask
    if (SrcSize < DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
      Value *And = Builder.CreateAnd(A, AndConst, CSrc->getName() + ".mask");
      return new ZExtInst(And, CI.getType());
    }

    if (SrcSize == DstSize) {
      APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
      return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
                                                           AndValue));
    }
    if (SrcSize > DstSize) {
      Value *Trunc = Builder.CreateTrunc(A, CI.getType());
      APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
      return BinaryOperator::CreateAnd(Trunc,
                                       ConstantInt::get(Trunc->getType(),
                                                        AndValue));
    }
  }

  if (ICmpInst *Cmp = dyn_cast<ICmpInst>(Src))
    return transformZExtICmp(Cmp, CI);

  // zext(trunc(X) & C) -> (X & zext(C)).
  Constant *C;
  Value *X;
  if (match(Src, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Constant(C)))) &&
      X->getType() == CI.getType())
    return BinaryOperator::CreateAnd(X, ConstantExpr::getZExt(C, CI.getType()));

  // zext((trunc(X) & C) ^ C) -> ((X & zext(C)) ^ zext(C)).
  Value *And;
  if (match(Src, m_OneUse(m_Xor(m_Value(And), m_Constant(C)))) &&
      match(And, m_OneUse(m_And(m_Trunc(m_Value(X)), m_Specific(C)))) &&
      X->getType() == CI.getType()) {
    Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
    return BinaryOperator::CreateXor(Builder.CreateAnd(X, ZC), ZC);
  }

  if (match(Src, m_VScale(DL))) {
    if (CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
      unsigned MaxVScale = CI.getFunction()
                               ->getFnAttribute(Attribute::VScaleRange)
                               .getVScaleRangeArgs()
                               .second;
      unsigned TypeWidth = Src->getType()->getScalarSizeInBits();
      if (MaxVScale > 0 && Log2_32(MaxVScale) < TypeWidth) {
        Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
        return replaceInstUsesWith(CI, VScale);
      }
    }
  }

  return nullptr;
}

/// Transform (sext icmp) to bitwise / integer operations to eliminate the icmp.
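/// For example (illustrative), sext (icmp slt i32 %x, 0) to i32 becomes
/// ashr i32 %x, 31: all-ones when %x is negative and zero otherwise.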
Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *ICI,
                                                 Instruction &CI) {
  Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1);
  ICmpInst::Predicate Pred = ICI->getPredicate();

  // Don't bother if Op1 isn't of vector or integer type.
  if (!Op1->getType()->isIntOrIntVectorTy())
    return nullptr;

  if ((Pred == ICmpInst::ICMP_SLT && match(Op1, m_ZeroInt())) ||
      (Pred == ICmpInst::ICMP_SGT && match(Op1, m_AllOnes()))) {
    // (x <s  0) ? -1 : 0 -> ashr x, 31        -> all ones if negative
    // (x >s -1) ? -1 : 0 -> not (ashr x, 31)  -> all ones if positive
    Value *Sh = ConstantInt::get(Op0->getType(),
                                 Op0->getType()->getScalarSizeInBits() - 1);
    Value *In = Builder.CreateAShr(Op0, Sh, Op0->getName() + ".lobit");
    if (In->getType() != CI.getType())
      In = Builder.CreateIntCast(In, CI.getType(), true /*SExt*/);

    if (Pred == ICmpInst::ICMP_SGT)
      In = Builder.CreateNot(In, In->getName() + ".not");
    return replaceInstUsesWith(CI, In);
  }

  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    // If we know that only one bit of the LHS of the icmp can be set and we
    // have an equality comparison with zero or a power of 2, we can transform
    // the icmp and sext into bitwise/integer operations.
    if (ICI->hasOneUse() &&
        ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){
      KnownBits Known = computeKnownBits(Op0, 0, &CI);

      APInt KnownZeroMask(~Known.Zero);
      if (KnownZeroMask.isPowerOf2()) {
        Value *In = ICI->getOperand(0);

        // If the icmp tests for a known zero bit we can constant fold it.
        if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) {
          Value *V = Pred == ICmpInst::ICMP_NE ?
                       ConstantInt::getAllOnesValue(CI.getType()) :
                       ConstantInt::getNullValue(CI.getType());
          return replaceInstUsesWith(CI, V);
        }
1397 
1398  if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
1399  // sext ((x & 2^n) == 0) -> (x >> n) - 1
1400  // sext ((x & 2^n) != 2^n) -> (x >> n) - 1
1401  unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
1402  // Perform a right shift to place the desired bit in the LSB.
1403  if (ShiftAmt)
1404  In = Builder.CreateLShr(In,
1405  ConstantInt::get(In->getType(), ShiftAmt));
1406 
1407  // At this point "In" is either 1 or 0. Subtract 1 to turn
1408  // {1, 0} -> {0, -1}.
1409  In = Builder.CreateAdd(In,
1410  ConstantInt::getAllOnesValue(In->getType()),
1411  "sext");
1412  } else {
1413  // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
1414  // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
1415  unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
1416  // Perform a left shift to place the desired bit in the MSB.
1417  if (ShiftAmt)
1418  In = Builder.CreateShl(In,
1419  ConstantInt::get(In->getType(), ShiftAmt));
1420 
1421  // Distribute the bit over the whole bit width.
1422  In = Builder.CreateAShr(In, ConstantInt::get(In->getType(),
1423  KnownZeroMask.getBitWidth() - 1), "sext");
1424  }
1425 
1426  if (CI.getType() == In->getType())
1427  return replaceInstUsesWith(CI, In);
1428  return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/);
1429  }
1430  }
1431  }
1432 
1433  return nullptr;
1434 }
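// Editor's note (illustrative sketch, not part of the original source): when
// known bits prove that only one bit of the icmp LHS can be set, the
// power-of-2 equality path above rewrites, for a one-use icmp:
//   %a = and i32 %x, 4
//   %c = icmp eq i32 %a, 0
//   %s = sext i1 %c to i32
// into:
//   %a = and i32 %x, 4
//   %t = lshr i32 %a, 2
//   %s = add i32 %t, -1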
1435 
1436 /// Return true if we can take the specified value and return it as type Ty
1437 /// without inserting any new casts and without changing the value of the common
1438 /// low bits. This is used by code that tries to promote integer operations to
1439 /// a wider type when doing so will allow us to eliminate the extension.
1440 ///
1441 /// This function works on both vectors and scalars.
1442 ///
1443 static bool canEvaluateSExtd(Value *V, Type *Ty) {
1444  assert(V->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits() &&
1445  "Can't sign extend type to a smaller type");
1446  if (canAlwaysEvaluateInType(V, Ty))
1447  return true;
1448  if (canNotEvaluateInType(V, Ty))
1449  return false;
1450 
1451  auto *I = cast<Instruction>(V);
1452  switch (I->getOpcode()) {
1453  case Instruction::SExt: // sext(sext(x)) -> sext(x)
1454  case Instruction::ZExt: // sext(zext(x)) -> zext(x)
1455  case Instruction::Trunc: // sext(trunc(x)) -> trunc(x) or sext(x)
1456  return true;
1457  case Instruction::And:
1458  case Instruction::Or:
1459  case Instruction::Xor:
1460  case Instruction::Add:
1461  case Instruction::Sub:
1462  case Instruction::Mul:
1463  // These operators can all arbitrarily be extended if their inputs can.
1464  return canEvaluateSExtd(I->getOperand(0), Ty) &&
1465  canEvaluateSExtd(I->getOperand(1), Ty);
1466 
1467  //case Instruction::Shl: TODO
1468  //case Instruction::LShr: TODO
1469 
1470  case Instruction::Select:
1471  return canEvaluateSExtd(I->getOperand(1), Ty) &&
1472  canEvaluateSExtd(I->getOperand(2), Ty);
1473 
1474  case Instruction::PHI: {
1475  // We can change a phi if we can change all operands. Note that we never
1476  // get into trouble with cyclic PHIs here because we only consider
1477  // instructions with a single use.
1478  PHINode *PN = cast<PHINode>(I);
1479  for (Value *IncValue : PN->incoming_values())
1480  if (!canEvaluateSExtd(IncValue, Ty)) return false;
1481  return true;
1482  }
1483  default:
1484  // TODO: Can handle more cases here.
1485  break;
1486  }
1487 
1488  return false;
1489 }
1490 
1491 Instruction *InstCombinerImpl::visitSExt(SExtInst &CI) {
1492  // If this sign extend is only used by a truncate, let the truncate be
1493  // eliminated before we try to optimize this sext.
1494  if (CI.hasOneUse() && isa<TruncInst>(CI.user_back()))
1495  return nullptr;
1496 
1497  if (Instruction *I = commonCastTransforms(CI))
1498  return I;
1499 
1500  Value *Src = CI.getOperand(0);
1501  Type *SrcTy = Src->getType(), *DestTy = CI.getType();
1502  unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
1503  unsigned DestBitSize = DestTy->getScalarSizeInBits();
1504 
1505  // If we know that the value being extended is positive, we can use a zext
1506  // instead.
1507  KnownBits Known = computeKnownBits(Src, 0, &CI);
1508  if (Known.isNonNegative())
1509  return CastInst::Create(Instruction::ZExt, Src, DestTy);
1510 
1511  // Try to extend the entire expression tree to the wide destination type.
1512  if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
1513  // Okay, we can transform this! Insert the new expression now.
1514  LLVM_DEBUG(
1515  dbgs() << "ICE: EvaluateInDifferentType converting expression type"
1516  " to avoid sign extend: "
1517  << CI << '\n');
1518  Value *Res = EvaluateInDifferentType(Src, DestTy, true);
1519  assert(Res->getType() == DestTy);
1520 
1521  // If the high bits are already filled with sign bit, just replace this
1522  // cast with the result.
1523  if (ComputeNumSignBits(Res, 0, &CI) > DestBitSize - SrcBitSize)
1524  return replaceInstUsesWith(CI, Res);
1525 
1526  // We need to emit a shl + ashr to do the sign extend.
1527  Value *ShAmt = ConstantInt::get(DestTy, DestBitSize-SrcBitSize);
1528  return BinaryOperator::CreateAShr(Builder.CreateShl(Res, ShAmt, "sext"),
1529  ShAmt);
1530  }
1531 
1532  Value *X;
1533  if (match(Src, m_Trunc(m_Value(X)))) {
1534  // If the input has more sign bits than bits truncated, then convert
1535  // directly to final type.
1536  unsigned XBitSize = X->getType()->getScalarSizeInBits();
1537  if (ComputeNumSignBits(X, 0, &CI) > XBitSize - SrcBitSize)
1538  return CastInst::CreateIntegerCast(X, DestTy, /* isSigned */ true);
1539 
1540  // If input is a trunc from the destination type, then convert into shifts.
1541  if (Src->hasOneUse() && X->getType() == DestTy) {
1542  // sext (trunc X) --> ashr (shl X, C), C
1543  Constant *ShAmt = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1544  return BinaryOperator::CreateAShr(Builder.CreateShl(X, ShAmt), ShAmt);
1545  }
1546 
1547  // If we are replacing shifted-in high zero bits with sign bits, convert
1548  // the logic shift to arithmetic shift and eliminate the cast to
1549  // intermediate type:
1550  // sext (trunc (lshr Y, C)) --> sext/trunc (ashr Y, C)
1551  Value *Y;
1552  if (Src->hasOneUse() &&
1553  match(X, m_LShr(m_Value(Y),
1554  m_SpecificIntAllowUndef(XBitSize - SrcBitSize)))) {
1555  Value *Ashr = Builder.CreateAShr(Y, XBitSize - SrcBitSize);
1556  return CastInst::CreateIntegerCast(Ashr, DestTy, /* isSigned */ true);
1557  }
1558  }
1559 
1560  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Src))
1561  return transformSExtICmp(ICI, CI);
1562 
1563  // If the input is a shl/ashr pair by the same constant, then this is a sign
1564  // extension from a smaller value. If we could trust arbitrary bitwidth
1565  // integers, we could turn this into a truncate to the smaller bit width and then
1566  // use a sext for the whole extension. Since we don't, look deeper and check
1567  // for a truncate. If the source and dest are the same type, eliminate the
1568  // trunc and extend and just do shifts. For example, turn:
1569  // %a = trunc i32 %i to i8
1570  // %b = shl i8 %a, C
1571  // %c = ashr i8 %b, C
1572  // %d = sext i8 %c to i32
1573  // into:
1574  // %a = shl i32 %i, 32-(8-C)
1575  // %d = ashr i32 %a, 32-(8-C)
1576  Value *A = nullptr;
1577  // TODO: Eventually this could be subsumed by EvaluateInDifferentType.
1578  Constant *BA = nullptr, *CA = nullptr;
1579  if (match(Src, m_AShr(m_Shl(m_Trunc(m_Value(A)), m_Constant(BA)),
1580  m_Constant(CA))) &&
1581  BA->isElementWiseEqual(CA) && A->getType() == DestTy) {
1582  Constant *WideCurrShAmt = ConstantExpr::getSExt(CA, DestTy);
1583  Constant *NumLowbitsLeft = ConstantExpr::getSub(
1584  ConstantInt::get(DestTy, SrcTy->getScalarSizeInBits()), WideCurrShAmt);
1585  Constant *NewShAmt = ConstantExpr::getSub(
1586  ConstantInt::get(DestTy, DestTy->getScalarSizeInBits()),
1587  NumLowbitsLeft);
1588  NewShAmt =
1589  Constant::mergeUndefsWith(Constant::mergeUndefsWith(NewShAmt, BA), CA);
1590  A = Builder.CreateShl(A, NewShAmt, CI.getName());
1591  return BinaryOperator::CreateAShr(A, NewShAmt);
1592  }
1593 
1594  // Splatting a bit of constant-index across a value:
1595  // sext (ashr (trunc iN X to iM), M-1) to iN --> ashr (shl X, N-M), N-1
1596  // TODO: If the dest type is different, use a cast (adjust use check).
1597  if (match(Src, m_OneUse(m_AShr(m_Trunc(m_Value(X)),
1598  m_SpecificInt(SrcBitSize - 1)))) &&
1599  X->getType() == DestTy) {
1600  Constant *ShlAmtC = ConstantInt::get(DestTy, DestBitSize - SrcBitSize);
1601  Constant *AshrAmtC = ConstantInt::get(DestTy, DestBitSize - 1);
1602  Value *Shl = Builder.CreateShl(X, ShlAmtC);
1603  return BinaryOperator::CreateAShr(Shl, AshrAmtC);
1604  }
1605 
1606  if (match(Src, m_VScale(DL))) {
1607  if (CI.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
1608  unsigned MaxVScale = CI.getFunction()
1609  ->getFnAttribute(Attribute::VScaleRange)
1610  .getVScaleRangeArgs()
1611  .second;
1612  if (MaxVScale > 0 && Log2_32(MaxVScale) < (SrcBitSize - 1)) {
1613  Value *VScale = Builder.CreateVScale(ConstantInt::get(DestTy, 1));
1614  return replaceInstUsesWith(CI, VScale);
1615  }
1616  }
1617  }
1618 
1619  return nullptr;
1620 }
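// Editor's note (illustrative sketch, not part of the original source): the
// Known.isNonNegative() check at the top of visitSExt turns, for example:
//   %m = and i32 %x, 255        ; sign bit known zero
//   %e = sext i32 %m to i64
// into:
//   %e = zext i32 %m to i64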
1621 
1622 /// Return true if the specified floating-point constant fits in the
1623 /// specified FP type without changing its value.
1624 static bool fitsInFPType(ConstantFP *CFP, const fltSemantics &Sem) {
1625  bool losesInfo;
1626  APFloat F = CFP->getValueAPF();
1627  (void)F.convert(Sem, APFloat::rmNearestTiesToEven, &losesInfo);
1628  return !losesInfo;
1629 }
1630 
1631 static Type *shrinkFPConstant(ConstantFP *CFP) {
1632  if (CFP->getType() == Type::getPPC_FP128Ty(CFP->getContext()))
1633  return nullptr; // No constant folding of this.
1634  // See if the value can be truncated to half and then reextended.
1635  if (fitsInFPType(CFP, APFloat::IEEEhalf()))
1636  return Type::getHalfTy(CFP->getContext());
1637  // See if the value can be truncated to float and then reextended.
1638  if (fitsInFPType(CFP, APFloat::IEEEsingle()))
1639  return Type::getFloatTy(CFP->getContext());
1640  if (CFP->getType()->isDoubleTy())
1641  return nullptr; // Won't shrink.
1642  if (fitsInFPType(CFP, APFloat::IEEEdouble()))
1643  return Type::getDoubleTy(CFP->getContext());
1644  // Don't try to shrink to various long double types.
1645  return nullptr;
1646 }
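// Editor's note (illustrative, not part of the original source): for example,
// the double constant 0.5 shrinks to half, 1.0e10 shrinks only to float (it
// exceeds half's range but fits exactly in a 24-bit mantissa), and 0.1 does
// not shrink at all since no narrower type represents it exactly.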
1647 
1648 // Determine if this is a vector of ConstantFPs and if so, return the minimal
1649 // type we can safely truncate all elements to.
1650 // TODO: Make these support undef elements.
1651 static Type *shrinkFPConstantVector(Value *V) {
1652  auto *CV = dyn_cast<Constant>(V);
1653  auto *CVVTy = dyn_cast<FixedVectorType>(V->getType());
1654  if (!CV || !CVVTy)
1655  return nullptr;
1656 
1657  Type *MinType = nullptr;
1658 
1659  unsigned NumElts = CVVTy->getNumElements();
1660 
1661  // For fixed-width vectors we find the minimal type by looking
1662  // through the constant values of the vector.
1663  for (unsigned i = 0; i != NumElts; ++i) {
1664  auto *CFP = dyn_cast_or_null<ConstantFP>(CV->getAggregateElement(i));
1665  if (!CFP)
1666  return nullptr;
1667 
1668  Type *T = shrinkFPConstant(CFP);
1669  if (!T)
1670  return nullptr;
1671 
1672  // If we haven't found a type yet or this type has a larger mantissa than
1673  // our previous type, this is our new minimal type.
1674  if (!MinType || T->getFPMantissaWidth() > MinType->getFPMantissaWidth())
1675  MinType = T;
1676  }
1677 
1678  // Make a vector type from the minimal type.
1679  return FixedVectorType::get(MinType, NumElts);
1680 }
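// Editor's note (illustrative sketch, not part of the original source): for
// the constant <2 x double> <double 2.0, double 0.5>, both elements fit in
// half, so the minimal vector type returned is <2 x half>.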
1681 
1682 /// Find the minimum FP type we can safely truncate to.
1683 static Type *getMinimumFPType(Value *V) {
1684  if (auto *FPExt = dyn_cast<FPExtInst>(V))
1685  return FPExt->getOperand(0)->getType();
1686 
1687  // If this value is a constant, return the constant in the smallest FP type
1688  // that can accurately represent it. This allows us to turn
1689  // (float)((double)X+2.0) into x+2.0f.
1690  if (auto *CFP = dyn_cast<ConstantFP>(V))
1691  if (Type *T = shrinkFPConstant(CFP))
1692  return T;
1693 
1694  // We can only correctly find a minimum type for a scalable vector when it is
1695  // a splat. For splats of constant values the fpext is wrapped up as a
1696  // ConstantExpr.
1697  if (auto *FPCExt = dyn_cast<ConstantExpr>(V))
1698  if (FPCExt->getOpcode() == Instruction::FPExt)
1699  return FPCExt->getOperand(0)->getType();
1700 
1701  // Try to shrink a vector of FP constants. This returns nullptr on scalable
1702 // vectors.
1703  if (Type *T = shrinkFPConstantVector(V))
1704  return T;
1705 
1706  return V->getType();
1707 }
1708 
1709 /// Return true if the cast from integer to FP can be proven to be exact for all
1710 /// possible inputs (the conversion does not lose any precision).
1711 static bool isKnownExactCastIntToFP(CastInst &I) {
1712  CastInst::CastOps Opcode = I.getOpcode();
1713  assert((Opcode == CastInst::SIToFP || Opcode == CastInst::UIToFP) &&
1714  "Unexpected cast");
1715  Value *Src = I.getOperand(0);
1716  Type *SrcTy = Src->getType();
1717  Type *FPTy = I.getType();
1718  bool IsSigned = Opcode == Instruction::SIToFP;
1719  int SrcSize = (int)SrcTy->getScalarSizeInBits() - IsSigned;
1720 
1721  // Easy case - if the source integer type has less bits than the FP mantissa,
1722  // then the cast must be exact.
1723  int DestNumSigBits = FPTy->getFPMantissaWidth();
1724  if (SrcSize <= DestNumSigBits)
1725  return true;
1726 
1727  // Cast from FP to integer and back to FP is independent of the intermediate
1728  // integer width because of poison on overflow.
1729  Value *F;
1730  if (match(Src, m_FPToSI(m_Value(F))) || match(Src, m_FPToUI(m_Value(F)))) {
1731  // If this is uitofp (fptosi F), the source needs an extra bit to avoid
1732  // potential rounding of negative FP input values.
1733  int SrcNumSigBits = F->getType()->getFPMantissaWidth();
1734  if (!IsSigned && match(Src, m_FPToSI(m_Value())))
1735  SrcNumSigBits++;
1736 
1737  // [su]itofp (fpto[su]i F) --> exact if the source type has less or equal
1738  // significant bits than the destination (and make sure neither type is
1739  // weird -- ppc_fp128).
1740  if (SrcNumSigBits > 0 && DestNumSigBits > 0 &&
1741  SrcNumSigBits <= DestNumSigBits)
1742  return true;
1743  }
1744 
1745  // TODO:
1746  // Try harder to find if the source integer type has less significant bits.
1747  // For example, compute number of sign bits or compute low bit mask.
1748  return false;
1749 }
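// Editor's note (illustrative, not part of the original source): for example,
// "sitofp i16 %x to float" is always exact (15 value bits <= 24 mantissa
// bits), while "uitofp i32 %x to float" is not (32 value bits > 24).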
1750 
1751 Instruction *InstCombinerImpl::visitFPTrunc(FPTruncInst &FPT) {
1752  if (Instruction *I = commonCastTransforms(FPT))
1753  return I;
1754 
1755  // If we have fptrunc(OpI (fpextend x), (fpextend y)), we would like to
1756  // simplify this expression to avoid one or more of the trunc/extend
1757  // operations if we can do so without changing the numerical results.
1758  //
1759  // The exact manner in which the widths of the operands interact to limit
1760  // what we can and cannot do safely varies from operation to operation, and
1761  // is explained below in the various case statements.
1762  Type *Ty = FPT.getType();
1763  auto *BO = dyn_cast<BinaryOperator>(FPT.getOperand(0));
1764  if (BO && BO->hasOneUse()) {
1765  Type *LHSMinType = getMinimumFPType(BO->getOperand(0));
1766  Type *RHSMinType = getMinimumFPType(BO->getOperand(1));
1767  unsigned OpWidth = BO->getType()->getFPMantissaWidth();
1768  unsigned LHSWidth = LHSMinType->getFPMantissaWidth();
1769  unsigned RHSWidth = RHSMinType->getFPMantissaWidth();
1770  unsigned SrcWidth = std::max(LHSWidth, RHSWidth);
1771  unsigned DstWidth = Ty->getFPMantissaWidth();
1772  switch (BO->getOpcode()) {
1773  default: break;
1774  case Instruction::FAdd:
1775  case Instruction::FSub:
1776  // For addition and subtraction, the infinitely precise result can
1777  // essentially be arbitrarily wide; proving that double rounding
1778  // will not occur because the result of OpI is exact (as we will for
1779  // FMul, for example) is hopeless. However, we *can* nonetheless
1780  // frequently know that double rounding cannot occur (or that it is
1781  // innocuous) by taking advantage of the specific structure of
1782  // infinitely-precise results that admit double rounding.
1783  //
1784  // Specifically, if OpWidth >= 2*DstWidth+1 and DstWidth is sufficient
1785  // to represent both sources, we can guarantee that the double
1786  // rounding is innocuous (See p50 of Figueroa's 2000 PhD thesis,
1787  // "A Rigorous Framework for Fully Supporting the IEEE Standard ..."
1788  // for proof of this fact).
1789  //
1790  // Note: Figueroa does not consider the case where DstFormat !=
1791  // SrcFormat. It's possible (likely even!) that this analysis
1792  // could be tightened for those cases, but they are rare (the main
1793  // case of interest here is (float)((double)float + float)).
1794  if (OpWidth >= 2*DstWidth+1 && DstWidth >= SrcWidth) {
1795  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1796  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1797  Instruction *RI = BinaryOperator::Create(BO->getOpcode(), LHS, RHS);
1798  RI->copyFastMathFlags(BO);
1799  return RI;
1800  }
1801  break;
1802  case Instruction::FMul:
1803  // For multiplication, the infinitely precise result has at most
1804  // LHSWidth + RHSWidth significant bits; if OpWidth is sufficient
1805  // that such a value can be exactly represented, then no double
1806  // rounding can possibly occur; we can safely perform the operation
1807  // in the destination format if it can represent both sources.
1808  if (OpWidth >= LHSWidth + RHSWidth && DstWidth >= SrcWidth) {
1809  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1810  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1811  return BinaryOperator::CreateFMulFMF(LHS, RHS, BO);
1812  }
1813  break;
1814  case Instruction::FDiv:
1815  // For division, we again use the bound from Figueroa's
1816  // dissertation. I am entirely certain that this bound can be
1817  // tightened in the unbalanced operand case by an analysis based on
1818  // the diophantine rational approximation bound, but the well-known
1819  // condition used here is a good conservative first pass.
1820  // TODO: Tighten bound via rigorous analysis of the unbalanced case.
1821  if (OpWidth >= 2*DstWidth && DstWidth >= SrcWidth) {
1822  Value *LHS = Builder.CreateFPTrunc(BO->getOperand(0), Ty);
1823  Value *RHS = Builder.CreateFPTrunc(BO->getOperand(1), Ty);
1824  return BinaryOperator::CreateFDivFMF(LHS, RHS, BO);
1825  }
1826  break;
1827  case Instruction::FRem: {
1828  // Remainder is straightforward. Remainder is always exact, so the
1829  // type of OpI doesn't enter into things at all. We simply evaluate
1830  // in whichever source type is larger, then convert to the
1831  // destination type.
1832  if (SrcWidth == OpWidth)
1833  break;
1834  Value *LHS, *RHS;
1835  if (LHSWidth == SrcWidth) {
1836  LHS = Builder.CreateFPTrunc(BO->getOperand(0), LHSMinType);
1837  RHS = Builder.CreateFPTrunc(BO->getOperand(1), LHSMinType);
1838  } else {
1839  LHS = Builder.CreateFPTrunc(BO->getOperand(0), RHSMinType);
1840  RHS = Builder.CreateFPTrunc(BO->getOperand(1), RHSMinType);
1841  }
1842 
1843  Value *ExactResult = Builder.CreateFRemFMF(LHS, RHS, BO);
1844  return CastInst::CreateFPCast(ExactResult, Ty);
1845  }
1846  }
1847  }
1848 
1849  // (fptrunc (fneg x)) -> (fneg (fptrunc x))
1850  Value *X;
1851  Instruction *Op = dyn_cast<Instruction>(FPT.getOperand(0));
1852  if (Op && Op->hasOneUse()) {
1853  // FIXME: The FMF should propagate from the fptrunc, not the source op.
1854  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
1855  if (isa<FPMathOperator>(Op))
1856  Builder.setFastMathFlags(Op->getFastMathFlags());
1857 
1858  if (match(Op, m_FNeg(m_Value(X)))) {
1859  Value *InnerTrunc = Builder.CreateFPTrunc(X, Ty);
1860 
1861  return UnaryOperator::CreateFNegFMF(InnerTrunc, Op);
1862  }
1863 
1864  // If we are truncating a select that has an extended operand, we can
1865  // narrow the other operand and do the select as a narrow op.
1866  Value *Cond, *X, *Y;
1867  if (match(Op, m_Select(m_Value(Cond), m_FPExt(m_Value(X)), m_Value(Y))) &&
1868  X->getType() == Ty) {
1869  // fptrunc (select Cond, (fpext X), Y) --> select Cond, X, (fptrunc Y)
1870  Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1871  Value *Sel = Builder.CreateSelect(Cond, X, NarrowY, "narrow.sel", Op);
1872  return replaceInstUsesWith(FPT, Sel);
1873  }
1874  if (match(Op, m_Select(m_Value(Cond), m_Value(Y), m_FPExt(m_Value(X)))) &&
1875  X->getType() == Ty) {
1876  // fptrunc (select Cond, Y, (fpext X)) --> select Cond, (fptrunc Y), X
1877  Value *NarrowY = Builder.CreateFPTrunc(Y, Ty);
1878  Value *Sel = Builder.CreateSelect(Cond, NarrowY, X, "narrow.sel", Op);
1879  return replaceInstUsesWith(FPT, Sel);
1880  }
1881  }
1882 
1883  if (auto *II = dyn_cast<IntrinsicInst>(FPT.getOperand(0))) {
1884  switch (II->getIntrinsicID()) {
1885  default: break;
1886  case Intrinsic::ceil:
1887  case Intrinsic::fabs:
1888  case Intrinsic::floor:
1889  case Intrinsic::nearbyint:
1890  case Intrinsic::rint:
1891  case Intrinsic::round:
1892  case Intrinsic::roundeven:
1893  case Intrinsic::trunc: {
1894  Value *Src = II->getArgOperand(0);
1895  if (!Src->hasOneUse())
1896  break;
1897 
1898  // Except for fabs, this transformation requires the input of the unary FP
1899  // operation to be itself an fpext from the type to which we're
1900  // truncating.
1901  if (II->getIntrinsicID() != Intrinsic::fabs) {
1902  FPExtInst *FPExtSrc = dyn_cast<FPExtInst>(Src);
1903  if (!FPExtSrc || FPExtSrc->getSrcTy() != Ty)
1904  break;
1905  }
1906 
1907  // Do unary FP operation on smaller type.
1908  // (fptrunc (fabs x)) -> (fabs (fptrunc x))
1909  Value *InnerTrunc = Builder.CreateFPTrunc(Src, Ty);
1910  Function *Overload = Intrinsic::getDeclaration(FPT.getModule(),
1911  II->getIntrinsicID(), Ty);
1912  SmallVector<OperandBundleDef, 1> OpBundles;
1913  II->getOperandBundlesAsDefs(OpBundles);
1914  CallInst *NewCI =
1915  CallInst::Create(Overload, {InnerTrunc}, OpBundles, II->getName());
1916  NewCI->copyFastMathFlags(II);
1917  return NewCI;
1918  }
1919  }
1920  }
1921 
1922  if (Instruction *I = shrinkInsertElt(FPT, Builder))
1923  return I;
1924 
1925  Value *Src = FPT.getOperand(0);
1926  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1927  auto *FPCast = cast<CastInst>(Src);
1928  if (isKnownExactCastIntToFP(*FPCast))
1929  return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1930  }
1931 
1932  return nullptr;
1933 }
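// Editor's note (illustrative sketch, not part of the original source): the
// FAdd/FSub narrowing above (OpWidth 53 >= 2*24+1) rewrites, for a one-use
// fadd whose operands are extensions of floats:
//   %ax = fpext float %a to double
//   %bx = fpext float %b to double
//   %s = fadd double %ax, %bx
//   %r = fptrunc double %s to float
// into:
//   %r = fadd float %a, %b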
1934 
1935 Instruction *InstCombinerImpl::visitFPExt(CastInst &FPExt) {
1936  // If the source operand is a cast from integer to FP and known exact, then
1937  // cast the integer operand directly to the destination type.
1938  Type *Ty = FPExt.getType();
1939  Value *Src = FPExt.getOperand(0);
1940  if (isa<SIToFPInst>(Src) || isa<UIToFPInst>(Src)) {
1941  auto *FPCast = cast<CastInst>(Src);
1942  if (isKnownExactCastIntToFP(*FPCast))
1943  return CastInst::Create(FPCast->getOpcode(), FPCast->getOperand(0), Ty);
1944  }
1945 
1946  return commonCastTransforms(FPExt);
1947 }
1948 
1949 /// fpto{s/u}i({u/s}itofp(X)) --> X or zext(X) or sext(X) or trunc(X)
1950 /// This is safe if the intermediate type has enough bits in its mantissa to
1951 /// accurately represent all values of X. For example, this won't work with
1952 /// i64 -> float -> i64.
1953 Instruction *InstCombinerImpl::foldItoFPtoI(CastInst &FI) {
1954  if (!isa<UIToFPInst>(FI.getOperand(0)) && !isa<SIToFPInst>(FI.getOperand(0)))
1955  return nullptr;
1956 
1957  auto *OpI = cast<CastInst>(FI.getOperand(0));
1958  Value *X = OpI->getOperand(0);
1959  Type *XType = X->getType();
1960  Type *DestType = FI.getType();
1961  bool IsOutputSigned = isa<FPToSIInst>(FI);
1962 
1963  // Since we can assume the conversion won't overflow, our decision as to
1964  // whether the input will fit in the float should depend on the minimum
1965  // of the input range and output range.
1966 
1967  // This means this is also safe for a signed input and unsigned output, since
1968  // a negative input would lead to undefined behavior.
1969  if (!isKnownExactCastIntToFP(*OpI)) {
1970  // The first cast may not round exactly based on the source integer width
1971  // and FP width, but the overflow UB rules can still allow this to fold.
1972  // If the destination type is narrow, that means the intermediate FP value
1973  // must be large enough to hold the source value exactly.
1974  // For example, (uint8_t)(float)(uint32_t)16777217 is undefined behavior.
1975  int OutputSize = (int)DestType->getScalarSizeInBits() - IsOutputSigned;
1976  if (OutputSize > OpI->getType()->getFPMantissaWidth())
1977  return nullptr;
1978  }
1979 
1980  if (DestType->getScalarSizeInBits() > XType->getScalarSizeInBits()) {
1981  bool IsInputSigned = isa<SIToFPInst>(OpI);
1982  if (IsInputSigned && IsOutputSigned)
1983  return new SExtInst(X, DestType);
1984  return new ZExtInst(X, DestType);
1985  }
1986  if (DestType->getScalarSizeInBits() < XType->getScalarSizeInBits())
1987  return new TruncInst(X, DestType);
1988 
1989  assert(XType == DestType && "Unexpected types for int to FP to int casts");
1990  return replaceInstUsesWith(FI, X);
1991 }
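// Editor's note (illustrative sketch, not part of the original source): since
// "sitofp i16 %x to float" is exact, foldItoFPtoI rewrites:
//   %f = sitofp i16 %x to float
//   %i = fptosi float %f to i32
// into:
//   %i = sext i16 %x to i32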
1992 
1993 Instruction *InstCombinerImpl::visitFPToUI(FPToUIInst &FI) {
1994  if (Instruction *I = foldItoFPtoI(FI))
1995  return I;
1996 
1997  return commonCastTransforms(FI);
1998 }
1999 
2000 Instruction *InstCombinerImpl::visitFPToSI(FPToSIInst &FI) {
2001  if (Instruction *I = foldItoFPtoI(FI))
2002  return I;
2003 
2004  return commonCastTransforms(FI);
2005 }
2006 
2007 Instruction *InstCombinerImpl::visitUIToFP(CastInst &CI) {
2008  return commonCastTransforms(CI);
2009 }
2010 
2011 Instruction *InstCombinerImpl::visitSIToFP(CastInst &CI) {
2012  return commonCastTransforms(CI);
2013 }
2014 
2015 Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
2016  // If the source integer type is not the intptr_t type for this target, do a
2017  // trunc or zext to the intptr_t type, then inttoptr of it. This allows the
2018  // cast to be exposed to other transforms.
2019  unsigned AS = CI.getAddressSpace();
2020  if (CI.getOperand(0)->getType()->getScalarSizeInBits() !=
2021  DL.getPointerSizeInBits(AS)) {
2022  Type *Ty = CI.getOperand(0)->getType()->getWithNewType(
2023  DL.getIntPtrType(CI.getContext(), AS));
2024  Value *P = Builder.CreateZExtOrTrunc(CI.getOperand(0), Ty);
2025  return new IntToPtrInst(P, CI.getType());
2026  }
2027 
2028  if (Instruction *I = commonCastTransforms(CI))
2029  return I;
2030 
2031  return nullptr;
2032 }
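// Editor's note (illustrative sketch, not part of the original source): on a
// target with 64-bit pointers, visitIntToPtr rewrites:
//   %p = inttoptr i32 %x to i8*
// into:
//   %e = zext i32 %x to i64
//   %p = inttoptr i64 %e to i8*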
2033 
2034 /// Implement the transforms for cast of pointer (bitcast/ptrtoint)
2035 Instruction *InstCombinerImpl::commonPointerCastTransforms(CastInst &CI) {
2036  Value *Src = CI.getOperand(0);
2037 
2038  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) {
2039  // If casting the result of a getelementptr instruction with no offset, turn
2040  // this into a cast of the original pointer!
2041  if (GEP->hasAllZeroIndices() &&
2042  // If CI is an addrspacecast and GEP changes the pointer type, merging
2043  // GEP into CI would undo canonicalizing addrspacecast with different
2044  // pointer types, causing infinite loops.
2045  (!isa<AddrSpaceCastInst>(CI) ||
2046  GEP->getType() == GEP->getPointerOperandType())) {
2047  // Changing the cast operand is usually not a good idea but it is safe
2048  // here because the pointer operand is being replaced with another
2049  // pointer operand so the opcode doesn't need to change.
2050  return replaceOperand(CI, 0, GEP->getOperand(0));
2051  }
2052  }
2053 
2054  return commonCastTransforms(CI);
2055 }
2056 
2057 Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
2058  // If the destination integer type is not the intptr_t type for this target,
2059  // do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
2060  // to be exposed to other transforms.
2061  Value *SrcOp = CI.getPointerOperand();
2062  Type *SrcTy = SrcOp->getType();
2063  Type *Ty = CI.getType();
2064  unsigned AS = CI.getPointerAddressSpace();
2065  unsigned TySize = Ty->getScalarSizeInBits();
2066  unsigned PtrSize = DL.getPointerSizeInBits(AS);
2067  if (TySize != PtrSize) {
2068  Type *IntPtrTy =
2069  SrcTy->getWithNewType(DL.getIntPtrType(CI.getContext(), AS));
2070  Value *P = Builder.CreatePtrToInt(SrcOp, IntPtrTy);
2071  return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
2072  }
2073 
2074  Value *Vec, *Scalar, *Index;
2075  if (match(SrcOp, m_OneUse(m_InsertElt(m_BitCast(m_Value(Vec)),
2076  m_Value(Scalar), m_Value(Index)))) &&
2077  Vec->getType() == Ty) {
2078  assert(Vec->getType()->getScalarSizeInBits() == PtrSize && "Wrong type");
2079  // Convert the scalar to int followed by insert to eliminate one cast:
2080  // p2i (ins (i2p Vec), Scalar, Index) --> ins Vec, (p2i Scalar), Index
2081  Value *NewCast = Builder.CreatePtrToInt(Scalar, Ty->getScalarType());
2082  return InsertElementInst::Create(Vec, NewCast, Index);
2083  }
2084 
2085  return commonPointerCastTransforms(CI);
2086 }
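// Editor's note (illustrative sketch, not part of the original source): on a
// target with 64-bit pointers, visitPtrToInt rewrites:
//   %i = ptrtoint i8* %p to i128
// into:
//   %t = ptrtoint i8* %p to i64
//   %i = zext i64 %t to i128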
2087 
2088 /// This input value (which is known to have vector type) is being zero extended
2089 /// or truncated to the specified vector type. Since the zext/trunc is done
2090 /// using an integer type, we have a (bitcast(cast(bitcast))) pattern,
2091 /// endianness will impact which end of the vector is extended or
2092 /// truncated.
2093 ///
2094 /// A vector is always stored with index 0 at the lowest address, which
2095 /// corresponds to the most significant bits for a big endian stored integer and
2096 /// the least significant bits for little endian. A trunc/zext of an integer
2097 /// impacts the big end of the integer. Thus, we need to add/remove elements at
2098 /// the front of the vector for big endian targets, and the back of the vector
2099 /// for little endian targets.
2100 ///
2101 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
2102 ///
2103 /// The source and destination vector types may have different element types.
2104 static Instruction *
2105 optimizeVectorResizeWithIntegerBitCasts(Value *InVal, VectorType *DestTy,
2106  InstCombinerImpl &IC) {
2107  // We can only do this optimization if the output is a multiple of the input
2108  // element size, or the input is a multiple of the output element size.
2109  // Convert the input type to have the same element type as the output.
2110  VectorType *SrcTy = cast<VectorType>(InVal->getType());
2111 
2112  if (SrcTy->getElementType() != DestTy->getElementType()) {
2113  // The input types don't need to be identical, but for now they must be the
2114  // same size. There is no specific reason we couldn't handle things like
2115  // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
2116  // there yet.
2117  if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
2118  DestTy->getElementType()->getPrimitiveSizeInBits())
2119  return nullptr;
2120 
2121  SrcTy =
2122  FixedVectorType::get(DestTy->getElementType(),
2123  cast<FixedVectorType>(SrcTy)->getNumElements());
2124  InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
2125  }
2126 
2127  bool IsBigEndian = IC.getDataLayout().isBigEndian();
2128  unsigned SrcElts = cast<FixedVectorType>(SrcTy)->getNumElements();
2129  unsigned DestElts = cast<FixedVectorType>(DestTy)->getNumElements();
2130 
2131  assert(SrcElts != DestElts && "Element counts should be different.");
2132 
2133  // Now that the element types match, get the shuffle mask and RHS of the
2134  // shuffle to use, which depends on whether we're increasing or decreasing the
2135  // size of the input.
2136  SmallVector<int, 16> ShuffleMaskStorage;
2137  ArrayRef<int> ShuffleMask;
2138  Value *V2;
2139 
2140  // Produce an identity shuffle mask for the src vector.
2141  ShuffleMaskStorage.resize(SrcElts);
2142  std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
2143 
2144  if (SrcElts > DestElts) {
2145  // If we're shrinking the number of elements (rewriting an integer
2146  // truncate), just shuffle in the elements corresponding to the least
2147  // significant bits from the input and use undef as the second shuffle
2148  // input.
2149  V2 = UndefValue::get(SrcTy);
2150  // Make sure the shuffle mask selects the "least significant bits" by
2151  // keeping elements from the back of the src vector for big endian, and
2152  // from the front for little endian.
2153  ShuffleMask = ShuffleMaskStorage;
2154  if (IsBigEndian)
2155  ShuffleMask = ShuffleMask.take_back(DestElts);
2156  else
2157  ShuffleMask = ShuffleMask.take_front(DestElts);
2158  } else {
2159  // If we're increasing the number of elements (rewriting an integer zext),
2160  // shuffle in all of the elements from InVal. Fill the rest of the result
2161  // elements with zeros from a constant zero.
2162  V2 = Constant::getNullValue(SrcTy);
2163  // Use first elt from V2 when indicating zero in the shuffle mask.
2164  uint32_t NullElt = SrcElts;
2165  // Extend with null values in the "most significant bits" by adding elements
2166  // in front of the src vector for big endian, and at the back for little
2167  // endian.
2168  unsigned DeltaElts = DestElts - SrcElts;
2169  if (IsBigEndian)
2170  ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
2171  else
2172  ShuffleMaskStorage.append(DeltaElts, NullElt);
2173  ShuffleMask = ShuffleMaskStorage;
2174  }
2175 
2176  return new ShuffleVectorInst(InVal, V2, ShuffleMask);
2177 }
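// Editor's note (illustrative sketch, not part of the original source): on a
// little-endian target this turns the truncating pattern:
//   %b = bitcast <4 x i32> %v to i128
//   %t = trunc i128 %b to i64
//   %r = bitcast i64 %t to <2 x i32>
// into:
//   %r = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>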
2178 
2179 static bool isMultipleOfTypeSize(unsigned Value, Type *Ty) {
2180  return Value % Ty->getPrimitiveSizeInBits() == 0;
2181 }
2182 
2183 static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
2184  return Value / Ty->getPrimitiveSizeInBits();
2185 }
2186 
2187 /// V is a value which is inserted into a vector of VecEltTy.
2188 /// Look through the value to see if we can decompose it into
2189 /// insertions into the vector. See the example in the comment for
2190 /// OptimizeIntegerToVectorInsertions for the pattern this handles.
2191 /// The type of V is always a non-zero multiple of VecEltTy's size.
2192 /// Shift is the number of bits between the lsb of V and the lsb of
2193 /// the vector.
2194 ///
2195 /// This returns false if the pattern can't be matched or true if it can,
2196 /// filling in Elements with the elements found here.
2197 static bool collectInsertionElements(Value *V, unsigned Shift,
2198  SmallVectorImpl<Value *> &Elements,
2199  Type *VecEltTy, bool isBigEndian) {
2200  assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
2201  "Shift should be a multiple of the element type size");
2202 
2203  // Undef values never contribute useful bits to the result.
2204  if (isa<UndefValue>(V)) return true;
2205 
2206  // If we got down to a value of the right type, we win; try inserting it
2207  // into the right element.
2208  if (V->getType() == VecEltTy) {
2209  // Inserting null doesn't actually insert any elements.
2210  if (Constant *C = dyn_cast<Constant>(V))
2211  if (C->isNullValue())
2212  return true;
2213 
2214  unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
2215  if (isBigEndian)
2216  ElementIndex = Elements.size() - ElementIndex - 1;
2217 
2218  // Fail if multiple elements are inserted into this slot.
2219  if (Elements[ElementIndex])
2220  return false;
2221 
2222  Elements[ElementIndex] = V;
2223  return true;
2224  }
2225 
2226  if (Constant *C = dyn_cast<Constant>(V)) {
2227  // Figure out the # elements this provides, and bitcast it or slice it up
2228  // as required.
2229  unsigned NumElts = getTypeSizeIndex(C->getType()->getPrimitiveSizeInBits(),
2230  VecEltTy);
2231  // If the constant is the size of a vector element, we just need to bitcast
2232  // it to the right type so it gets properly inserted.
2233  if (NumElts == 1)
2234  return collectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
2235  Shift, Elements, VecEltTy, isBigEndian);
2236 
2237  // Okay, this is a constant that covers multiple elements. Slice it up into
2238  // pieces and insert each element-sized piece into the vector.
2239  if (!isa<IntegerType>(C->getType()))
2240  C = ConstantExpr::getBitCast(C, IntegerType::get(V->getContext(),
2241  C->getType()->getPrimitiveSizeInBits()));
2242  unsigned ElementSize = VecEltTy->getPrimitiveSizeInBits();
2243  Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
2244 
2245  for (unsigned i = 0; i != NumElts; ++i) {
2246  unsigned ShiftI = Shift+i*ElementSize;
2247  Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
2248  ShiftI));
2249  Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
2250  if (!collectInsertionElements(Piece, ShiftI, Elements, VecEltTy,
2251  isBigEndian))
2252  return false;
2253  }
2254  return true;
2255  }
2256 
2257  if (!V->hasOneUse()) return false;
2258 
2259  Instruction *I = dyn_cast<Instruction>(V);
2260  if (!I) return false;
2261  switch (I->getOpcode()) {
2262  default: return false; // Unhandled case.
2263  case Instruction::BitCast:
2264  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2265  isBigEndian);
2266  case Instruction::ZExt:
2267  if (!isMultipleOfTypeSize(
2268  I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
2269  VecEltTy))
2270  return false;
2271  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2272  isBigEndian);
2273  case Instruction::Or:
2274  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2275  isBigEndian) &&
2276  collectInsertionElements(I->getOperand(1), Shift, Elements, VecEltTy,
2277  isBigEndian);
2278  case Instruction::Shl: {
2279  // Must be shifting by a constant that is a multiple of the element size.
2280  ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
2281  if (!CI) return false;
2282  Shift += CI->getZExtValue();
2283  if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
2284  return collectInsertionElements(I->getOperand(0), Shift, Elements, VecEltTy,
2285  isBigEndian);
2286  }
2287 
2288  }
2289 }
2290 
2291 
2292 /// If the input is an 'or' instruction, we may be doing shifts and ors to
2293 /// assemble the elements of the vector manually.
2294 /// Try to rip the code out and replace it with insertelements. This is to
2295 /// optimize code like this:
2296 ///
2297 /// %tmp37 = bitcast float %inc to i32
2298 /// %tmp38 = zext i32 %tmp37 to i64
2299 /// %tmp31 = bitcast float %inc5 to i32
2300 /// %tmp32 = zext i32 %tmp31 to i64
2301 /// %tmp33 = shl i64 %tmp32, 32
2302 /// %ins35 = or i64 %tmp33, %tmp38
2303 /// %tmp43 = bitcast i64 %ins35 to <2 x float>
2304 ///
2305 /// Into two insertelements that do "buildvector{%inc, %inc5}".
2306 static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
2307  InstCombinerImpl &IC) {
2308  auto *DestVecTy = cast<FixedVectorType>(CI.getType());
2309  Value *IntInput = CI.getOperand(0);
2310 
2311  SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
2312  if (!collectInsertionElements(IntInput, 0, Elements,
2313  DestVecTy->getElementType(),
2314  IC.getDataLayout().isBigEndian()))
2315  return nullptr;
2316 
2317  // If we succeeded, we know that all of the element are specified by Elements
2318  // or are zero if Elements has a null entry. Recast this as a set of
2319  // insertions.
2320  Value *Result = Constant::getNullValue(CI.getType());
2321  for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
2322  if (!Elements[i]) continue; // Unset element.
2323 
2324  Result = IC.Builder.CreateInsertElement(Result, Elements[i],
2325  IC.Builder.getInt32(i));
2326  }
2327 
2328  return Result;
2329 }
2330 
2331 /// Canonicalize scalar bitcasts of extracted elements into a bitcast of the
2332 /// vector followed by extract element. The backend tends to handle bitcasts of
2333 /// vectors better than bitcasts of scalars because vector registers are
2334 /// usually not type-specific like scalar integer or scalar floating-point.
2335 static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
2336  InstCombinerImpl &IC) {
2337  // TODO: Create and use a pattern matcher for ExtractElementInst.
2338  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
2339  if (!ExtElt || !ExtElt->hasOneUse())
2340  return nullptr;
2341 
2342  // The bitcast must be to a vectorizable type, otherwise we can't make a new
2343  // type to extract from.
2344  Type *DestType = BitCast.getType();
2345  if (!VectorType::isValidElementType(DestType))
2346  return nullptr;
2347 
2348  auto *NewVecType = VectorType::get(DestType, ExtElt->getVectorOperandType());
2349  auto *NewBC = IC.Builder.CreateBitCast(ExtElt->getVectorOperand(),
2350  NewVecType, "bc");
2351  return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
2352 }
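// Editor's note (illustrative sketch, not part of the original source):
// canonicalizeBitCastExtElt rewrites, for a one-use extractelement:
//   %e = extractelement <2 x i32> %v, i32 0
//   %f = bitcast i32 %e to float
// into:
//   %bc = bitcast <2 x i32> %v to <2 x float>
//   %f = extractelement <2 x float> %bc, i32 0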
2353 
2354 /// Change the type of a bitwise logic operation if we can eliminate a bitcast.
2355 static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
2356  InstCombiner::BuilderTy &Builder) {
2357  Type *DestTy = BitCast.getType();
2358  BinaryOperator *BO;
2359  if (!DestTy->isIntOrIntVectorTy() ||
2360  !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
2361  !BO->isBitwiseLogicOp())
2362  return nullptr;
2363 
2364  // FIXME: This transform is restricted to vector types to avoid backend
2365  // problems caused by creating potentially illegal operations. If a fix-up is
2366  // added to handle that situation, we can remove this check.
2367  if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
2368  return nullptr;
2369 
2370  Value *X;
2371  if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
2372  X->getType() == DestTy && !isa<Constant>(X)) {
2373  // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
2374  Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
2375  return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
2376  }
2377 
2378  if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
2379  X->getType() == DestTy && !isa<Constant>(X)) {
2380  // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
2381  Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2382  return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
2383  }
2384 
2385  // Canonicalize vector bitcasts to come before vector bitwise logic with a
2386  // constant. This eases recognition of special constants for later ops.
2387  // Example:
2388  // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b
2389  Constant *C;
2390  if (match(BO->getOperand(1), m_Constant(C))) {
2391  // bitcast (logic X, C) --> logic (bitcast X, C')
2392  Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
2393  Value *CastedC = Builder.CreateBitCast(C, DestTy);
2394  return BinaryOperator::Create(BO->getOpcode(), CastedOp0, CastedC);
2395  }
2396 
2397  return nullptr;
2398 }
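// Editor's note (illustrative sketch, not part of the original source): with
// a one-use cast and a one-use logic op, foldBitCastBitwiseLogic rewrites:
//   %b = bitcast <2 x i32> %x to <4 x i16>
//   %a = and <4 x i16> %b, %y
//   %r = bitcast <4 x i16> %a to <2 x i32>
// into:
//   %yc = bitcast <4 x i16> %y to <2 x i32>
//   %r = and <2 x i32> %x, %yc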
2399 
2400 /// Change the type of a select if we can eliminate a bitcast.
2401 static Instruction *foldBitCastSelect(BitCastInst &BitCast,
2402  InstCombiner::BuilderTy &Builder) {
2403  Value *Cond, *TVal, *FVal;
2404  if (!match(BitCast.getOperand(0),
2405  m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
2406  return nullptr;
2407 
2408  // A vector select must maintain the same number of elements in its operands.
2409  Type *CondTy = Cond->getType();
2410  Type *DestTy = BitCast.getType();
2411  if (auto *CondVTy = dyn_cast<VectorType>(CondTy))
2412  if (!DestTy->isVectorTy() ||
2413  CondVTy->getElementCount() !=
2414  cast<VectorType>(DestTy)->getElementCount())
2415  return nullptr;
2416 
2417  // FIXME: This transform is restricted from changing the select between
2418  // scalars and vectors to avoid backend problems caused by creating
2419  // potentially illegal operations. If a fix-up is added to handle that
2420  // situation, we can remove this check.
2421  if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
2422  return nullptr;
2423 
2424  auto *Sel = cast<Instruction>(BitCast.getOperand(0));
2425  Value *X;
2426  if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2427  !isa<Constant>(X)) {
2428  // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
2429  Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
2430  return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
2431  }
2432 
2433  if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
2434  !isa<Constant>(X)) {
2435  // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
2436  Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
2437  return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
2438  }
2439 
2440  return nullptr;
2441 }
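// Editor's note (illustrative sketch, not part of the original source):
// foldBitCastSelect rewrites, for a one-use select and cast:
//   %t = bitcast <4 x i16> %x to <2 x i32>
//   %s = select i1 %c, <2 x i32> %t, <2 x i32> %y
//   %r = bitcast <2 x i32> %s to <4 x i16>
// into:
//   %yc = bitcast <2 x i32> %y to <4 x i16>
//   %r = select i1 %c, <4 x i16> %x, <4 x i16> %yc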
2442 
2443 /// Check if all users of CI are StoreInsts.
2444 static bool hasStoreUsersOnly(CastInst &CI) {
2445  for (User *U : CI.users()) {
2446  if (!isa<StoreInst>(U))
2447  return false;
2448  }
2449  return true;
2450 }
2451 
2452 /// This function handles the following case:
2453 ///
2454 /// A -> B cast
2455 /// PHI
2456 /// B -> A cast
2457 ///
2458 /// All the related PHI nodes can be replaced by new PHI nodes with type A.
2459 /// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
2460 Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
2461  PHINode *PN) {
2462  // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
2463  if (hasStoreUsersOnly(CI))
2464  return nullptr;
2465 
2466  Value *Src = CI.getOperand(0);
2467  Type *SrcTy = Src->getType(); // Type B
2468  Type *DestTy = CI.getType(); // Type A
2469 
2470  SmallVector<PHINode *, 4> PhiWorklist;
2471  SmallSetVector<PHINode *, 4> OldPhiNodes;
2472 
2473  // Find all of the A->B casts and PHI nodes.
2474  // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
2475  // OldPhiNodes is used to track all known PHI nodes; before adding a new
2476  // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
2477  PhiWorklist.push_back(PN);
2478  OldPhiNodes.insert(PN);
2479  while (!PhiWorklist.empty()) {
2480  auto *OldPN = PhiWorklist.pop_back_val();
2481  for (Value *IncValue : OldPN->incoming_values()) {
2482  if (isa<Constant>(IncValue))
2483  continue;
2484 
2485  if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
2486  // If there is a sequence of one or more load instructions, each loaded
2487  // value is used as the address of a later load instruction; a bitcast
2488  // is necessary to change the value type, so don't optimize it. For
2489  // simplicity we give up if the load address comes from another load.
2490  Value *Addr = LI->getOperand(0);
2491  if (Addr == &CI || isa<LoadInst>(Addr))
2492  return nullptr;
2493  // Don't transform "load <256 x i32>, <256 x i32>*" to
2494  // "load x86_amx, x86_amx*", because x86_amx* is invalid.
2495  // TODO: Remove this check when bitcast between vector and x86_amx
2496  // is replaced with a specific intrinsic.
2497  if (DestTy->isX86_AMXTy())
2498  return nullptr;
2499  if (LI->hasOneUse() && LI->isSimple())
2500  continue;
2501  // If a LoadInst has more than one use, changing the type of loaded
2502  // value may create another bitcast.
2503  return nullptr;
2504  }
2505 
2506  if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
2507  if (OldPhiNodes.insert(PNode))
2508  PhiWorklist.push_back(PNode);
2509  continue;
2510  }
2511 
2512  auto *BCI = dyn_cast<BitCastInst>(IncValue);
2513  // We can't handle other instructions.
2514  if (!BCI)
2515  return nullptr;
2516 
2517  // Verify it's a A->B cast.
2518  Type *TyA = BCI->getOperand(0)->getType();
2519  Type *TyB = BCI->getType();
2520  if (TyA != DestTy || TyB != SrcTy)
2521  return nullptr;
2522  }
2523  }
2524 
2525  // Check that each user of each old PHI node is something that we can
2526  // rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2527  for (auto *OldPN : OldPhiNodes) {
2528  for (User *V : OldPN->users()) {
2529  if (auto *SI = dyn_cast<StoreInst>(V)) {
2530  if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2531  return nullptr;
2532  } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2533  // Verify it's a B->A cast.
2534  Type *TyB = BCI->getOperand(0)->getType();
2535  Type *TyA = BCI->getType();
2536  if (TyA != DestTy || TyB != SrcTy)
2537  return nullptr;
2538  } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2539  // As long as the user is another old PHI node, then even if we don't
2540  // rewrite it, the PHI web we're considering won't have any users
2541  // outside itself, so it'll be dead.
2542  if (OldPhiNodes.count(PHI) == 0)
2543  return nullptr;
2544  } else {
2545  return nullptr;
2546  }
2547  }
2548  }
2549 
2550  // For each old PHI node, create a corresponding new PHI node with a type A.
2551  SmallDenseMap<PHINode *, PHINode *, 4> NewPNodes;
2552  for (auto *OldPN : OldPhiNodes) {
2553  Builder.SetInsertPoint(OldPN);
2554  PHINode *NewPN = Builder.CreatePHI(DestTy, OldPN->getNumOperands());
2555  NewPNodes[OldPN] = NewPN;
2556  }
2557 
2558  // Fill in the operands of new PHI nodes.
2559  for (auto *OldPN : OldPhiNodes) {
2560  PHINode *NewPN = NewPNodes[OldPN];
2561  for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
2562  Value *V = OldPN->getOperand(j);
2563  Value *NewV = nullptr;
2564  if (auto *C = dyn_cast<Constant>(V)) {
2565  NewV = ConstantExpr::getBitCast(C, DestTy);
2566  } else if (auto *LI = dyn_cast<LoadInst>(V)) {
2567  // Explicitly perform load combine to make sure no opposing transform
2568  // can remove the bitcast in the meantime and trigger an infinite loop.
2569  Builder.SetInsertPoint(LI);
2570  NewV = combineLoadToNewType(*LI, DestTy);
2571  // Remove the old load and its use in the old phi, which itself becomes
2572  // dead once the whole transform finishes.
2573  replaceInstUsesWith(*LI, PoisonValue::get(LI->getType()));
2574  eraseInstFromFunction(*LI);
2575  } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2576  NewV = BCI->getOperand(0);
2577  } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
2578  NewV = NewPNodes[PrevPN];
2579  }
2580  assert(NewV);
2581  NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
2582  }
2583  }
2584 
2585  // Traverse all accumulated PHI nodes and process its users,
2586  // which are Stores and BitCasts. Without this processing
2587  // NewPHI nodes could be replicated and could lead to extra
2588  // moves generated after DeSSA.
2589  // If there is a store with type B, change it to type A.
2590 
2591 
2592  // Replace users of BitCast B->A with NewPHI. These will help
2593  // later to get rid of a closure formed by OldPHI nodes.
2594  Instruction *RetVal = nullptr;
2595  for (auto *OldPN : OldPhiNodes) {
2596  PHINode *NewPN = NewPNodes[OldPN];
2597  for (User *V : make_early_inc_range(OldPN->users())) {
2598  if (auto *SI = dyn_cast<StoreInst>(V)) {
2599  assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2600  Builder.SetInsertPoint(SI);
2601  auto *NewBC =
2602  cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2603  SI->setOperand(0, NewBC);
2604  Worklist.push(SI);
2605  assert(hasStoreUsersOnly(*NewBC));
2606  }
2607  else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2608  Type *TyB = BCI->getOperand(0)->getType();
2609  Type *TyA = BCI->getType();
2610  assert(TyA == DestTy && TyB == SrcTy);
2611  (void) TyA;
2612  (void) TyB;
2613  Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2614  if (BCI == &CI)
2615  RetVal = I;
2616  } else if (auto *PHI = dyn_cast<PHINode>(V)) {
2617  assert(OldPhiNodes.contains(PHI));
2618  (void) PHI;
2619  } else {
2620  llvm_unreachable("all uses should be handled");
2621  }
2622  }
2623  }
2624 
2625  return RetVal;
2626 }
2627 
2628 static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
2629  const DataLayout &DL) {
2630  Value *Src = CI.getOperand(0);
2631  PointerType *SrcPTy = cast<PointerType>(Src->getType());
2632  PointerType *DstPTy = cast<PointerType>(CI.getType());
2633 
2634  // Bitcasts involving opaque pointers cannot be converted into a GEP.
2635  if (SrcPTy->isOpaque() || DstPTy->isOpaque())
2636  return nullptr;
2637 
2638  Type *DstElTy = DstPTy->getElementType();
2639  Type *SrcElTy = SrcPTy->getElementType();
2640 
2641  // When the type pointed to is not sized the cast cannot be
2642  // turned into a gep.
2643  if (!SrcElTy->isSized())
2644  return nullptr;
2645 
2646  // If the source and destination are pointers, and this cast is equivalent
2647  // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
2648  // This can enhance SROA and other transforms that want type-safe pointers.
2649  unsigned NumZeros = 0;
2650  while (SrcElTy && SrcElTy != DstElTy) {
2651  SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
2652  ++NumZeros;
2653  }
2654 
2655  // If we found a path from the src to dest, create the getelementptr now.
2656  if (SrcElTy == DstElTy) {
2657  SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
2658  GetElementPtrInst *GEP =
2659  GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs);
2660 
2661  // If the source pointer is dereferenceable, then assume it points to an
2662  // allocated object and apply "inbounds" to the GEP.
2663  bool CanBeNull, CanBeFreed;
2664  if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
2665  // In a non-default address space (not 0), a null pointer cannot be
2666  // assumed inbounds, so ignore that case (dereferenceable_or_null).
2667  // The reason is that 'null' is not treated differently in these address
2668  // spaces, and we consequently ignore the 'gep inbounds' special case
2669  // for 'null' which allows 'inbounds' on 'null' if the indices are
2670  // zeros.
2671  if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
2672  GEP->setIsInBounds();
2673  }
2674  return GEP;
2675  }
2676  return nullptr;
2677 }
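// Editor's note (illustrative sketch, not part of the original source):
// convertBitCastToGEP rewrites, with typed (non-opaque) pointers:
//   %q = bitcast [4 x float]* %p to float*
// into:
//   %q = getelementptr [4 x float], [4 x float]* %p, i32 0, i32 0
// marking the GEP inbounds when %p is known dereferenceable.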
2678 
2679 Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
2680  // If the operands are integer typed then apply the integer transforms,
2681  // otherwise just apply the common ones.
2682  Value *Src = CI.getOperand(0);
2683  Type *SrcTy = Src->getType();
2684  Type *DestTy = CI.getType();
2685 
2686  // Get rid of casts from one type to the same type. These are useless and can
2687  // be replaced by the operand.
2688  if (DestTy == Src->getType())
2689  return replaceInstUsesWith(CI, Src);
2690 
2691  if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
2692  // If we are casting an alloca to a pointer to a type of the same
2693  // size, rewrite the allocation instruction to allocate the "right" type.
2694  // There is no need to modify malloc calls because it is their bitcast that
2695  // needs to be cleaned up.
2696  if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
2697  if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
2698  return V;
2699 
2700  if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
2701  return I;
2702  }
2703 
2704  if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
2705  // Beware: messing with this target-specific oddity may cause trouble.
2706  if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
2707  Value *Elem = Builder.CreateBitCast(Src, DestVTy->getElementType());
2708  return InsertElementInst::Create(PoisonValue::get(DestTy), Elem,
2709  Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
2710  }
2711 
2712  if (isa<IntegerType>(SrcTy)) {
2713  // If this is a cast from an integer to vector, check to see if the input
2714  // is a trunc or zext of a bitcast from vector. If so, we can replace all
2715  // the casts with a shuffle and (potentially) a bitcast.
2716  if (isa<TruncInst>(Src) || isa<ZExtInst>(Src)) {
2717  CastInst *SrcCast = cast<CastInst>(Src);
2718  if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
2719  if (isa<VectorType>(BCIn->getOperand(0)->getType()))
2720  if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
2721  BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
2722  return I;
2723  }
2724 
2725  // If the input is an 'or' instruction, we may be doing shifts and ors to
2726  // assemble the elements of the vector manually. Try to rip the code out
2727  // and replace it with insertelements.
2728  if (Value *V = optimizeIntegerToVectorInsertions(CI, *this))
2729  return replaceInstUsesWith(CI, V);
2730  }
2731  }
2732 
2733  if (FixedVectorType *SrcVTy = dyn_cast<FixedVectorType>(SrcTy)) {
2734  if (SrcVTy->getNumElements() == 1) {
2735  // If our destination is not a vector, then make this a straight
2736  // scalar-scalar cast.
2737  if (!DestTy->isVectorTy()) {
2738  Value *Elem =
2739  Builder.CreateExtractElement(Src,
2741  return CastInst::Create(Instruction::BitCast, Elem, DestTy);
2742  }
2743 
2744  // Otherwise, see if our source is an insert. If so, then use the scalar
2745  // component directly:
2746  // bitcast (inselt <1 x elt> V, X, 0) to <n x m> --> bitcast X to <n x m>
2747  if (auto *InsElt = dyn_cast<InsertElementInst>(Src))
2748  return new BitCastInst(InsElt->getOperand(1), DestTy);
2749  }
2750  }
2751 
2752  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(Src)) {
2753  // Okay, we have (bitcast (shuffle ..)). Check to see if this is
2754  // a bitcast to a vector with the same # elts.
2755  Value *ShufOp0 = Shuf->getOperand(0);
2756  Value *ShufOp1 = Shuf->getOperand(1);
2757  auto ShufElts = cast<VectorType>(Shuf->getType())->getElementCount();
2758  auto SrcVecElts = cast<VectorType>(ShufOp0->getType())->getElementCount();
2759  if (Shuf->hasOneUse() && DestTy->isVectorTy() &&
2760  cast<VectorType>(DestTy)->getElementCount() == ShufElts &&
2761  ShufElts == SrcVecElts) {
2762  BitCastInst *Tmp;
2763  // If either of the operands is a cast from CI.getType(), then
2764  // evaluating the shuffle in the casted destination's type will allow
2765  // us to eliminate at least one cast.
2766  if (((Tmp = dyn_cast<BitCastInst>(ShufOp0)) &&
2767  Tmp->getOperand(0)->getType() == DestTy) ||
2768  ((Tmp = dyn_cast<BitCastInst>(ShufOp1)) &&
2769  Tmp->getOperand(0)->getType() == DestTy)) {
2770  Value *LHS = Builder.CreateBitCast(ShufOp0, DestTy);
2771  Value *RHS = Builder.CreateBitCast(ShufOp1, DestTy);
2772  // Return a new shuffle vector. Use the same element ID's, as we
2773  // know the vector types match #elts.
2774  return new ShuffleVectorInst(LHS, RHS, Shuf->getShuffleMask());
2775  }
2776  }
2777 
2778  // A bitcasted-to-scalar and byte-reversing shuffle is better recognized as
2779  // a byte-swap:
2780  // bitcast <N x i8> (shuf X, undef, <N, N-1,...0>) --> bswap (bitcast X)
2781  // TODO: We should match the related pattern for bitreverse.
2782  if (DestTy->isIntegerTy() &&
2783  DL.isLegalInteger(DestTy->getScalarSizeInBits()) &&
2784  SrcTy->getScalarSizeInBits() == 8 &&
2785  ShufElts.getKnownMinValue() % 2 == 0 && Shuf->hasOneUse() &&
2786  Shuf->isReverse()) {
2787  assert(ShufOp0->getType() == SrcTy && "Unexpected shuffle mask");
2788  assert(match(ShufOp1, m_Undef()) && "Unexpected shuffle op");
2789  Function *Bswap =
2790  Intrinsic::getDeclaration(CI.getModule(), Intrinsic::bswap, DestTy);
2791  Value *ScalarX = Builder.CreateBitCast(ShufOp0, DestTy);
2792  return CallInst::Create(Bswap, { ScalarX });
2793  }
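// Illustrative sketch of the byte-swap match above (%x is a hypothetical
// <4 x i8> value; assumes i32 is a legal integer type on the target):
//   %rev = shufflevector <4 x i8> %x, <4 x i8> undef,
//                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
//   %i   = bitcast <4 x i8> %rev to i32
// becomes a bswap of the directly bitcast vector:
//   %b = bitcast <4 x i8> %x to i32
//   %i = call i32 @llvm.bswap.i32(i32 %b)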
2794  }
2795 
2796  // Handle the A->B->A cast sequence where there is an intervening PHI node.
2797  if (PHINode *PN = dyn_cast<PHINode>(Src))
2798  if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
2799  return I;
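// Illustrative sketch of the A->B->A-through-PHI case above (hypothetical
// values; optimizeBitCastFromPhi imposes further conditions, e.g. on the
// users of the result, before it actually fires):
//   %d1 = bitcast i64 %i1 to double
//   %d2 = bitcast i64 %i2 to double
//   %p  = phi double [ %d1, %bb1 ], [ %d2, %bb2 ]
//   %r  = bitcast double %p to i64
// can instead PHI the original i64 values, dropping all the bitcasts.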
2800 
2801  if (Instruction *I = canonicalizeBitCastExtElt(CI, *this))
2802  return I;
2803 
2804  if (Instruction *I = foldBitCastBitwiseLogic(CI, Builder))
2805  return I;
2806 
2807  if (Instruction *I = foldBitCastSelect(CI, Builder))
2808  return I;
2809 
2810  if (SrcTy->isPointerTy())
2811  return commonPointerCastTransforms(CI);
2812  return commonCastTransforms(CI);
2813 }
2814 
2815 Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
2816  // If the destination pointer element type is not the same as the source's,
2817  // first do a bitcast to the destination type, and then the addrspacecast.
2818  // This allows the cast to be exposed to other transforms.
2819  Value *Src = CI.getOperand(0);
2820  PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
2821  PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
2822 
2823  if (!SrcTy->hasSameElementTypeAs(DestTy)) {
2824  Type *MidTy =
2825  PointerType::getWithSamePointeeType(DestTy, SrcTy->getAddressSpace());
2826  // Handle vectors of pointers.
2827  if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
2828  MidTy = VectorType::get(MidTy, VT->getElementCount());
2829 
2830  Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
2831  return new AddrSpaceCastInst(NewBitCast, CI.getType());
2832  }
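// Illustrative sketch of the split above (hypothetical pointers; the pointee
// type changes while going from addrspace(1) to addrspace(0)):
//   %q = addrspacecast i32 addrspace(1)* %p to i8*
// is split so the bitcast is exposed to other transforms:
//   %b = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
//   %q = addrspacecast i8 addrspace(1)* %b to i8*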
2833 
2834  return commonPointerCastTransforms(CI);
2835 }