1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitCall and visitInvoke functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/Twine.h"
29 #include "llvm/IR/Attributes.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/CallSite.h"
32 #include "llvm/IR/Constant.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalVariable.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/IR/Metadata.h"
45 #include "llvm/IR/PatternMatch.h"
46 #include "llvm/IR/Statepoint.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/IR/ValueHandle.h"
52 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/KnownBits.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstring>
66 #include <utility>
67 #include <vector>
68 
69 using namespace llvm;
70 using namespace PatternMatch;
71 
72 #define DEBUG_TYPE "instcombine"
73 
74 STATISTIC(NumSimplified, "Number of library calls simplified");
75 
77  "instcombine-guard-widening-window",
78  cl::init(3),
79  cl::desc("How wide an instruction window to bypass looking for "
80  "another guard"));
81 
82 /// Return the specified type promoted as it would be to pass through a va_arg
83 /// area.
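/// (Editorial note, illustrative: an i8 or i16 argument promotes to i32, while
/// i64 and non-integer types are returned unchanged.)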
84 static Type *getPromotedType(Type *Ty) {
85  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
86  if (ITy->getBitWidth() < 32)
87  return Type::getInt32Ty(Ty->getContext());
88  }
89  return Ty;
90 }
91 
92 /// Return a constant boolean vector that has true elements in all positions
93 /// where the input constant data vector has an element with the sign bit set.
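/// (Editorial note, illustrative: <4 x i32> <i32 -1, i32 7, i32 -3, i32 0>
/// yields <4 x i1> <i1 true, i1 false, i1 true, i1 false>.)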
94 static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
95   SmallVector<Constant *, 32> BoolVec;
96   IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
97  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
98  Constant *Elt = V->getElementAsConstant(I);
99  assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
100  "Unexpected constant data vector element type");
101  bool Sign = V->getElementType()->isIntegerTy()
102  ? cast<ConstantInt>(Elt)->isNegative()
103  : cast<ConstantFP>(Elt)->isNegative();
104  BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
105  }
106  return ConstantVector::get(BoolVec);
107 }
108 
109 Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
110  unsigned DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
111  unsigned CopyDstAlign = MI->getDestAlignment();
112  if (CopyDstAlign < DstAlign) {
113  MI->setDestAlignment(DstAlign);
114  return MI;
115  }
116 
117  unsigned SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
118  unsigned CopySrcAlign = MI->getSourceAlignment();
119  if (CopySrcAlign < SrcAlign) {
120  MI->setSourceAlignment(SrcAlign);
121  return MI;
122  }
123 
124  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
125  // load/store.
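  // Illustrative sketch (editorial, not from this file; %d and %s are
  // placeholder values): a 4-byte copy such as
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 4, i1 false)
  // becomes roughly
  //   %s.cast = bitcast i8* %s to i32*
  //   %d.cast = bitcast i8* %d to i32*
  //   %val = load i32, i32* %s.cast
  //   store i32 %val, i32* %d.cast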
126  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
127  if (!MemOpLength) return nullptr;
128 
129  // Source and destination pointer types are always "i8*" for the intrinsic. See
130  // if the size is something we can handle with a single primitive load/store.
131  // A single load+store correctly handles overlapping memory in the memmove
132  // case.
133  uint64_t Size = MemOpLength->getLimitedValue();
134  assert(Size && "0-sized memory transferring should be removed already.");
135 
136  if (Size > 8 || (Size&(Size-1)))
137  return nullptr; // If not 1/2/4/8 bytes, exit.
138 
139  // Use an integer load+store unless we can find something better.
140  unsigned SrcAddrSp =
141  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
142  unsigned DstAddrSp =
143  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
144 
145  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
146  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
147  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
148 
149  // If the memcpy has metadata describing the members, see if we can get the
150  // TBAA tag describing our copy.
151  MDNode *CopyMD = nullptr;
152  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
153  CopyMD = M;
154  } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
155  if (M->getNumOperands() == 3 && M->getOperand(0) &&
156  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
157  mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
158  M->getOperand(1) &&
159  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
160  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
161  Size &&
162  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
163  CopyMD = cast<MDNode>(M->getOperand(2));
164  }
165 
166  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
167  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
168  LoadInst *L = Builder.CreateLoad(Src);
169  // Alignment from the mem intrinsic will be better, so use it.
170  L->setAlignment(CopySrcAlign);
171  if (CopyMD)
172  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
173  MDNode *LoopMemParallelMD =
174      MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
175  if (LoopMemParallelMD)
176      L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
177 
178  StoreInst *S = Builder.CreateStore(L, Dest);
179  // Alignment from the mem intrinsic will be better, so use it.
180  S->setAlignment(CopyDstAlign);
181  if (CopyMD)
182  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
183  if (LoopMemParallelMD)
184      S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
185 
186  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
187  // non-atomics can be volatile
188  L->setVolatile(MT->isVolatile());
189  S->setVolatile(MT->isVolatile());
190  }
191  if (isa<AtomicMemTransferInst>(MI)) {
192  // atomics have to be unordered
193  L->setOrdering(AtomicOrdering::Unordered);
194  S->setOrdering(AtomicOrdering::Unordered);
195  }
196 
197  // Set the size of the copy to 0; it will be deleted on the next iteration.
198  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
199  return MI;
200 }
201 
202 Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
203  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
204  if (MI->getDestAlignment() < Alignment) {
205  MI->setDestAlignment(Alignment);
206  return MI;
207  }
208 
209  // Extract the length and alignment and fill if they are constant.
210  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
211  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
212  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
213  return nullptr;
214  uint64_t Len = LenC->getLimitedValue();
215  Alignment = MI->getDestAlignment();
216  assert(Len && "0-sized memory setting should be removed already.");
217 
218  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
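  // Illustrative sketch (editorial; %p is a placeholder value): memset(%p, 0xAB, 4)
  // splats the fill byte to 0xABABABAB and becomes
  //   %p.cast = bitcast i8* %p to i32*
  //   store i32 -1414812757, i32* %p.cast   ; 0xABABABAB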
219  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
220  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
221 
222  Value *Dest = MI->getDest();
223  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
224  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
225  Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
226 
227  // Alignment 0 is identity for alignment 1 for memset, but not store.
228  if (Alignment == 0) Alignment = 1;
229 
230  // Extract the fill value and store.
231  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
232  StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
233  MI->isVolatile());
234  S->setAlignment(Alignment);
235  if (isa<AtomicMemSetInst>(MI))
236  S->setOrdering(AtomicOrdering::Unordered);
237 
238  // Set the size of the memset to 0; it will be deleted on the next iteration.
239  MI->setLength(Constant::getNullValue(LenC->getType()));
240  return MI;
241  }
242 
243  return nullptr;
244 }
245 
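// Attempt to simplify SSE2/AVX2/AVX512 packed shift intrinsics to a generic IR
// shift when the shift count operand is constant. (Editorial note, illustrative:
// psrli.d on <4 x i32> %v with an immediate of 3 can become
// 'lshr <4 x i32> %v, <i32 3, i32 3, i32 3, i32 3>'.)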
246 static Value *simplifyX86immShift(const IntrinsicInst &II,
247                                   InstCombiner::BuilderTy &Builder) {
248  bool LogicalShift = false;
249  bool ShiftLeft = false;
250 
251  switch (II.getIntrinsicID()) {
252  default: llvm_unreachable("Unexpected intrinsic!");
253  case Intrinsic::x86_sse2_psra_d:
254  case Intrinsic::x86_sse2_psra_w:
255  case Intrinsic::x86_sse2_psrai_d:
256  case Intrinsic::x86_sse2_psrai_w:
257  case Intrinsic::x86_avx2_psra_d:
258  case Intrinsic::x86_avx2_psra_w:
259  case Intrinsic::x86_avx2_psrai_d:
260  case Intrinsic::x86_avx2_psrai_w:
261  case Intrinsic::x86_avx512_psra_q_128:
262  case Intrinsic::x86_avx512_psrai_q_128:
263  case Intrinsic::x86_avx512_psra_q_256:
264  case Intrinsic::x86_avx512_psrai_q_256:
265  case Intrinsic::x86_avx512_psra_d_512:
266  case Intrinsic::x86_avx512_psra_q_512:
267  case Intrinsic::x86_avx512_psra_w_512:
268  case Intrinsic::x86_avx512_psrai_d_512:
269  case Intrinsic::x86_avx512_psrai_q_512:
270  case Intrinsic::x86_avx512_psrai_w_512:
271  LogicalShift = false; ShiftLeft = false;
272  break;
273  case Intrinsic::x86_sse2_psrl_d:
274  case Intrinsic::x86_sse2_psrl_q:
275  case Intrinsic::x86_sse2_psrl_w:
276  case Intrinsic::x86_sse2_psrli_d:
277  case Intrinsic::x86_sse2_psrli_q:
278  case Intrinsic::x86_sse2_psrli_w:
279  case Intrinsic::x86_avx2_psrl_d:
280  case Intrinsic::x86_avx2_psrl_q:
281  case Intrinsic::x86_avx2_psrl_w:
282  case Intrinsic::x86_avx2_psrli_d:
283  case Intrinsic::x86_avx2_psrli_q:
284  case Intrinsic::x86_avx2_psrli_w:
285  case Intrinsic::x86_avx512_psrl_d_512:
286  case Intrinsic::x86_avx512_psrl_q_512:
287  case Intrinsic::x86_avx512_psrl_w_512:
288  case Intrinsic::x86_avx512_psrli_d_512:
289  case Intrinsic::x86_avx512_psrli_q_512:
290  case Intrinsic::x86_avx512_psrli_w_512:
291  LogicalShift = true; ShiftLeft = false;
292  break;
293  case Intrinsic::x86_sse2_psll_d:
294  case Intrinsic::x86_sse2_psll_q:
295  case Intrinsic::x86_sse2_psll_w:
296  case Intrinsic::x86_sse2_pslli_d:
297  case Intrinsic::x86_sse2_pslli_q:
298  case Intrinsic::x86_sse2_pslli_w:
299  case Intrinsic::x86_avx2_psll_d:
300  case Intrinsic::x86_avx2_psll_q:
301  case Intrinsic::x86_avx2_psll_w:
302  case Intrinsic::x86_avx2_pslli_d:
303  case Intrinsic::x86_avx2_pslli_q:
304  case Intrinsic::x86_avx2_pslli_w:
305  case Intrinsic::x86_avx512_psll_d_512:
306  case Intrinsic::x86_avx512_psll_q_512:
307  case Intrinsic::x86_avx512_psll_w_512:
308  case Intrinsic::x86_avx512_pslli_d_512:
309  case Intrinsic::x86_avx512_pslli_q_512:
310  case Intrinsic::x86_avx512_pslli_w_512:
311  LogicalShift = true; ShiftLeft = true;
312  break;
313  }
314  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
315 
316  // Simplify if count is constant.
317  auto Arg1 = II.getArgOperand(1);
318  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
319  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
320  auto CInt = dyn_cast<ConstantInt>(Arg1);
321  if (!CAZ && !CDV && !CInt)
322  return nullptr;
323 
324  APInt Count(64, 0);
325  if (CDV) {
326  // SSE2/AVX2 uses only the first 64 bits of the 128-bit vector
327  // operand to compute the shift amount.
328  auto VT = cast<VectorType>(CDV->getType());
329  unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
330  assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
331  unsigned NumSubElts = 64 / BitWidth;
332 
333  // Concatenate the sub-elements to create the 64-bit value.
334  for (unsigned i = 0; i != NumSubElts; ++i) {
335  unsigned SubEltIdx = (NumSubElts - 1) - i;
336  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
337  Count <<= BitWidth;
338  Count |= SubElt->getValue().zextOrTrunc(64);
339  }
340  }
341  else if (CInt)
342  Count = CInt->getValue();
343 
344  auto Vec = II.getArgOperand(0);
345  auto VT = cast<VectorType>(Vec->getType());
346  auto SVT = VT->getElementType();
347  unsigned VWidth = VT->getNumElements();
348  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
349 
350  // If shift-by-zero then just return the original value.
351  if (Count.isNullValue())
352  return Vec;
353 
354  // Handle cases when Shift >= BitWidth.
355  if (Count.uge(BitWidth)) {
356  // If LogicalShift - just return zero.
357  if (LogicalShift)
358  return ConstantAggregateZero::get(VT);
359 
360  // If ArithmeticShift - clamp Shift to (BitWidth - 1).
361  Count = APInt(64, BitWidth - 1);
362  }
363 
364  // Get a constant vector of the same type as the first operand.
365  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
366  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
367 
368  if (ShiftLeft)
369  return Builder.CreateShl(Vec, ShiftVec);
370 
371  if (LogicalShift)
372  return Builder.CreateLShr(Vec, ShiftVec);
373 
374  return Builder.CreateAShr(Vec, ShiftVec);
375 }
376 
377 // Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
378 // Unlike the generic IR shifts, the intrinsics have defined behaviour for out
379 // of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
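// (Editorial note, illustrative: with an all-in-range constant shift vector such
// as <i32 1, i32 2, i32 3, i32 4>, psrlv.d on %v becomes
// 'lshr <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>'; a logical shift whose
// amounts are all out of range or undef folds to a zero/undef vector instead.)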
380 static Value *simplifyX86varShift(const IntrinsicInst &II,
381                                   InstCombiner::BuilderTy &Builder) {
382  bool LogicalShift = false;
383  bool ShiftLeft = false;
384 
385  switch (II.getIntrinsicID()) {
386  default: llvm_unreachable("Unexpected intrinsic!");
387  case Intrinsic::x86_avx2_psrav_d:
388  case Intrinsic::x86_avx2_psrav_d_256:
389  case Intrinsic::x86_avx512_psrav_q_128:
390  case Intrinsic::x86_avx512_psrav_q_256:
391  case Intrinsic::x86_avx512_psrav_d_512:
392  case Intrinsic::x86_avx512_psrav_q_512:
393  case Intrinsic::x86_avx512_psrav_w_128:
394  case Intrinsic::x86_avx512_psrav_w_256:
395  case Intrinsic::x86_avx512_psrav_w_512:
396  LogicalShift = false;
397  ShiftLeft = false;
398  break;
399  case Intrinsic::x86_avx2_psrlv_d:
400  case Intrinsic::x86_avx2_psrlv_d_256:
401  case Intrinsic::x86_avx2_psrlv_q:
402  case Intrinsic::x86_avx2_psrlv_q_256:
403  case Intrinsic::x86_avx512_psrlv_d_512:
404  case Intrinsic::x86_avx512_psrlv_q_512:
405  case Intrinsic::x86_avx512_psrlv_w_128:
406  case Intrinsic::x86_avx512_psrlv_w_256:
407  case Intrinsic::x86_avx512_psrlv_w_512:
408  LogicalShift = true;
409  ShiftLeft = false;
410  break;
411  case Intrinsic::x86_avx2_psllv_d:
412  case Intrinsic::x86_avx2_psllv_d_256:
413  case Intrinsic::x86_avx2_psllv_q:
414  case Intrinsic::x86_avx2_psllv_q_256:
415  case Intrinsic::x86_avx512_psllv_d_512:
416  case Intrinsic::x86_avx512_psllv_q_512:
417  case Intrinsic::x86_avx512_psllv_w_128:
418  case Intrinsic::x86_avx512_psllv_w_256:
419  case Intrinsic::x86_avx512_psllv_w_512:
420  LogicalShift = true;
421  ShiftLeft = true;
422  break;
423  }
424  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
425 
426  // Simplify if all shift amounts are constant/undef.
427  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
428  if (!CShift)
429  return nullptr;
430 
431  auto Vec = II.getArgOperand(0);
432  auto VT = cast<VectorType>(II.getType());
433  auto SVT = VT->getVectorElementType();
434  int NumElts = VT->getNumElements();
435  int BitWidth = SVT->getIntegerBitWidth();
436 
437  // Collect each element's shift amount.
438  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
439  bool AnyOutOfRange = false;
440  SmallVector<int, 8> ShiftAmts;
441  for (int I = 0; I < NumElts; ++I) {
442  auto *CElt = CShift->getAggregateElement(I);
443  if (CElt && isa<UndefValue>(CElt)) {
444  ShiftAmts.push_back(-1);
445  continue;
446  }
447 
448  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
449  if (!COp)
450  return nullptr;
451 
452  // Handle out of range shifts.
453  // If LogicalShift - set to BitWidth (special case).
454  // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
455  APInt ShiftVal = COp->getValue();
456  if (ShiftVal.uge(BitWidth)) {
457  AnyOutOfRange = LogicalShift;
458  ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
459  continue;
460  }
461 
462  ShiftAmts.push_back((int)ShiftVal.getZExtValue());
463  }
464 
465  // If all elements out of range or UNDEF, return vector of zeros/undefs.
466  // ArithmeticShift should only hit this if they are all UNDEF.
467  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
468  if (llvm::all_of(ShiftAmts, OutOfRange)) {
469  SmallVector<Constant *, 8> ConstantVec;
470  for (int Idx : ShiftAmts) {
471  if (Idx < 0) {
472  ConstantVec.push_back(UndefValue::get(SVT));
473  } else {
474  assert(LogicalShift && "Logical shift expected");
475  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
476  }
477  }
478  return ConstantVector::get(ConstantVec);
479  }
480 
481  // We can't handle only some out of range values with generic logical shifts.
482  if (AnyOutOfRange)
483  return nullptr;
484 
485  // Build the shift amount constant vector.
486  SmallVector<Constant *, 8> ShiftVecAmts;
487  for (int Idx : ShiftAmts) {
488  if (Idx < 0)
489  ShiftVecAmts.push_back(UndefValue::get(SVT));
490  else
491  ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
492  }
493  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
494 
495  if (ShiftLeft)
496  return Builder.CreateShl(Vec, ShiftVec);
497 
498  if (LogicalShift)
499  return Builder.CreateLShr(Vec, ShiftVec);
500 
501  return Builder.CreateAShr(Vec, ShiftVec);
502 }
503 
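// Constant fold PACKSS/PACKUS pack intrinsics when both arguments are constant.
// (Editorial note, illustrative: packssdw of the i32 constants 70000, -70000,
// 5, -5 saturates the first two to 32767 and -32768 and truncates the rest.)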
504 static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
505  Value *Arg0 = II.getArgOperand(0);
506  Value *Arg1 = II.getArgOperand(1);
507  Type *ResTy = II.getType();
508 
509  // Fast all undef handling.
510  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
511  return UndefValue::get(ResTy);
512 
513  Type *ArgTy = Arg0->getType();
514  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
515  unsigned NumDstElts = ResTy->getVectorNumElements();
516  unsigned NumSrcElts = ArgTy->getVectorNumElements();
517  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
518 
519  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
520  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
521  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
522  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
523  "Unexpected packing types");
524 
525  // Constant folding.
526  auto *Cst0 = dyn_cast<Constant>(Arg0);
527  auto *Cst1 = dyn_cast<Constant>(Arg1);
528  if (!Cst0 || !Cst1)
529  return nullptr;
530 
531  SmallVector<Constant *, 32> Vals;
532  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
533  for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
534  unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
535  auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
536  auto *COp = Cst->getAggregateElement(SrcIdx);
537  if (COp && isa<UndefValue>(COp)) {
538  Vals.push_back(UndefValue::get(ResTy->getScalarType()));
539  continue;
540  }
541 
542  auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
543  if (!CInt)
544  return nullptr;
545 
546  APInt Val = CInt->getValue();
547  assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
548  "Unexpected constant bitwidth");
549 
550  if (IsSigned) {
551  // PACKSS: Truncate signed value with signed saturation.
552  // Source values less than dst minint are saturated to minint.
553  // Source values greater than dst maxint are saturated to maxint.
554  if (Val.isSignedIntN(DstScalarSizeInBits))
555  Val = Val.trunc(DstScalarSizeInBits);
556  else if (Val.isNegative())
557  Val = APInt::getSignedMinValue(DstScalarSizeInBits);
558  else
559  Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
560  } else {
561  // PACKUS: Truncate signed value with unsigned saturation.
562  // Source values less than zero are saturated to zero.
563  // Source values greater than dst maxuint are saturated to maxuint.
564  if (Val.isIntN(DstScalarSizeInBits))
565  Val = Val.trunc(DstScalarSizeInBits);
566  else if (Val.isNegative())
567  Val = APInt::getNullValue(DstScalarSizeInBits);
568  else
569  Val = APInt::getAllOnesValue(DstScalarSizeInBits);
570  }
571 
572  Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
573  }
574  }
575 
576  return ConstantVector::get(Vals);
577 }
578 
579 // Replace X86-specific intrinsics with generic floor-ceil where applicable.
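// (Editorial note, illustrative: llvm.x86.sse41.round.ss with a round-control
// immediate of 1 maps to llvm.floor and 2 maps to llvm.ceil, applied to
// element 0 and reinserted into the destination vector.)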
580 static Value *simplifyX86round(IntrinsicInst &II,
581                                InstCombiner::BuilderTy &Builder) {
582  ConstantInt *Arg = nullptr;
583  Intrinsic::ID IntrinsicID = II.getIntrinsicID();
584 
585  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
586  IntrinsicID == Intrinsic::x86_sse41_round_sd)
587  Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
588  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
589  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
590  Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
591  else
592  Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
593  if (!Arg)
594  return nullptr;
595  unsigned RoundControl = Arg->getZExtValue();
596 
597  Arg = nullptr;
598  unsigned SAE = 0;
599  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
600  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
601  Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
602  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
603  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
604  Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
605  else
606  SAE = 4;
607  if (!SAE) {
608  if (!Arg)
609  return nullptr;
610  SAE = Arg->getZExtValue();
611  }
612 
613  if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
614  return nullptr;
615 
616  Value *Src, *Dst, *Mask;
617  bool IsScalar = false;
618  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
619  IntrinsicID == Intrinsic::x86_sse41_round_sd ||
620  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
621  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
622  IsScalar = true;
623  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
624  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
625  Mask = II.getArgOperand(3);
626  Value *Zero = Constant::getNullValue(Mask->getType());
627  Mask = Builder.CreateAnd(Mask, 1);
628  Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
629  Dst = II.getArgOperand(2);
630  } else
631  Dst = II.getArgOperand(0);
632  Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
633  } else {
634  Src = II.getArgOperand(0);
635  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
636  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
637  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
638  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
639  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
640  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
641  Dst = II.getArgOperand(2);
642  Mask = II.getArgOperand(3);
643  } else {
644  Dst = Src;
645  Mask = ConstantInt::getAllOnesValue(
646      Builder.getIntNTy(Src->getType()->getVectorNumElements()));
647  }
648  }
649 
650  Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
651  Value *Res = Builder.CreateIntrinsic(ID, {Src}, &II);
652  if (!IsScalar) {
653  if (auto *C = dyn_cast<Constant>(Mask))
654  if (C->isAllOnesValue())
655  return Res;
656  auto *MaskTy = VectorType::get(
657  Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
658  Mask = Builder.CreateBitCast(Mask, MaskTy);
659  unsigned Width = Src->getType()->getVectorNumElements();
660  if (MaskTy->getVectorNumElements() > Width) {
661  uint32_t Indices[4];
662  for (unsigned i = 0; i != Width; ++i)
663  Indices[i] = i;
664  Mask = Builder.CreateShuffleVector(Mask, Mask,
665  makeArrayRef(Indices, Width));
666  }
667  return Builder.CreateSelect(Mask, Res, Dst);
668  }
669  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
670  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
671  Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
672  Res = Builder.CreateSelect(Mask, Res, Dst);
673  Dst = II.getArgOperand(0);
674  }
675  return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
676 }
677 
678 static Value *simplifyX86movmsk(const IntrinsicInst &II) {
679  Value *Arg = II.getArgOperand(0);
680  Type *ResTy = II.getType();
681  Type *ArgTy = Arg->getType();
682 
683  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
684  if (isa<UndefValue>(Arg))
685  return Constant::getNullValue(ResTy);
686 
687  // We can't easily peek through x86_mmx types.
688  if (!ArgTy->isVectorTy())
689  return nullptr;
690 
691  auto *C = dyn_cast<Constant>(Arg);
692  if (!C)
693  return nullptr;
694 
695  // Extract signbits of the vector input and pack into integer result.
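  // (Editorial note, illustrative: movmskps on <-1.0, 2.0, -3.0, 4.0> yields
  // 0b0101 = 5; bit I is set exactly when element I is negative.)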
696  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
697  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
698  auto *COp = C->getAggregateElement(I);
699  if (!COp)
700  return nullptr;
701  if (isa<UndefValue>(COp))
702  continue;
703 
704  auto *CInt = dyn_cast<ConstantInt>(COp);
705  auto *CFp = dyn_cast<ConstantFP>(COp);
706  if (!CInt && !CFp)
707  return nullptr;
708 
709  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
710  Result.setBit(I);
711  }
712 
713  return Constant::getIntegerValue(ResTy, Result);
714 }
715 
716 static Value *simplifyX86insertps(const IntrinsicInst &II,
717                                   InstCombiner::BuilderTy &Builder) {
718  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
719  if (!CInt)
720  return nullptr;
721 
722  VectorType *VecTy = cast<VectorType>(II.getType());
723  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
724 
725  // The immediate permute control byte looks like this:
726  // [3:0] - zero mask for each 32-bit lane
727  // [5:4] - select one 32-bit destination lane
728  // [7:6] - select one 32-bit source lane
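  //
  // Illustrative example (editorial): Imm = 0x40 selects source lane 1,
  // destination lane 0, and no zero mask, so the call becomes
  //   shufflevector <4 x float> %op0, <4 x float> %op1,
  //                 <4 x i32> <i32 5, i32 1, i32 2, i32 3>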
729 
730  uint8_t Imm = CInt->getZExtValue();
731  uint8_t ZMask = Imm & 0xf;
732  uint8_t DestLane = (Imm >> 4) & 0x3;
733  uint8_t SourceLane = (Imm >> 6) & 0x3;
734 
736  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
737  // If all zero mask bits are set, this was just a weird way to
738  // generate a zero vector.
739  if (ZMask == 0xf)
740  return ZeroVector;
741 
742  // Initialize by passing all of the first source bits through.
743  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
744 
745  // We may replace the second operand with the zero vector.
746  Value *V1 = II.getArgOperand(1);
747 
748  if (ZMask) {
749  // If the zero mask is being used with a single input or the zero mask
750  // overrides the destination lane, this is a shuffle with the zero vector.
751  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
752  (ZMask & (1 << DestLane))) {
753  V1 = ZeroVector;
754  // We may still move 32-bits of the first source vector from one lane
755  // to another.
756  ShuffleMask[DestLane] = SourceLane;
757  // The zero mask may override the previous insert operation.
758  for (unsigned i = 0; i < 4; ++i)
759  if ((ZMask >> i) & 0x1)
760  ShuffleMask[i] = i + 4;
761  } else {
762  // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
763  return nullptr;
764  }
765  } else {
766  // Replace the selected destination lane with the selected source lane.
767  ShuffleMask[DestLane] = SourceLane + 4;
768  }
769 
770  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
771 }
772 
773 /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
774 /// or conversion to a shuffle vector.
775 static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
776                                ConstantInt *CILength, ConstantInt *CIIndex,
777  InstCombiner::BuilderTy &Builder) {
778  auto LowConstantHighUndef = [&](uint64_t Val) {
779  Type *IntTy64 = Type::getInt64Ty(II.getContext());
780  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
781  UndefValue::get(IntTy64)};
782  return ConstantVector::get(Args);
783  };
784 
785  // See if we're dealing with constant values.
786  Constant *C0 = dyn_cast<Constant>(Op0);
787  ConstantInt *CI0 =
788  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
789  : nullptr;
790 
791  // Attempt to constant fold.
792  if (CILength && CIIndex) {
793  // From AMD documentation: "The bit index and field length are each six
794  // bits in length; other bits of the field are ignored."
795  APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
796  APInt APLength = CILength->getValue().zextOrTrunc(6);
797 
798  unsigned Index = APIndex.getZExtValue();
799 
800  // From AMD documentation: "a value of zero in the field length is
801  // defined as length of 64".
802  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
803 
804  // From AMD documentation: "If the sum of the bit index + length field
805  // is greater than 64, the results are undefined".
806  unsigned End = Index + Length;
807 
808  // Note that both field index and field length are 8-bit quantities.
809  // Since variables 'Index' and 'Length' are unsigned values
810  // obtained from zero-extending field index and field length
811  // respectively, their sum should never wrap around.
812  if (End > 64)
813  return UndefValue::get(II.getType());
814 
815  // If we are inserting whole bytes, we can convert this to a shuffle.
816  // Lowering can recognize EXTRQI shuffle masks.
817  if ((Length % 8) == 0 && (Index % 8) == 0) {
818  // Convert bit indices to byte indices.
819  Length /= 8;
820  Index /= 8;
821 
822  Type *IntTy8 = Type::getInt8Ty(II.getContext());
823  Type *IntTy32 = Type::getInt32Ty(II.getContext());
824  VectorType *ShufTy = VectorType::get(IntTy8, 16);
825 
826  SmallVector<Constant *, 16> ShuffleMask;
827  for (int i = 0; i != (int)Length; ++i)
828  ShuffleMask.push_back(
829  Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
830  for (int i = Length; i != 8; ++i)
831  ShuffleMask.push_back(
832  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
833  for (int i = 8; i != 16; ++i)
834  ShuffleMask.push_back(UndefValue::get(IntTy32));
835 
836  Value *SV = Builder.CreateShuffleVector(
837  Builder.CreateBitCast(Op0, ShufTy),
838  ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
839  return Builder.CreateBitCast(SV, II.getType());
840  }
841 
842  // Constant Fold - shift Index'th bit to lowest position and mask off
843  // Length bits.
844  if (CI0) {
845  APInt Elt = CI0->getValue();
846  Elt.lshrInPlace(Index);
847  Elt = Elt.zextOrTrunc(Length);
848  return LowConstantHighUndef(Elt.getZExtValue());
849  }
850 
851  // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
852  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
853  Value *Args[] = {Op0, CILength, CIIndex};
854  Module *M = II.getModule();
855  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
856  return Builder.CreateCall(F, Args);
857  }
858  }
859 
860  // Constant Fold - extraction from zero is always {zero, undef}.
861  if (CI0 && CI0->isZero())
862  return LowConstantHighUndef(0);
863 
864  return nullptr;
865 }
866 
867 /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
868 /// folding or conversion to a shuffle vector.
869 static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
870                                  APInt APLength, APInt APIndex,
871  InstCombiner::BuilderTy &Builder) {
872  // From AMD documentation: "The bit index and field length are each six bits
873  // in length; other bits of the field are ignored."
874  APIndex = APIndex.zextOrTrunc(6);
875  APLength = APLength.zextOrTrunc(6);
876 
877  // Attempt to constant fold.
878  unsigned Index = APIndex.getZExtValue();
879 
880  // From AMD documentation: "a value of zero in the field length is
881  // defined as length of 64".
882  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
883 
884  // From AMD documentation: "If the sum of the bit index + length field
885  // is greater than 64, the results are undefined".
886  unsigned End = Index + Length;
887 
888  // Note that both field index and field length are 8-bit quantities.
889  // Since variables 'Index' and 'Length' are unsigned values
890  // obtained from zero-extending field index and field length
891  // respectively, their sum should never wrap around.
892  if (End > 64)
893  return UndefValue::get(II.getType());
894 
895  // If we are inserting whole bytes, we can convert this to a shuffle.
896  // Lowering can recognize INSERTQI shuffle masks.
897  if ((Length % 8) == 0 && (Index % 8) == 0) {
898  // Convert bit indices to byte indices.
899  Length /= 8;
900  Index /= 8;
901 
902  Type *IntTy8 = Type::getInt8Ty(II.getContext());
903  Type *IntTy32 = Type::getInt32Ty(II.getContext());
904  VectorType *ShufTy = VectorType::get(IntTy8, 16);
905 
906  SmallVector<Constant *, 16> ShuffleMask;
907  for (int i = 0; i != (int)Index; ++i)
908  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
909  for (int i = 0; i != (int)Length; ++i)
910  ShuffleMask.push_back(
911  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
912  for (int i = Index + Length; i != 8; ++i)
913  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
914  for (int i = 8; i != 16; ++i)
915  ShuffleMask.push_back(UndefValue::get(IntTy32));
916 
917  Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
918  Builder.CreateBitCast(Op1, ShufTy),
919  ConstantVector::get(ShuffleMask));
920  return Builder.CreateBitCast(SV, II.getType());
921  }
922 
923  // See if we're dealing with constant values.
924  Constant *C0 = dyn_cast<Constant>(Op0);
925  Constant *C1 = dyn_cast<Constant>(Op1);
926  ConstantInt *CI00 =
927  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
928  : nullptr;
929  ConstantInt *CI10 =
930  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
931  : nullptr;
932 
933  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
934  if (CI00 && CI10) {
935  APInt V00 = CI00->getValue();
936  APInt V10 = CI10->getValue();
937  APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
938  V00 = V00 & ~Mask;
939  V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
940  APInt Val = V00 | V10;
941  Type *IntTy64 = Type::getInt64Ty(II.getContext());
942  Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
943  UndefValue::get(IntTy64)};
944  return ConstantVector::get(Args);
945  }
946 
947  // If we were an INSERTQ call, we'll save demanded elements if we convert to
948  // INSERTQI.
949  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
950  Type *IntTy8 = Type::getInt8Ty(II.getContext());
951  Constant *CILength = ConstantInt::get(IntTy8, Length, false);
952  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
953 
954  Value *Args[] = {Op0, Op1, CILength, CIIndex};
955  Module *M = II.getModule();
956  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
957  return Builder.CreateCall(F, Args);
958  }
959 
960  return nullptr;
961 }
962 
963 /// Attempt to convert pshufb* to shufflevector if the mask is constant.
964 static Value *simplifyX86pshufb(const IntrinsicInst &II,
965                                 InstCombiner::BuilderTy &Builder) {
966  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
967  if (!V)
968  return nullptr;
969 
970  auto *VecTy = cast<VectorType>(II.getType());
971  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
972  unsigned NumElts = VecTy->getNumElements();
973  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
974  "Unexpected number of elements in shuffle mask!");
975 
976  // Construct a shuffle mask from constant integers or UNDEFs.
977  Constant *Indexes[64] = {nullptr};
978 
979  // Each byte in the shuffle control mask forms an index to permute the
980  // corresponding byte in the destination operand.
981  for (unsigned I = 0; I < NumElts; ++I) {
982  Constant *COp = V->getAggregateElement(I);
983  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
984  return nullptr;
985 
986  if (isa<UndefValue>(COp)) {
987  Indexes[I] = UndefValue::get(MaskEltTy);
988  continue;
989  }
990 
991  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
992 
993  // If the most significant bit (bit[7]) of each byte of the shuffle
994  // control mask is set, then zero is written in the result byte.
995  // The zero vector is in the right-hand side of the resulting
996  // shufflevector.
997 
998  // The value of each index for the high 128-bit lane is the least
999  // significant 4 bits of the respective shuffle control byte.
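  // (Editorial note, illustrative: a control byte of 0x83 has bit 7 set, so
  // that result byte is zero; a control byte of 0x03 selects byte 3 of the
  // source within the same 128-bit lane.)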
1000  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1001  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1002  }
1003 
1004  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1005  auto V1 = II.getArgOperand(0);
1006  auto V2 = Constant::getNullValue(VecTy);
1007  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1008 }
1009 
1010 /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
1011 static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
1012                                     InstCombiner::BuilderTy &Builder) {
1013  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1014  if (!V)
1015  return nullptr;
1016 
1017  auto *VecTy = cast<VectorType>(II.getType());
1018  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1019  unsigned NumElts = VecTy->getVectorNumElements();
1020  bool IsPD = VecTy->getScalarType()->isDoubleTy();
1021  unsigned NumLaneElts = IsPD ? 2 : 4;
1022  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1023 
1024  // Construct a shuffle mask from constant integers or UNDEFs.
1025  Constant *Indexes[16] = {nullptr};
1026 
1027  // The intrinsics only read one or two bits, clear the rest.
1028  for (unsigned I = 0; I < NumElts; ++I) {
1029  Constant *COp = V->getAggregateElement(I);
1030  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1031  return nullptr;
1032 
1033  if (isa<UndefValue>(COp)) {
1034  Indexes[I] = UndefValue::get(MaskEltTy);
1035  continue;
1036  }
1037 
1038  APInt Index = cast<ConstantInt>(COp)->getValue();
1039  Index = Index.zextOrTrunc(32).getLoBits(2);
1040 
1041  // The PD variants use bit 1 to select the per-lane element index, so
1042  // shift down to convert to generic shuffle mask index.
1043  if (IsPD)
1044  Index.lshrInPlace(1);
1045 
1046  // The _256 variants are a bit trickier since the mask bits always index
1047  // into the corresponding 128-bit half. In order to convert to a generic
1048  // shuffle, we have to make that explicit.
1049  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1050 
1051  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1052  }
1053 
1054  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1055  auto V1 = II.getArgOperand(0);
1056  auto V2 = UndefValue::get(V1->getType());
1057  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1058 }
1059 
1060 /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
1061 static Value *simplifyX86vpermv(const IntrinsicInst &II,
1062                                 InstCombiner::BuilderTy &Builder) {
1063  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1064  if (!V)
1065  return nullptr;
1066 
1067  auto *VecTy = cast<VectorType>(II.getType());
1068  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1069  unsigned Size = VecTy->getNumElements();
1070  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1071  "Unexpected shuffle mask size");
1072 
1073  // Construct a shuffle mask from constant integers or UNDEFs.
1074  Constant *Indexes[64] = {nullptr};
1075 
1076  for (unsigned I = 0; I < Size; ++I) {
1077  Constant *COp = V->getAggregateElement(I);
1078  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1079  return nullptr;
1080 
1081  if (isa<UndefValue>(COp)) {
1082  Indexes[I] = UndefValue::get(MaskEltTy);
1083  continue;
1084  }
1085 
1086  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1087  Index &= Size - 1;
1088  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1089  }
1090 
1091  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
1092  auto V1 = II.getArgOperand(0);
1093  auto V2 = UndefValue::get(VecTy);
1094  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1095 }
1096 
1097 /// Decode XOP integer vector comparison intrinsics.
1098 static Value *simplifyX86vpcom(const IntrinsicInst &II,
1099                                InstCombiner::BuilderTy &Builder,
1100  bool IsSigned) {
1101  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1102  uint64_t Imm = CInt->getZExtValue() & 0x7;
1103  VectorType *VecTy = cast<VectorType>(II.getType());
1104  ICmpInst::Predicate Pred = ICmpInst::ICMP_SLT;
1105 
1106  switch (Imm) {
1107  case 0x0:
1108  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1109  break;
1110  case 0x1:
1111  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1112  break;
1113  case 0x2:
1114  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1115  break;
1116  case 0x3:
1117  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1118  break;
1119  case 0x4:
1120  Pred = ICmpInst::ICMP_EQ; break;
1121  case 0x5:
1122  Pred = ICmpInst::ICMP_NE; break;
1123  case 0x6:
1124  return ConstantInt::getSigned(VecTy, 0); // FALSE
1125  case 0x7:
1126  return ConstantInt::getSigned(VecTy, -1); // TRUE
1127  }
1128 
1129  if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
1130  II.getArgOperand(1)))
1131  return Builder.CreateSExtOrTrunc(Cmp, VecTy);
1132  }
1133  return nullptr;
1134 }
1135 
1136 static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
1137  Value *Arg0 = II.getArgOperand(0);
1138  Value *Arg1 = II.getArgOperand(1);
1139 
1140  // fmin(x, x) -> x
1141  if (Arg0 == Arg1)
1142  return Arg0;
1143 
1144  const auto *C1 = dyn_cast<ConstantFP>(Arg1);
1145 
1146  // fmin(x, nan) -> x
1147  if (C1 && C1->isNaN())
1148  return Arg0;
1149 
1150  // If one operand is undef we may assume it is NaN, in which case the other
1151  // operand is returned: a NaN result requires both operands to be NaN.
1152  //
1153  // fmin(undef, x) -> x
1154  if (isa<UndefValue>(Arg0))
1155  return Arg1;
1156 
1157  // fmin(x, undef) -> x
1158  if (isa<UndefValue>(Arg1))
1159  return Arg0;
1160 
1161  Value *X = nullptr;
1162  Value *Y = nullptr;
1163  if (II.getIntrinsicID() == Intrinsic::minnum) {
1164  // fmin(x, fmin(x, y)) -> fmin(x, y)
1165  // fmin(y, fmin(x, y)) -> fmin(x, y)
1166  if (match(Arg1, m_FMin(m_Value(X), m_Value(Y)))) {
1167  if (Arg0 == X || Arg0 == Y)
1168  return Arg1;
1169  }
1170 
1171  // fmin(fmin(x, y), x) -> fmin(x, y)
1172  // fmin(fmin(x, y), y) -> fmin(x, y)
1173  if (match(Arg0, m_FMin(m_Value(X), m_Value(Y)))) {
1174  if (Arg1 == X || Arg1 == Y)
1175  return Arg0;
1176  }
1177 
1178  // TODO: fmin(nnan x, inf) -> x
1179  // TODO: fmin(nnan ninf x, flt_max) -> x
1180  if (C1 && C1->isInfinity()) {
1181  // fmin(x, -inf) -> -inf
1182  if (C1->isNegative())
1183  return Arg1;
1184  }
1185  } else {
1187  // fmax(x, fmax(x, y)) -> fmax(x, y)
1188  // fmax(y, fmax(x, y)) -> fmax(x, y)
1189  if (match(Arg1, m_FMax(m_Value(X), m_Value(Y)))) {
1190  if (Arg0 == X || Arg0 == Y)
1191  return Arg1;
1192  }
1193 
1194  // fmax(fmax(x, y), x) -> fmax(x, y)
1195  // fmax(fmax(x, y), y) -> fmax(x, y)
1196  if (match(Arg0, m_FMax(m_Value(X), m_Value(Y)))) {
1197  if (Arg1 == X || Arg1 == Y)
1198  return Arg0;
1199  }
1200 
1201  // TODO: fmax(nnan x, -inf) -> x
1202  // TODO: fmax(nnan ninf x, -flt_max) -> x
1203  if (C1 && C1->isInfinity()) {
1204  // fmax(x, inf) -> inf
1205  if (!C1->isNegative())
1206  return Arg1;
1207  }
1208  }
1209  return nullptr;
1210 }
1211 
1212 static bool maskIsAllOneOrUndef(Value *Mask) {
1213  auto *ConstMask = dyn_cast<Constant>(Mask);
1214  if (!ConstMask)
1215  return false;
1216  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1217  return true;
1218  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
1219  ++I) {
1220  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1221  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1222  continue;
1223  return false;
1224  }
1225  return true;
1226 }
1227 
1228 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
1229                                  InstCombiner::BuilderTy &Builder) {
1230  // If the mask is all ones or undefs, this is a plain vector load of the 1st
1231  // argument.
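  // (Editorial note, illustrative: a masked load whose mask is
  //  <4 x i1> <i1 1, i1 1, i1 undef, i1 1> and whose alignment operand is 4
  //  becomes 'load <4 x i32>, <4 x i32>* %ptr, align 4'.)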
1232  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
1233  Value *LoadPtr = II.getArgOperand(0);
1234  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
1235  return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
1236  }
1237 
1238  return nullptr;
1239 }
1240 
1241 static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1242  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1243  if (!ConstMask)
1244  return nullptr;
1245 
1246  // If the mask is all zeros, this instruction does nothing.
1247  if (ConstMask->isNullValue())
1248  return IC.eraseInstFromFunction(II);
1249 
1250  // If the mask is all ones, this is a plain vector store of the 1st argument.
1251  if (ConstMask->isAllOnesValue()) {
1252  Value *StorePtr = II.getArgOperand(1);
1253  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
1254  return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
1255  }
1256 
1257  return nullptr;
1258 }
1259 
1260 static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
1261  // If the mask is all zeros, return the "passthru" argument of the gather.
1262  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
1263  if (ConstMask && ConstMask->isNullValue())
1264  return IC.replaceInstUsesWith(II, II.getArgOperand(3));
1265 
1266  return nullptr;
1267 }
1268 
1269 /// This function transforms launder.invariant.group and strip.invariant.group
1270 /// like:
1271 /// launder(launder(%x)) -> launder(%x) (the result is not the argument)
1272 /// launder(strip(%x)) -> launder(%x)
1273 /// strip(strip(%x)) -> strip(%x) (the result is not the argument)
1274 /// strip(launder(%x)) -> strip(%x)
1275 /// This is legal because it preserves the most recent information about
1276 /// the presence or absence of invariant.group.
1277 static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
1278                                                     InstCombiner &IC) {
1279  auto *Arg = II.getArgOperand(0);
1280  auto *StrippedArg = Arg->stripPointerCasts();
1281  auto *StrippedInvariantGroupsArg = Arg->stripPointerCastsAndInvariantGroups();
1282  if (StrippedArg == StrippedInvariantGroupsArg)
1283  return nullptr; // No launders/strips to remove.
1284 
1285  Value *Result = nullptr;
1286 
1287  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
1288  Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
1289  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
1290  Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
1291  else
1292  llvm_unreachable(
1293      "simplifyInvariantGroupIntrinsic only handles launder and strip");
1294  if (Result->getType()->getPointerAddressSpace() !=
1295      II.getType()->getPointerAddressSpace())
1296  Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
1297  if (Result->getType() != II.getType())
1298  Result = IC.Builder.CreateBitCast(Result, II.getType());
1299 
1300  return cast<Instruction>(Result);
1301 }
1302 
1303 static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
1304  // If the mask is all zeros, a scatter does nothing.
1305  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1306  if (ConstMask && ConstMask->isNullValue())
1307  return IC.eraseInstFromFunction(II);
1308 
1309  return nullptr;
1310 }
1311 
1312 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
1313  assert((II.getIntrinsicID() == Intrinsic::cttz ||
1314  II.getIntrinsicID() == Intrinsic::ctlz) &&
1315  "Expected cttz or ctlz intrinsic");
1316  Value *Op0 = II.getArgOperand(0);
1317 
1318  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
1319 
1320  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
1321  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
1322  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
1323  : Known.countMaxLeadingZeros();
1324  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
1325  : Known.countMinLeadingZeros();
1326 
1327  // If all bits above (ctlz) or below (cttz) the first known one are known
1328  // zero, this value is constant.
1329  // FIXME: This should be in InstSimplify because we're replacing an
1330  // instruction with a constant.
1331  if (PossibleZeros == DefiniteZeros) {
1332  auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
1333  return IC.replaceInstUsesWith(II, C);
1334  }
1335 
1336  // If the input to cttz/ctlz is known to be non-zero,
1337  // then change the 'ZeroIsUndef' parameter to 'true'
1338  // because we know the zero behavior can't affect the result.
1339  if (!Known.One.isNullValue() ||
1340  isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
1341  &IC.getDominatorTree())) {
1342  if (!match(II.getArgOperand(1), m_One())) {
1343  II.setOperand(1, IC.Builder.getTrue());
1344  return &II;
1345  }
1346  }
1347 
1348  // Add range metadata since known bits can't completely reflect what we know.
1349  // TODO: Handle splat vectors.
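  // (Editorial note, illustrative: if the operand of cttz is known to have at
  //  least 2 and at most 5 trailing zeros, the call gets !range metadata of
  //  [2, 6).)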
1350  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1351  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1352  Metadata *LowAndHigh[] = {
1353  ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
1354  ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
1355  II.setMetadata(LLVMContext::MD_range,
1356                 MDNode::get(II.getContext(), LowAndHigh));
1357  return &II;
1358  }
1359 
1360  return nullptr;
1361 }
1362 
1363 static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
1364  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
1365  "Expected ctpop intrinsic");
1366  Value *Op0 = II.getArgOperand(0);
1367  // FIXME: Try to simplify vectors of integers.
1368  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1369  if (!IT)
1370  return nullptr;
1371 
1372  unsigned BitWidth = IT->getBitWidth();
1373  KnownBits Known(BitWidth);
1374  IC.computeKnownBits(Op0, Known, 0, &II);
1375 
1376  unsigned MinCount = Known.countMinPopulation();
1377  unsigned MaxCount = Known.countMaxPopulation();
1378 
1379  // Add range metadata since known bits can't completely reflect what we know.
1380  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1381  Metadata *LowAndHigh[] = {
1382      ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
1383      ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
1384  II.setMetadata(LLVMContext::MD_range,
1385                 MDNode::get(II.getContext(), LowAndHigh));
1386  return &II;
1387  }
1388 
1389  return nullptr;
1390 }
1391 
1392 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1393 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1394 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1395 static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
1396  Value *Ptr = II.getOperand(0);
1397  Value *Mask = II.getOperand(1);
1398  Constant *ZeroVec = Constant::getNullValue(II.getType());
1399 
1400  // Special case a zero mask since that's not a ConstantDataVector.
1401  // This masked load instruction creates a zero vector.
1402  if (isa<ConstantAggregateZero>(Mask))
1403  return IC.replaceInstUsesWith(II, ZeroVec);
1404 
1405  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1406  if (!ConstMask)
1407  return nullptr;
1408 
1409  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1410  // to allow target-independent optimizations.
1411 
1412  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1413  // the LLVM intrinsic definition for the pointer argument.
1414  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1415  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
1416  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1417 
1418  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1419  // on each element's most significant bit (the sign bit).
1420  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1421 
1422  // The pass-through vector for an x86 masked load is a zero vector.
1423  CallInst *NewMaskedLoad =
1424  IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
1425  return IC.replaceInstUsesWith(II, NewMaskedLoad);
1426 }
1427 
1428 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1429 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1430 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1431 static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1432  Value *Ptr = II.getOperand(0);
1433  Value *Mask = II.getOperand(1);
1434  Value *Vec = II.getOperand(2);
1435 
1436  // Special case a zero mask since that's not a ConstantDataVector:
1437  // this masked store instruction does nothing.
1438  if (isa<ConstantAggregateZero>(Mask)) {
1439  IC.eraseInstFromFunction(II);
1440  return true;
1441  }
1442 
1443  // The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
1444  // anything else at this level.
1445  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
1446  return false;
1447 
1448  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1449  if (!ConstMask)
1450  return false;
1451 
1452  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1453  // to allow target-independent optimizations.
1454 
1455  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1456  // the LLVM intrinsic definition for the pointer argument.
1457  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1458  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
1459  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1460 
1461  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1462  // on each element's most significant bit (the sign bit).
1463  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1464 
1465  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
1466 
1467  // 'Replace uses' doesn't work for stores. Erase the original masked store.
1468  IC.eraseInstFromFunction(II);
1469  return true;
1470 }
1471 
1472 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
1473 //
1474 // A single NaN input is folded to minnum, so we rely on that folding for
1475 // handling NaNs.
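// (Editorial note, illustrative: fmed3(1.0, 5.0, 3.0) discards the maximum 5.0
//  and returns maxnum(1.0, 3.0) = 3.0, the median.)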
1476 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
1477  const APFloat &Src2) {
1478  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
1479 
1480  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
1481  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
1482  if (Cmp0 == APFloat::cmpEqual)
1483  return maxnum(Src1, Src2);
1484 
1485  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
1486  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
1487  if (Cmp1 == APFloat::cmpEqual)
1488  return maxnum(Src0, Src2);
1489 
1490  return maxnum(Src0, Src1);
1491 }
1492 
1493 /// Convert a table lookup to shufflevector if the mask is constant.
1494 /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
1495 /// which case we could lower the shufflevector with rev64 instructions
1496 /// as it's actually a byte reverse.
1497 static Value *simplifyNeonTbl1(const IntrinsicInst &II,
1498                                InstCombiner::BuilderTy &Builder) {
1499  // Bail out if the mask is not a constant.
1500  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
1501  if (!C)
1502  return nullptr;
1503 
1504  auto *VecTy = cast<VectorType>(II.getType());
1505  unsigned NumElts = VecTy->getNumElements();
1506 
1507  // Only perform this transformation for <8 x i8> vector types.
1508  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
1509  return nullptr;
1510 
1511  uint32_t Indexes[8];
1512 
1513  for (unsigned I = 0; I < NumElts; ++I) {
1514  Constant *COp = C->getAggregateElement(I);
1515 
1516  if (!COp || !isa<ConstantInt>(COp))
1517  return nullptr;
1518 
1519  Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
1520 
1521  // Make sure the mask indices are in range.
1522  if (Indexes[I] >= NumElts)
1523  return nullptr;
1524  }
1525 
1526  auto *ShuffleMask = ConstantDataVector::get(II.getContext(),
1527  makeArrayRef(Indexes));
1528  auto *V1 = II.getArgOperand(0);
1529  auto *V2 = Constant::getNullValue(V1->getType());
1530  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1531 }
1532 
1533 /// Convert a vector load intrinsic into a simple llvm load instruction.
1534 /// This is beneficial when the underlying object being addressed comes
1535 /// from a constant, since we get constant-folding for free.
1536 static Value *simplifyNeonVld1(const IntrinsicInst &II,
1537                                unsigned MemAlign,
1538  InstCombiner::BuilderTy &Builder) {
1539  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
1540 
1541  if (!IntrAlign)
1542  return nullptr;
1543 
1544  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign ?
1545  MemAlign : IntrAlign->getLimitedValue();
1546 
1547  if (!isPowerOf2_32(Alignment))
1548  return nullptr;
1549 
1550  auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
1551  PointerType::get(II.getType(), 0));
1552  return Builder.CreateAlignedLoad(BCastInst, Alignment);
1553 }
1554 
1555 // Returns true iff the 2 intrinsics have the same operands, limiting the
1556 // comparison to the first NumOperands.
1557 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
1558  unsigned NumOperands) {
1559  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
1560  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
1561  for (unsigned i = 0; i < NumOperands; i++)
1562  if (I.getArgOperand(i) != E.getArgOperand(i))
1563  return false;
1564  return true;
1565 }
1566 
1567 // Remove trivially empty start/end intrinsic ranges, i.e. a start
1568 // immediately followed by an end (ignoring debuginfo or other
1569 // start/end intrinsics in between). As this handles only the most trivial
1570 // cases, tracking the nesting level is not needed:
1571 //
1572 // call @llvm.foo.start(i1 0) ; &I
1573 // call @llvm.foo.start(i1 0)
1574 // call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
1575 // call @llvm.foo.end(i1 0)
1576 static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
1577  unsigned EndID, InstCombiner &IC) {
1578  assert(I.getIntrinsicID() == StartID &&
1579  "Start intrinsic does not have expected ID");
1580  BasicBlock::iterator BI(I), BE(I.getParent()->end());
1581  for (++BI; BI != BE; ++BI) {
1582  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
1583  if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
1584  continue;
1585  if (E->getIntrinsicID() == EndID &&
1586  haveSameOperands(I, *E, E->getNumArgOperands())) {
1587  IC.eraseInstFromFunction(*E);
1588  IC.eraseInstFromFunction(I);
1589  return true;
1590  }
1591  }
1592  break;
1593  }
1594 
1595  return false;
1596 }
1597 
1598 // Convert NVVM intrinsics to target-generic LLVM code where possible.
1599 static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
1600  // Each NVVM intrinsic we can simplify can be replaced with one of:
1601  //
1602  // * an LLVM intrinsic,
1603  // * an LLVM cast operation,
1604  // * an LLVM binary operation, or
1605  // * ad-hoc LLVM IR for the particular operation.
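  // For example, nvvm_sqrt_rn_d maps to the llvm.sqrt intrinsic, nvvm_d2i_rz to
  // an fptosi cast, nvvm_add_rn_d to an fadd, and nvvm_rcp_rn_d to the ad-hoc
  // IR 'fdiv double 1.0, %x'.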
1606 
1607  // Some transformations are only valid when the module's
1608  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
1609  // transformations are valid regardless of the module's ftz setting.
1610  enum FtzRequirementTy {
1611  FTZ_Any, // Any ftz setting is ok.
1612  FTZ_MustBeOn, // Transformation is valid only if ftz is on.
1613  FTZ_MustBeOff, // Transformation is valid only if ftz is off.
1614  };
1615  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
1616  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
1617  // simplify.
1618  enum SpecialCase {
1619  SPC_Reciprocal,
1620  };
1621 
1622  // SimplifyAction is a poor-man's variant (plus an additional flag) that
1623  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
1624  struct SimplifyAction {
1625  // Invariant: At most one of these Optionals has a value.
1626  Optional<Intrinsic::ID> IID;
1627  Optional<Instruction::CastOps> CastOp;
1628  Optional<Instruction::BinaryOps> BinaryOp;
1629  Optional<SpecialCase> Special;
1630 
1631  FtzRequirementTy FtzRequirement = FTZ_Any;
1632 
1633  SimplifyAction() = default;
1634 
1635  SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
1636  : IID(IID), FtzRequirement(FtzReq) {}
1637 
1638  // Cast operations don't have anything to do with FTZ, so we skip that
1639  // argument.
1640  SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
1641 
1642  SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
1643  : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
1644 
1645  SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
1646  : Special(Special), FtzRequirement(FtzReq) {}
1647  };
1648 
1649  // Try to generate a SimplifyAction describing how to replace our
1650  // IntrinsicInstr with target-generic LLVM IR.
1651  const SimplifyAction Action = [II]() -> SimplifyAction {
1652  switch (II->getIntrinsicID()) {
1653  // NVVM intrinsics that map directly to LLVM intrinsics.
1654  case Intrinsic::nvvm_ceil_d:
1655  return {Intrinsic::ceil, FTZ_Any};
1656  case Intrinsic::nvvm_ceil_f:
1657  return {Intrinsic::ceil, FTZ_MustBeOff};
1658  case Intrinsic::nvvm_ceil_ftz_f:
1659  return {Intrinsic::ceil, FTZ_MustBeOn};
1660  case Intrinsic::nvvm_fabs_d:
1661  return {Intrinsic::fabs, FTZ_Any};
1662  case Intrinsic::nvvm_fabs_f:
1663  return {Intrinsic::fabs, FTZ_MustBeOff};
1664  case Intrinsic::nvvm_fabs_ftz_f:
1665  return {Intrinsic::fabs, FTZ_MustBeOn};
1666  case Intrinsic::nvvm_floor_d:
1667  return {Intrinsic::floor, FTZ_Any};
1668  case Intrinsic::nvvm_floor_f:
1669  return {Intrinsic::floor, FTZ_MustBeOff};
1670  case Intrinsic::nvvm_floor_ftz_f:
1671  return {Intrinsic::floor, FTZ_MustBeOn};
1672  case Intrinsic::nvvm_fma_rn_d:
1673  return {Intrinsic::fma, FTZ_Any};
1674  case Intrinsic::nvvm_fma_rn_f:
1675  return {Intrinsic::fma, FTZ_MustBeOff};
1676  case Intrinsic::nvvm_fma_rn_ftz_f:
1677  return {Intrinsic::fma, FTZ_MustBeOn};
1678  case Intrinsic::nvvm_fmax_d:
1679  return {Intrinsic::maxnum, FTZ_Any};
1680  case Intrinsic::nvvm_fmax_f:
1681  return {Intrinsic::maxnum, FTZ_MustBeOff};
1682  case Intrinsic::nvvm_fmax_ftz_f:
1683  return {Intrinsic::maxnum, FTZ_MustBeOn};
1684  case Intrinsic::nvvm_fmin_d:
1685  return {Intrinsic::minnum, FTZ_Any};
1686  case Intrinsic::nvvm_fmin_f:
1687  return {Intrinsic::minnum, FTZ_MustBeOff};
1688  case Intrinsic::nvvm_fmin_ftz_f:
1689  return {Intrinsic::minnum, FTZ_MustBeOn};
1690  case Intrinsic::nvvm_round_d:
1691  return {Intrinsic::round, FTZ_Any};
1692  case Intrinsic::nvvm_round_f:
1693  return {Intrinsic::round, FTZ_MustBeOff};
1694  case Intrinsic::nvvm_round_ftz_f:
1695  return {Intrinsic::round, FTZ_MustBeOn};
1696  case Intrinsic::nvvm_sqrt_rn_d:
1697  return {Intrinsic::sqrt, FTZ_Any};
1698  case Intrinsic::nvvm_sqrt_f:
1699  // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
1700  // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
1701  // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
1702  // the versions with explicit ftz-ness.
1703  return {Intrinsic::sqrt, FTZ_Any};
1704  case Intrinsic::nvvm_sqrt_rn_f:
1705  return {Intrinsic::sqrt, FTZ_MustBeOff};
1706  case Intrinsic::nvvm_sqrt_rn_ftz_f:
1707  return {Intrinsic::sqrt, FTZ_MustBeOn};
1708  case Intrinsic::nvvm_trunc_d:
1709  return {Intrinsic::trunc, FTZ_Any};
1710  case Intrinsic::nvvm_trunc_f:
1711  return {Intrinsic::trunc, FTZ_MustBeOff};
1712  case Intrinsic::nvvm_trunc_ftz_f:
1713  return {Intrinsic::trunc, FTZ_MustBeOn};
1714 
1715  // NVVM intrinsics that map to LLVM cast operations.
1716  //
1717  // Note that llvm's target-generic conversion operators correspond to the rz
1718  // (round to zero) versions of the nvvm conversion intrinsics, even though
1719  // most everything else here uses the rn (round to nearest even) nvvm ops.
1720  case Intrinsic::nvvm_d2i_rz:
1721  case Intrinsic::nvvm_f2i_rz:
1722  case Intrinsic::nvvm_d2ll_rz:
1723  case Intrinsic::nvvm_f2ll_rz:
1724  return {Instruction::FPToSI};
1725  case Intrinsic::nvvm_d2ui_rz:
1726  case Intrinsic::nvvm_f2ui_rz:
1727  case Intrinsic::nvvm_d2ull_rz:
1728  case Intrinsic::nvvm_f2ull_rz:
1729  return {Instruction::FPToUI};
1730  case Intrinsic::nvvm_i2d_rz:
1731  case Intrinsic::nvvm_i2f_rz:
1732  case Intrinsic::nvvm_ll2d_rz:
1733  case Intrinsic::nvvm_ll2f_rz:
1734  return {Instruction::SIToFP};
1735  case Intrinsic::nvvm_ui2d_rz:
1736  case Intrinsic::nvvm_ui2f_rz:
1737  case Intrinsic::nvvm_ull2d_rz:
1738  case Intrinsic::nvvm_ull2f_rz:
1739  return {Instruction::UIToFP};
1740 
1741  // NVVM intrinsics that map to LLVM binary ops.
1742  case Intrinsic::nvvm_add_rn_d:
1743  return {Instruction::FAdd, FTZ_Any};
1744  case Intrinsic::nvvm_add_rn_f:
1745  return {Instruction::FAdd, FTZ_MustBeOff};
1746  case Intrinsic::nvvm_add_rn_ftz_f:
1747  return {Instruction::FAdd, FTZ_MustBeOn};
1748  case Intrinsic::nvvm_mul_rn_d:
1749  return {Instruction::FMul, FTZ_Any};
1750  case Intrinsic::nvvm_mul_rn_f:
1751  return {Instruction::FMul, FTZ_MustBeOff};
1752  case Intrinsic::nvvm_mul_rn_ftz_f:
1753  return {Instruction::FMul, FTZ_MustBeOn};
1754  case Intrinsic::nvvm_div_rn_d:
1755  return {Instruction::FDiv, FTZ_Any};
1756  case Intrinsic::nvvm_div_rn_f:
1757  return {Instruction::FDiv, FTZ_MustBeOff};
1758  case Intrinsic::nvvm_div_rn_ftz_f:
1759  return {Instruction::FDiv, FTZ_MustBeOn};
1760 
1761  // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
1762  // need special handling.
1763  //
1764  // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
1765  // as well.
1766  case Intrinsic::nvvm_rcp_rn_d:
1767  return {SPC_Reciprocal, FTZ_Any};
1768  case Intrinsic::nvvm_rcp_rn_f:
1769  return {SPC_Reciprocal, FTZ_MustBeOff};
1770  case Intrinsic::nvvm_rcp_rn_ftz_f:
1771  return {SPC_Reciprocal, FTZ_MustBeOn};
1772 
1773  // We do not currently simplify intrinsics that give an approximate answer.
1774  // These include:
1775  //
1776  // - nvvm_cos_approx_{f,ftz_f}
1777  // - nvvm_ex2_approx_{d,f,ftz_f}
1778  // - nvvm_lg2_approx_{d,f,ftz_f}
1779  // - nvvm_sin_approx_{f,ftz_f}
1780  // - nvvm_sqrt_approx_{f,ftz_f}
1781  // - nvvm_rsqrt_approx_{d,f,ftz_f}
1782  // - nvvm_div_approx_{ftz_d,ftz_f,f}
1783  // - nvvm_rcp_approx_ftz_d
1784  //
1785  // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
1786  // means that fastmath is enabled in the intrinsic. Unfortunately only
1787  // binary operators (currently) have a fastmath bit in SelectionDAG, so this
1788  // information gets lost and we can't select on it.
1789  //
1790  // TODO: div and rcp are lowered to a binary op, so these we could in theory
1791  // lower them to "fast fdiv".
1792 
1793  default:
1794  return {};
1795  }
1796  }();
1797 
1798  // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
1799  // can bail out now. (Notice that in the case that IID is not an NVVM
1800  // intrinsic, we don't have to look up any module metadata, as
1801  // FtzRequirementTy will be FTZ_Any.)
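  // For example (illustrative), a function carrying the attribute
  // "nvptx-f32ftz"="true" satisfies FTZ_MustBeOn, so nvvm_floor_ftz_f can be
  // rewritten to llvm.floor.f32; if the attribute is absent or "false", only
  // FTZ_MustBeOff and FTZ_Any transformations are performed.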
1802  if (Action.FtzRequirement != FTZ_Any) {
1803  bool FtzEnabled =
1804  II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
1805  "true";
1806 
1807  if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
1808  return nullptr;
1809  }
1810 
1811  // Simplify to target-generic intrinsic.
1812  if (Action.IID) {
1813  SmallVector<Value *, 4> Args(II->arg_operands());
1814  // All the target-generic intrinsics currently of interest to us have one
1815  // type argument, equal to that of the nvvm intrinsic's argument.
1816  Type *Tys[] = {II->getArgOperand(0)->getType()};
1817  return CallInst::Create(
1818  Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
1819  }
1820 
1821  // Simplify to target-generic binary op.
1822  if (Action.BinaryOp)
1823  return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
1824  II->getArgOperand(1), II->getName());
1825 
1826  // Simplify to target-generic cast op.
1827  if (Action.CastOp)
1828  return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
1829  II->getName());
1830 
1831  // All that's left are the special cases.
1832  if (!Action.Special)
1833  return nullptr;
1834 
1835  switch (*Action.Special) {
1836  case SPC_Reciprocal:
1837  // Simplify reciprocal.
1838  return BinaryOperator::Create(
1839  Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
1840  II->getArgOperand(0), II->getName());
1841  }
1842  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
1843 }
1844 
1845 Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
1846  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
1847  return nullptr;
1848 }
1849 
1850 Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
1851  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
1852  return nullptr;
1853 }
1854 
1855 /// CallInst simplification. This mostly only handles folding of intrinsic
1856 /// instructions. For normal calls, it allows visitCallSite to do the heavy
1857 /// lifting.
1858 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
1859  if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
1860  return replaceInstUsesWith(CI, V);
1861 
1862  if (isFreeCall(&CI, &TLI))
1863  return visitFree(CI);
1864 
1865  // If the caller function is nounwind, mark the call as nounwind, even if the
1866  // callee isn't.
1867  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1868  CI.setDoesNotThrow();
1869  return &CI;
1870  }
1871 
1872  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1873  if (!II) return visitCallSite(&CI);
1874 
1875  // Intrinsics cannot occur in an invoke, so handle them here instead of in
1876  // visitCallSite.
1877  if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1878  bool Changed = false;
1879 
1880  // memmove/cpy/set of zero bytes is a noop.
1881  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1882  if (NumBytes->isNullValue())
1883  return eraseInstFromFunction(CI);
1884 
1885  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1886  if (CI->getZExtValue() == 1) {
1887  // Replace the instruction with just byte operations. We would
1888  // transform other cases to loads/stores, but we don't know if
1889  // alignment is sufficient.
1890  }
1891  }
1892 
1893  // No other transformations apply to volatile transfers.
1894  if (auto *M = dyn_cast<MemIntrinsic>(MI))
1895  if (M->isVolatile())
1896  return nullptr;
1897 
1898  // If we have a memmove and the source operation is a constant global,
1899  // then the source and dest pointers can't alias, so we can change this
1900  // into a call to memcpy.
1901  if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1902  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1903  if (GVSrc->isConstant()) {
1904  Module *M = CI.getModule();
1905  Intrinsic::ID MemCpyID =
1906  isa<AtomicMemMoveInst>(MMI)
1907  ? Intrinsic::memcpy_element_unordered_atomic
1908  : Intrinsic::memcpy;
1909  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1910  CI.getArgOperand(1)->getType(),
1911  CI.getArgOperand(2)->getType() };
1912  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1913  Changed = true;
1914  }
1915  }
1916 
1917  if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1918  // memmove(x,x,size) -> noop.
1919  if (MTI->getSource() == MTI->getDest())
1920  return eraseInstFromFunction(CI);
1921  }
1922 
1923  // If we can determine a pointer alignment that is bigger than currently
1924  // set, update the alignment.
1925  if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1926  if (Instruction *I = SimplifyAnyMemTransfer(MTI))
1927  return I;
1928  } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1929  if (Instruction *I = SimplifyAnyMemSet(MSI))
1930  return I;
1931  }
1932 
1933  if (Changed) return II;
1934  }
1935 
1936  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
1937  return I;
1938 
1939  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
1940  unsigned DemandedWidth) {
1941  APInt UndefElts(Width, 0);
1942  APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
1943  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
1944  };
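  // For example, a scalar conversion such as cvtss2si below reads only element 0
  // of its vector operand, so calling this helper with DemandedWidth == 1 can
  // turn the unused upper lanes into undef and let feeding shuffles or
  // insertelements that only define those lanes be stripped.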
1945 
1946  switch (II->getIntrinsicID()) {
1947  default: break;
1948  case Intrinsic::objectsize:
1949  if (ConstantInt *N =
1950  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1951  return replaceInstUsesWith(CI, N);
1952  return nullptr;
1953  case Intrinsic::bswap: {
1954  Value *IIOperand = II->getArgOperand(0);
1955  Value *X = nullptr;
1956 
1957  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
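    // e.g. (illustrative) for i32 %x truncated to i16:
    //   bswap(trunc i16 (bswap i32 %x)) --> trunc i16 (lshr i32 %x, 16)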
1958  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1959  unsigned C = X->getType()->getPrimitiveSizeInBits() -
1960  IIOperand->getType()->getPrimitiveSizeInBits();
1961  Value *CV = ConstantInt::get(X->getType(), C);
1962  Value *V = Builder.CreateLShr(X, CV);
1963  return new TruncInst(V, IIOperand->getType());
1964  }
1965  break;
1966  }
1967  case Intrinsic::masked_load:
1968  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
1969  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1970  break;
1971  case Intrinsic::masked_store:
1972  return simplifyMaskedStore(*II, *this);
1973  case Intrinsic::masked_gather:
1974  return simplifyMaskedGather(*II, *this);
1975  case Intrinsic::masked_scatter:
1976  return simplifyMaskedScatter(*II, *this);
1977  case Intrinsic::launder_invariant_group:
1978  case Intrinsic::strip_invariant_group:
1979  if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
1980  return replaceInstUsesWith(*II, SkippedBarrier);
1981  break;
1982  case Intrinsic::powi:
1983  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1984  // 0 and 1 are handled in instsimplify
1985 
1986  // powi(x, -1) -> 1/x
1987  if (Power->isMinusOne())
1988  return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
1989  II->getArgOperand(0));
1990  // powi(x, 2) -> x*x
1991  if (Power->equalsInt(2))
1992  return BinaryOperator::CreateFMul(II->getArgOperand(0),
1993  II->getArgOperand(0));
1994  }
1995  break;
1996 
1997  case Intrinsic::cttz:
1998  case Intrinsic::ctlz:
1999  if (auto *I = foldCttzCtlz(*II, *this))
2000  return I;
2001  break;
2002 
2003  case Intrinsic::ctpop:
2004  if (auto *I = foldCtpop(*II, *this))
2005  return I;
2006  break;
2007 
2008  case Intrinsic::uadd_with_overflow:
2009  case Intrinsic::sadd_with_overflow:
2010  case Intrinsic::umul_with_overflow:
2011  case Intrinsic::smul_with_overflow:
2012  if (isa<Constant>(II->getArgOperand(0)) &&
2013  !isa<Constant>(II->getArgOperand(1))) {
2014  // Canonicalize constants into the RHS.
2015  Value *LHS = II->getArgOperand(0);
2016  II->setArgOperand(0, II->getArgOperand(1));
2017  II->setArgOperand(1, LHS);
2018  return II;
2019  }
2020  LLVM_FALLTHROUGH;
2021 
2022  case Intrinsic::usub_with_overflow:
2023  case Intrinsic::ssub_with_overflow: {
2024  OverflowCheckFlavor OCF =
2025  IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
2026  assert(OCF != OCF_INVALID && "unexpected!");
2027 
2028  Value *OperationResult = nullptr;
2029  Constant *OverflowResult = nullptr;
2030  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
2031  *II, OperationResult, OverflowResult))
2032  return CreateOverflowTuple(II, OperationResult, OverflowResult);
2033 
2034  break;
2035  }
2036 
2037  case Intrinsic::minnum:
2038  case Intrinsic::maxnum: {
2039  Value *Arg0 = II->getArgOperand(0);
2040  Value *Arg1 = II->getArgOperand(1);
2041  // Canonicalize constants to the RHS.
2042  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
2043  II->setArgOperand(0, Arg1);
2044  II->setArgOperand(1, Arg0);
2045  return II;
2046  }
2047 
2048  // FIXME: Simplifications should be in instsimplify.
2049  if (Value *V = simplifyMinnumMaxnum(*II))
2050  return replaceInstUsesWith(*II, V);
2051 
2052  Value *X, *Y;
2053  if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2054  (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2055  // If both operands are negated, invert the call and negate the result:
2056  // minnum(-X, -Y) --> -(maxnum(X, Y))
2057  // maxnum(-X, -Y) --> -(minnum(X, Y))
2058  Intrinsic::ID NewIID = II->getIntrinsicID() == Intrinsic::maxnum ?
2059  Intrinsic::minnum : Intrinsic::maxnum;
2060  Value *NewCall = Builder.CreateIntrinsic(NewIID, { X, Y }, II);
2061  Instruction *FNeg = BinaryOperator::CreateFNeg(NewCall);
2062  FNeg->copyIRFlags(II);
2063  return FNeg;
2064  }
2065  break;
2066  }
2067  case Intrinsic::fmuladd: {
2068  // Canonicalize fast fmuladd to the separate fmul + fadd.
2069  if (II->isFast()) {
2070  BuilderTy::FastMathFlagGuard Guard(Builder);
2071  Builder.setFastMathFlags(II->getFastMathFlags());
2072  Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
2073  II->getArgOperand(1));
2074  Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
2075  Add->takeName(II);
2076  return replaceInstUsesWith(*II, Add);
2077  }
2078 
2079  LLVM_FALLTHROUGH;
2080  }
2081  case Intrinsic::fma: {
2082  Value *Src0 = II->getArgOperand(0);
2083  Value *Src1 = II->getArgOperand(1);
2084 
2085  // Canonicalize constant multiply operand to Src1.
2086  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
2087  II->setArgOperand(0, Src1);
2088  II->setArgOperand(1, Src0);
2089  std::swap(Src0, Src1);
2090  }
2091 
2092  // fma fneg(x), fneg(y), z -> fma x, y, z
2093  Value *X, *Y;
2094  if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
2095  II->setArgOperand(0, X);
2096  II->setArgOperand(1, Y);
2097  return II;
2098  }
2099 
2100  // fma fabs(x), fabs(x), z -> fma x, x, z
2101  if (match(Src0, m_Intrinsic<Intrinsic::fabs>(m_Value(X))) &&
2102  match(Src1, m_Intrinsic<Intrinsic::fabs>(m_Specific(X)))) {
2103  II->setArgOperand(0, X);
2104  II->setArgOperand(1, X);
2105  return II;
2106  }
2107 
2108  // fma x, 1, z -> fadd x, z
2109  if (match(Src1, m_FPOne())) {
2110  auto *FAdd = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
2111  FAdd->copyFastMathFlags(II);
2112  return FAdd;
2113  }
2114 
2115  break;
2116  }
2117  case Intrinsic::fabs: {
2118  Value *Cond;
2119  Constant *LHS, *RHS;
2120  if (match(II->getArgOperand(0),
2121  m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
2122  CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
2123  CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
2124  return SelectInst::Create(Cond, Call0, Call1);
2125  }
2126 
2127  LLVM_FALLTHROUGH;
2128  }
2129  case Intrinsic::ceil:
2130  case Intrinsic::floor:
2131  case Intrinsic::round:
2132  case Intrinsic::nearbyint:
2133  case Intrinsic::rint:
2134  case Intrinsic::trunc: {
2135  Value *ExtSrc;
2136  if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
2137  // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2138  Value *NarrowII = Builder.CreateIntrinsic(II->getIntrinsicID(),
2139  { ExtSrc }, II);
2140  return new FPExtInst(NarrowII, II->getType());
2141  }
2142  break;
2143  }
2144  case Intrinsic::cos:
2145  case Intrinsic::amdgcn_cos: {
2146  Value *SrcSrc;
2147  Value *Src = II->getArgOperand(0);
2148  if (match(Src, m_FNeg(m_Value(SrcSrc))) ||
2149  match(Src, m_Intrinsic<Intrinsic::fabs>(m_Value(SrcSrc)))) {
2150  // cos(-x) -> cos(x)
2151  // cos(fabs(x)) -> cos(x)
2152  II->setArgOperand(0, SrcSrc);
2153  return II;
2154  }
2155 
2156  break;
2157  }
2158  case Intrinsic::ppc_altivec_lvx:
2159  case Intrinsic::ppc_altivec_lvxl:
2160  // Turn PPC lvx -> load if the pointer is known aligned.
2161  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2162  &DT) >= 16) {
2163  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2164  PointerType::getUnqual(II->getType()));
2165  return new LoadInst(Ptr);
2166  }
2167  break;
2168  case Intrinsic::ppc_vsx_lxvw4x:
2169  case Intrinsic::ppc_vsx_lxvd2x: {
2170  // Turn PPC VSX loads into normal loads.
2171  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2172  PointerType::getUnqual(II->getType()));
2173  return new LoadInst(Ptr, Twine(""), false, 1);
2174  }
2175  case Intrinsic::ppc_altivec_stvx:
2176  case Intrinsic::ppc_altivec_stvxl:
2177  // Turn stvx -> store if the pointer is known aligned.
2178  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2179  &DT) >= 16) {
2180  Type *OpPtrTy =
2181  PointerType::getUnqual(II->getArgOperand(0)->getType());
2182  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2183  return new StoreInst(II->getArgOperand(0), Ptr);
2184  }
2185  break;
2186  case Intrinsic::ppc_vsx_stxvw4x:
2187  case Intrinsic::ppc_vsx_stxvd2x: {
2188  // Turn PPC VSX stores into normal stores.
2189  Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
2190  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2191  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
2192  }
2193  case Intrinsic::ppc_qpx_qvlfs:
2194  // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
2195  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2196  &DT) >= 16) {
2197  Type *VTy = VectorType::get(Builder.getFloatTy(),
2198  II->getType()->getVectorNumElements());
2199  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2200  PointerType::getUnqual(VTy));
2201  Value *Load = Builder.CreateLoad(Ptr);
2202  return new FPExtInst(Load, II->getType());
2203  }
2204  break;
2205  case Intrinsic::ppc_qpx_qvlfd:
2206  // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
2207  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
2208  &DT) >= 32) {
2209  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2210  PointerType::getUnqual(II->getType()));
2211  return new LoadInst(Ptr);
2212  }
2213  break;
2214  case Intrinsic::ppc_qpx_qvstfs:
2215  // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
2216  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2217  &DT) >= 16) {
2218  Type *VTy = VectorType::get(Builder.getFloatTy(),
2219  II->getArgOperand(0)->getType()->getVectorNumElements());
2220  Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
2221  Type *OpPtrTy = PointerType::getUnqual(VTy);
2222  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2223  return new StoreInst(TOp, Ptr);
2224  }
2225  break;
2226  case Intrinsic::ppc_qpx_qvstfd:
2227  // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
2228  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
2229  &DT) >= 32) {
2230  Type *OpPtrTy =
2231  PointerType::getUnqual(II->getArgOperand(0)->getType());
2232  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2233  return new StoreInst(II->getArgOperand(0), Ptr);
2234  }
2235  break;
2236 
2237  case Intrinsic::x86_bmi_bextr_32:
2238  case Intrinsic::x86_bmi_bextr_64:
2239  case Intrinsic::x86_tbm_bextri_u32:
2240  case Intrinsic::x86_tbm_bextri_u64:
2241  // If the RHS is a constant we can try some simplifications.
2242  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2243  uint64_t Shift = C->getZExtValue();
2244  uint64_t Length = (Shift >> 8) & 0xff;
2245  Shift &= 0xff;
2246  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2247  // If the length is 0 or the shift is out of range, replace with zero.
2248  if (Length == 0 || Shift >= BitWidth)
2249  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2250  // If the LHS is also a constant, we can completely constant fold this.
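    // Example (illustrative values): bextr(0x12345678, 0x0804) has Shift = 4
    // and Length = 8, so it folds to (0x12345678 >> 4) & 0xff = 0x67.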
2251  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2252  uint64_t Result = InC->getZExtValue() >> Shift;
2253  if (Length > BitWidth)
2254  Length = BitWidth;
2255  Result &= maskTrailingOnes<uint64_t>(Length);
2256  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2257  }
2258  // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2259  // are only masking bits that a shift already cleared?
2260  }
2261  break;
2262 
2263  case Intrinsic::x86_bmi_bzhi_32:
2264  case Intrinsic::x86_bmi_bzhi_64:
2265  // If the RHS is a constant we can try some simplifications.
2266  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2267  uint64_t Index = C->getZExtValue() & 0xff;
2268  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2269  if (Index >= BitWidth)
2270  return replaceInstUsesWith(CI, II->getArgOperand(0));
2271  if (Index == 0)
2272  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2273  // If the LHS is also a constant, we can completely constant fold this.
2274  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2275  uint64_t Result = InC->getZExtValue();
2276  Result &= maskTrailingOnes<uint64_t>(Index);
2277  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2278  }
2279  // TODO should we convert this to an AND if the RHS is constant?
2280  }
2281  break;
2282 
2283  case Intrinsic::x86_vcvtph2ps_128:
2284  case Intrinsic::x86_vcvtph2ps_256: {
2285  auto Arg = II->getArgOperand(0);
2286  auto ArgType = cast<VectorType>(Arg->getType());
2287  auto RetType = cast<VectorType>(II->getType());
2288  unsigned ArgWidth = ArgType->getNumElements();
2289  unsigned RetWidth = RetType->getNumElements();
2290  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
2291  assert(ArgType->isIntOrIntVectorTy() &&
2292  ArgType->getScalarSizeInBits() == 16 &&
2293  "CVTPH2PS input type should be 16-bit integer vector");
2294  assert(RetType->getScalarType()->isFloatTy() &&
2295  "CVTPH2PS output type should be 32-bit float vector");
2296 
2297  // Constant folding: Convert to generic half to single conversion.
2298  if (isa<ConstantAggregateZero>(Arg))
2299  return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
2300 
2301  if (isa<ConstantDataVector>(Arg)) {
2302  auto VectorHalfAsShorts = Arg;
2303  if (RetWidth < ArgWidth) {
2304  SmallVector<uint32_t, 8> SubVecMask;
2305  for (unsigned i = 0; i != RetWidth; ++i)
2306  SubVecMask.push_back((int)i);
2307  VectorHalfAsShorts = Builder.CreateShuffleVector(
2308  Arg, UndefValue::get(ArgType), SubVecMask);
2309  }
2310 
2311  auto VectorHalfType =
2312  VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
2313  auto VectorHalfs =
2314  Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
2315  auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
2316  return replaceInstUsesWith(*II, VectorFloats);
2317  }
2318 
2319  // We only use the lowest lanes of the argument.
2320  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
2321  II->setArgOperand(0, V);
2322  return II;
2323  }
2324  break;
2325  }
2326 
2327  case Intrinsic::x86_sse_cvtss2si:
2328  case Intrinsic::x86_sse_cvtss2si64:
2329  case Intrinsic::x86_sse_cvttss2si:
2330  case Intrinsic::x86_sse_cvttss2si64:
2331  case Intrinsic::x86_sse2_cvtsd2si:
2332  case Intrinsic::x86_sse2_cvtsd2si64:
2333  case Intrinsic::x86_sse2_cvttsd2si:
2334  case Intrinsic::x86_sse2_cvttsd2si64:
2335  case Intrinsic::x86_avx512_vcvtss2si32:
2336  case Intrinsic::x86_avx512_vcvtss2si64:
2337  case Intrinsic::x86_avx512_vcvtss2usi32:
2338  case Intrinsic::x86_avx512_vcvtss2usi64:
2339  case Intrinsic::x86_avx512_vcvtsd2si32:
2340  case Intrinsic::x86_avx512_vcvtsd2si64:
2341  case Intrinsic::x86_avx512_vcvtsd2usi32:
2342  case Intrinsic::x86_avx512_vcvtsd2usi64:
2343  case Intrinsic::x86_avx512_cvttss2si:
2344  case Intrinsic::x86_avx512_cvttss2si64:
2345  case Intrinsic::x86_avx512_cvttss2usi:
2346  case Intrinsic::x86_avx512_cvttss2usi64:
2347  case Intrinsic::x86_avx512_cvttsd2si:
2348  case Intrinsic::x86_avx512_cvttsd2si64:
2349  case Intrinsic::x86_avx512_cvttsd2usi:
2350  case Intrinsic::x86_avx512_cvttsd2usi64: {
2351  // These intrinsics only demand the 0th element of their input vectors. If
2352  // we can simplify the input based on that, do so now.
2353  Value *Arg = II->getArgOperand(0);
2354  unsigned VWidth = Arg->getType()->getVectorNumElements();
2355  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2356  II->setArgOperand(0, V);
2357  return II;
2358  }
2359  break;
2360  }
2361 
2362  case Intrinsic::x86_sse41_round_ps:
2363  case Intrinsic::x86_sse41_round_pd:
2364  case Intrinsic::x86_avx_round_ps_256:
2365  case Intrinsic::x86_avx_round_pd_256:
2366  case Intrinsic::x86_avx512_mask_rndscale_ps_128:
2367  case Intrinsic::x86_avx512_mask_rndscale_ps_256:
2368  case Intrinsic::x86_avx512_mask_rndscale_ps_512:
2369  case Intrinsic::x86_avx512_mask_rndscale_pd_128:
2370  case Intrinsic::x86_avx512_mask_rndscale_pd_256:
2371  case Intrinsic::x86_avx512_mask_rndscale_pd_512:
2372  case Intrinsic::x86_avx512_mask_rndscale_ss:
2373  case Intrinsic::x86_avx512_mask_rndscale_sd:
2374  if (Value *V = simplifyX86round(*II, Builder))
2375  return replaceInstUsesWith(*II, V);
2376  break;
2377 
2378  case Intrinsic::x86_mmx_pmovmskb:
2379  case Intrinsic::x86_sse_movmsk_ps:
2380  case Intrinsic::x86_sse2_movmsk_pd:
2381  case Intrinsic::x86_sse2_pmovmskb_128:
2382  case Intrinsic::x86_avx_movmsk_pd_256:
2383  case Intrinsic::x86_avx_movmsk_ps_256:
2384  case Intrinsic::x86_avx2_pmovmskb:
2385  if (Value *V = simplifyX86movmsk(*II))
2386  return replaceInstUsesWith(*II, V);
2387  break;
2388 
2389  case Intrinsic::x86_sse_comieq_ss:
2390  case Intrinsic::x86_sse_comige_ss:
2391  case Intrinsic::x86_sse_comigt_ss:
2392  case Intrinsic::x86_sse_comile_ss:
2393  case Intrinsic::x86_sse_comilt_ss:
2394  case Intrinsic::x86_sse_comineq_ss:
2395  case Intrinsic::x86_sse_ucomieq_ss:
2396  case Intrinsic::x86_sse_ucomige_ss:
2397  case Intrinsic::x86_sse_ucomigt_ss:
2398  case Intrinsic::x86_sse_ucomile_ss:
2399  case Intrinsic::x86_sse_ucomilt_ss:
2400  case Intrinsic::x86_sse_ucomineq_ss:
2401  case Intrinsic::x86_sse2_comieq_sd:
2402  case Intrinsic::x86_sse2_comige_sd:
2403  case Intrinsic::x86_sse2_comigt_sd:
2404  case Intrinsic::x86_sse2_comile_sd:
2405  case Intrinsic::x86_sse2_comilt_sd:
2406  case Intrinsic::x86_sse2_comineq_sd:
2407  case Intrinsic::x86_sse2_ucomieq_sd:
2408  case Intrinsic::x86_sse2_ucomige_sd:
2409  case Intrinsic::x86_sse2_ucomigt_sd:
2410  case Intrinsic::x86_sse2_ucomile_sd:
2411  case Intrinsic::x86_sse2_ucomilt_sd:
2412  case Intrinsic::x86_sse2_ucomineq_sd:
2413  case Intrinsic::x86_avx512_vcomi_ss:
2414  case Intrinsic::x86_avx512_vcomi_sd:
2415  case Intrinsic::x86_avx512_mask_cmp_ss:
2416  case Intrinsic::x86_avx512_mask_cmp_sd: {
2417  // These intrinsics only demand the 0th element of their input vectors. If
2418  // we can simplify the input based on that, do so now.
2419  bool MadeChange = false;
2420  Value *Arg0 = II->getArgOperand(0);
2421  Value *Arg1 = II->getArgOperand(1);
2422  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2423  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2424  II->setArgOperand(0, V);
2425  MadeChange = true;
2426  }
2427  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2428  II->setArgOperand(1, V);
2429  MadeChange = true;
2430  }
2431  if (MadeChange)
2432  return II;
2433  break;
2434  }
2435  case Intrinsic::x86_avx512_cmp_pd_128:
2436  case Intrinsic::x86_avx512_cmp_pd_256:
2437  case Intrinsic::x86_avx512_cmp_pd_512:
2438  case Intrinsic::x86_avx512_cmp_ps_128:
2439  case Intrinsic::x86_avx512_cmp_ps_256:
2440  case Intrinsic::x86_avx512_cmp_ps_512: {
2441  // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
2442  Value *Arg0 = II->getArgOperand(0);
2443  Value *Arg1 = II->getArgOperand(1);
2444  bool Arg0IsZero = match(Arg0, m_PosZeroFP());
2445  if (Arg0IsZero)
2446  std::swap(Arg0, Arg1);
2447  Value *A, *B;
2448  // This fold requires only the NINF(not +/- inf) since inf minus
2449  // inf is nan.
2450  // NSZ(No Signed Zeros) is not needed because zeros of any sign are
2451  // equal for both compares.
2452  // NNAN is not needed because nans compare the same for both compares.
2453  // The compare intrinsic uses the above assumptions and therefore
2454  // doesn't require additional flags.
2455  if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
2456  match(Arg1, m_PosZeroFP()) && isa<Instruction>(Arg0) &&
2457  cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
2458  if (Arg0IsZero)
2459  std::swap(A, B);
2460  II->setArgOperand(0, A);
2461  II->setArgOperand(1, B);
2462  return II;
2463  }
2464  break;
2465  }
2466 
2467  case Intrinsic::x86_avx512_add_ps_512:
2468  case Intrinsic::x86_avx512_div_ps_512:
2469  case Intrinsic::x86_avx512_mul_ps_512:
2470  case Intrinsic::x86_avx512_sub_ps_512:
2471  case Intrinsic::x86_avx512_add_pd_512:
2472  case Intrinsic::x86_avx512_div_pd_512:
2473  case Intrinsic::x86_avx512_mul_pd_512:
2474  case Intrinsic::x86_avx512_sub_pd_512:
2475  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2476  // IR operations.
2477  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2478  if (R->getValue() == 4) {
2479  Value *Arg0 = II->getArgOperand(0);
2480  Value *Arg1 = II->getArgOperand(1);
2481 
2482  Value *V;
2483  switch (II->getIntrinsicID()) {
2484  default: llvm_unreachable("Case stmts out of sync!");
2485  case Intrinsic::x86_avx512_add_ps_512:
2486  case Intrinsic::x86_avx512_add_pd_512:
2487  V = Builder.CreateFAdd(Arg0, Arg1);
2488  break;
2489  case Intrinsic::x86_avx512_sub_ps_512:
2490  case Intrinsic::x86_avx512_sub_pd_512:
2491  V = Builder.CreateFSub(Arg0, Arg1);
2492  break;
2493  case Intrinsic::x86_avx512_mul_ps_512:
2494  case Intrinsic::x86_avx512_mul_pd_512:
2495  V = Builder.CreateFMul(Arg0, Arg1);
2496  break;
2497  case Intrinsic::x86_avx512_div_ps_512:
2498  case Intrinsic::x86_avx512_div_pd_512:
2499  V = Builder.CreateFDiv(Arg0, Arg1);
2500  break;
2501  }
2502 
2503  return replaceInstUsesWith(*II, V);
2504  }
2505  }
2506  break;
2507 
2508  case Intrinsic::x86_avx512_mask_add_ss_round:
2509  case Intrinsic::x86_avx512_mask_div_ss_round:
2510  case Intrinsic::x86_avx512_mask_mul_ss_round:
2511  case Intrinsic::x86_avx512_mask_sub_ss_round:
2512  case Intrinsic::x86_avx512_mask_add_sd_round:
2513  case Intrinsic::x86_avx512_mask_div_sd_round:
2514  case Intrinsic::x86_avx512_mask_mul_sd_round:
2515  case Intrinsic::x86_avx512_mask_sub_sd_round:
2516  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2517  // IR operations.
2518  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2519  if (R->getValue() == 4) {
2520  // Extract the element as scalars.
2521  Value *Arg0 = II->getArgOperand(0);
2522  Value *Arg1 = II->getArgOperand(1);
2523  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
2524  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
2525 
2526  Value *V;
2527  switch (II->getIntrinsicID()) {
2528  default: llvm_unreachable("Case stmts out of sync!");
2529  case Intrinsic::x86_avx512_mask_add_ss_round:
2530  case Intrinsic::x86_avx512_mask_add_sd_round:
2531  V = Builder.CreateFAdd(LHS, RHS);
2532  break;
2533  case Intrinsic::x86_avx512_mask_sub_ss_round:
2534  case Intrinsic::x86_avx512_mask_sub_sd_round:
2535  V = Builder.CreateFSub(LHS, RHS);
2536  break;
2537  case Intrinsic::x86_avx512_mask_mul_ss_round:
2538  case Intrinsic::x86_avx512_mask_mul_sd_round:
2539  V = Builder.CreateFMul(LHS, RHS);
2540  break;
2541  case Intrinsic::x86_avx512_mask_div_ss_round:
2542  case Intrinsic::x86_avx512_mask_div_sd_round:
2543  V = Builder.CreateFDiv(LHS, RHS);
2544  break;
2545  }
2546 
2547  // Handle the masking aspect of the intrinsic.
2548  Value *Mask = II->getArgOperand(3);
2549  auto *C = dyn_cast<ConstantInt>(Mask);
2550  // We don't need a select if we know the mask bit is a 1.
2551  if (!C || !C->getValue()[0]) {
2552  // Cast the mask to an i1 vector and then extract the lowest element.
2553  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
2554  cast<IntegerType>(Mask->getType())->getBitWidth());
2555  Mask = Builder.CreateBitCast(Mask, MaskTy);
2556  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2557  // Extract the lowest element from the passthru operand.
2558  Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
2559  (uint64_t)0);
2560  V = Builder.CreateSelect(Mask, V, Passthru);
2561  }
2562 
2563  // Insert the result back into the original argument 0.
2564  V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2565 
2566  return replaceInstUsesWith(*II, V);
2567  }
2568  }
2569  LLVM_FALLTHROUGH;
2570 
2571  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
2572  case Intrinsic::x86_avx512_mask_max_ss_round:
2573  case Intrinsic::x86_avx512_mask_min_ss_round:
2574  case Intrinsic::x86_avx512_mask_max_sd_round:
2575  case Intrinsic::x86_avx512_mask_min_sd_round:
2576  case Intrinsic::x86_sse_cmp_ss:
2577  case Intrinsic::x86_sse_min_ss:
2578  case Intrinsic::x86_sse_max_ss:
2579  case Intrinsic::x86_sse2_cmp_sd:
2580  case Intrinsic::x86_sse2_min_sd:
2581  case Intrinsic::x86_sse2_max_sd:
2582  case Intrinsic::x86_xop_vfrcz_ss:
2583  case Intrinsic::x86_xop_vfrcz_sd: {
2584  unsigned VWidth = II->getType()->getVectorNumElements();
2585  APInt UndefElts(VWidth, 0);
2586  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2587  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2588  if (V != II)
2589  return replaceInstUsesWith(*II, V);
2590  return II;
2591  }
2592  break;
2593  }
2594  case Intrinsic::x86_sse41_round_ss:
2595  case Intrinsic::x86_sse41_round_sd: {
2596  unsigned VWidth = II->getType()->getVectorNumElements();
2597  APInt UndefElts(VWidth, 0);
2598  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2599  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2600  if (V != II)
2601  return replaceInstUsesWith(*II, V);
2602  return II;
2603  } else if (Value *V = simplifyX86round(*II, Builder))
2604  return replaceInstUsesWith(*II, V);
2605  break;
2606  }
2607 
2608  // Constant fold ashr( <A x Bi>, Ci ).
2609  // Constant fold lshr( <A x Bi>, Ci ).
2610  // Constant fold shl( <A x Bi>, Ci ).
2611  case Intrinsic::x86_sse2_psrai_d:
2612  case Intrinsic::x86_sse2_psrai_w:
2613  case Intrinsic::x86_avx2_psrai_d:
2614  case Intrinsic::x86_avx2_psrai_w:
2615  case Intrinsic::x86_avx512_psrai_q_128:
2616  case Intrinsic::x86_avx512_psrai_q_256:
2617  case Intrinsic::x86_avx512_psrai_d_512:
2618  case Intrinsic::x86_avx512_psrai_q_512:
2619  case Intrinsic::x86_avx512_psrai_w_512:
2620  case Intrinsic::x86_sse2_psrli_d:
2621  case Intrinsic::x86_sse2_psrli_q:
2622  case Intrinsic::x86_sse2_psrli_w:
2623  case Intrinsic::x86_avx2_psrli_d:
2624  case Intrinsic::x86_avx2_psrli_q:
2625  case Intrinsic::x86_avx2_psrli_w:
2626  case Intrinsic::x86_avx512_psrli_d_512:
2627  case Intrinsic::x86_avx512_psrli_q_512:
2628  case Intrinsic::x86_avx512_psrli_w_512:
2629  case Intrinsic::x86_sse2_pslli_d:
2630  case Intrinsic::x86_sse2_pslli_q:
2631  case Intrinsic::x86_sse2_pslli_w:
2632  case Intrinsic::x86_avx2_pslli_d:
2633  case Intrinsic::x86_avx2_pslli_q:
2634  case Intrinsic::x86_avx2_pslli_w:
2635  case Intrinsic::x86_avx512_pslli_d_512:
2636  case Intrinsic::x86_avx512_pslli_q_512:
2637  case Intrinsic::x86_avx512_pslli_w_512:
2638  if (Value *V = simplifyX86immShift(*II, Builder))
2639  return replaceInstUsesWith(*II, V);
2640  break;
2641 
2642  case Intrinsic::x86_sse2_psra_d:
2643  case Intrinsic::x86_sse2_psra_w:
2644  case Intrinsic::x86_avx2_psra_d:
2645  case Intrinsic::x86_avx2_psra_w:
2646  case Intrinsic::x86_avx512_psra_q_128:
2647  case Intrinsic::x86_avx512_psra_q_256:
2648  case Intrinsic::x86_avx512_psra_d_512:
2649  case Intrinsic::x86_avx512_psra_q_512:
2650  case Intrinsic::x86_avx512_psra_w_512:
2651  case Intrinsic::x86_sse2_psrl_d:
2652  case Intrinsic::x86_sse2_psrl_q:
2653  case Intrinsic::x86_sse2_psrl_w:
2654  case Intrinsic::x86_avx2_psrl_d:
2655  case Intrinsic::x86_avx2_psrl_q:
2656  case Intrinsic::x86_avx2_psrl_w:
2657  case Intrinsic::x86_avx512_psrl_d_512:
2658  case Intrinsic::x86_avx512_psrl_q_512:
2659  case Intrinsic::x86_avx512_psrl_w_512:
2660  case Intrinsic::x86_sse2_psll_d:
2661  case Intrinsic::x86_sse2_psll_q:
2662  case Intrinsic::x86_sse2_psll_w:
2663  case Intrinsic::x86_avx2_psll_d:
2664  case Intrinsic::x86_avx2_psll_q:
2665  case Intrinsic::x86_avx2_psll_w:
2666  case Intrinsic::x86_avx512_psll_d_512:
2667  case Intrinsic::x86_avx512_psll_q_512:
2668  case Intrinsic::x86_avx512_psll_w_512: {
2669  if (Value *V = simplifyX86immShift(*II, Builder))
2670  return replaceInstUsesWith(*II, V);
2671 
2672  // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2673  // operand to compute the shift amount.
2674  Value *Arg1 = II->getArgOperand(1);
2675  assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2676  "Unexpected packed shift size");
2677  unsigned VWidth = Arg1->getType()->getVectorNumElements();
2678 
2679  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2680  II->setArgOperand(1, V);
2681  return II;
2682  }
2683  break;
2684  }
2685 
2686  case Intrinsic::x86_avx2_psllv_d:
2687  case Intrinsic::x86_avx2_psllv_d_256:
2688  case Intrinsic::x86_avx2_psllv_q:
2689  case Intrinsic::x86_avx2_psllv_q_256:
2690  case Intrinsic::x86_avx512_psllv_d_512:
2691  case Intrinsic::x86_avx512_psllv_q_512:
2692  case Intrinsic::x86_avx512_psllv_w_128:
2693  case Intrinsic::x86_avx512_psllv_w_256:
2694  case Intrinsic::x86_avx512_psllv_w_512:
2695  case Intrinsic::x86_avx2_psrav_d:
2696  case Intrinsic::x86_avx2_psrav_d_256:
2697  case Intrinsic::x86_avx512_psrav_q_128:
2698  case Intrinsic::x86_avx512_psrav_q_256:
2699  case Intrinsic::x86_avx512_psrav_d_512:
2700  case Intrinsic::x86_avx512_psrav_q_512:
2701  case Intrinsic::x86_avx512_psrav_w_128:
2702  case Intrinsic::x86_avx512_psrav_w_256:
2703  case Intrinsic::x86_avx512_psrav_w_512:
2704  case Intrinsic::x86_avx2_psrlv_d:
2705  case Intrinsic::x86_avx2_psrlv_d_256:
2706  case Intrinsic::x86_avx2_psrlv_q:
2707  case Intrinsic::x86_avx2_psrlv_q_256:
2708  case Intrinsic::x86_avx512_psrlv_d_512:
2709  case Intrinsic::x86_avx512_psrlv_q_512:
2710  case Intrinsic::x86_avx512_psrlv_w_128:
2711  case Intrinsic::x86_avx512_psrlv_w_256:
2712  case Intrinsic::x86_avx512_psrlv_w_512:
2713  if (Value *V = simplifyX86varShift(*II, Builder))
2714  return replaceInstUsesWith(*II, V);
2715  break;
2716 
2717  case Intrinsic::x86_sse2_packssdw_128:
2718  case Intrinsic::x86_sse2_packsswb_128:
2719  case Intrinsic::x86_avx2_packssdw:
2720  case Intrinsic::x86_avx2_packsswb:
2721  case Intrinsic::x86_avx512_packssdw_512:
2722  case Intrinsic::x86_avx512_packsswb_512:
2723  if (Value *V = simplifyX86pack(*II, true))
2724  return replaceInstUsesWith(*II, V);
2725  break;
2726 
2727  case Intrinsic::x86_sse2_packuswb_128:
2728  case Intrinsic::x86_sse41_packusdw:
2729  case Intrinsic::x86_avx2_packusdw:
2730  case Intrinsic::x86_avx2_packuswb:
2731  case Intrinsic::x86_avx512_packusdw_512:
2732  case Intrinsic::x86_avx512_packuswb_512:
2733  if (Value *V = simplifyX86pack(*II, false))
2734  return replaceInstUsesWith(*II, V);
2735  break;
2736 
2737  case Intrinsic::x86_pclmulqdq:
2738  case Intrinsic::x86_pclmulqdq_256:
2739  case Intrinsic::x86_pclmulqdq_512: {
2740  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
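    // Example (illustrative, 128-bit form with <2 x i64> operands): Imm = 0x11
    // multiplies element 1 of each operand, so element 0 of both operands is
    // not demanded and may be replaced with undef; if a demanded element is
    // itself undef, the whole result folds to zero below.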
2741  unsigned Imm = C->getZExtValue();
2742 
2743  bool MadeChange = false;
2744  Value *Arg0 = II->getArgOperand(0);
2745  Value *Arg1 = II->getArgOperand(1);
2746  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2747 
2748  APInt UndefElts1(VWidth, 0);
2749  APInt DemandedElts1 = APInt::getSplat(VWidth,
2750  APInt(2, (Imm & 0x01) ? 2 : 1));
2751  if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts1,
2752  UndefElts1)) {
2753  II->setArgOperand(0, V);
2754  MadeChange = true;
2755  }
2756 
2757  APInt UndefElts2(VWidth, 0);
2758  APInt DemandedElts2 = APInt::getSplat(VWidth,
2759  APInt(2, (Imm & 0x10) ? 2 : 1));
2760  if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts2,
2761  UndefElts2)) {
2762  II->setArgOperand(1, V);
2763  MadeChange = true;
2764  }
2765 
2766  // If either input elements are undef, the result is zero.
2767  if (DemandedElts1.isSubsetOf(UndefElts1) ||
2768  DemandedElts2.isSubsetOf(UndefElts2))
2769  return replaceInstUsesWith(*II,
2770  ConstantAggregateZero::get(II->getType()));
2771 
2772  if (MadeChange)
2773  return II;
2774  }
2775  break;
2776  }
2777 
2778  case Intrinsic::x86_sse41_insertps:
2779  if (Value *V = simplifyX86insertps(*II, Builder))
2780  return replaceInstUsesWith(*II, V);
2781  break;
2782 
2783  case Intrinsic::x86_sse4a_extrq: {
2784  Value *Op0 = II->getArgOperand(0);
2785  Value *Op1 = II->getArgOperand(1);
2786  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2787  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2788  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2789  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2790  VWidth1 == 16 && "Unexpected operand sizes");
2791 
2792  // See if we're dealing with constant values.
2793  Constant *C1 = dyn_cast<Constant>(Op1);
2794  ConstantInt *CILength =
2795  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2796  : nullptr;
2797  ConstantInt *CIIndex =
2798  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2799  : nullptr;
2800 
2801  // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2802  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2803  return replaceInstUsesWith(*II, V);
2804 
2805  // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2806  // operands and the lowest 16-bits of the second.
2807  bool MadeChange = false;
2808  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2809  II->setArgOperand(0, V);
2810  MadeChange = true;
2811  }
2812  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2813  II->setArgOperand(1, V);
2814  MadeChange = true;
2815  }
2816  if (MadeChange)
2817  return II;
2818  break;
2819  }
2820 
2821  case Intrinsic::x86_sse4a_extrqi: {
2822  // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2823  // bits of the lower 64-bits. The upper 64-bits are undefined.
2824  Value *Op0 = II->getArgOperand(0);
2825  unsigned VWidth = Op0->getType()->getVectorNumElements();
2826  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2827  "Unexpected operand size");
2828 
2829  // See if we're dealing with constant values.
2830  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
2831  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
2832 
2833  // Attempt to simplify to a constant or shuffle vector.
2834  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2835  return replaceInstUsesWith(*II, V);
2836 
2837  // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2838  // operand.
2839  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2840  II->setArgOperand(0, V);
2841  return II;
2842  }
2843  break;
2844  }
2845 
2846  case Intrinsic::x86_sse4a_insertq: {
2847  Value *Op0 = II->getArgOperand(0);
2848  Value *Op1 = II->getArgOperand(1);
2849  unsigned VWidth = Op0->getType()->getVectorNumElements();
2850  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2851  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2852  Op1->getType()->getVectorNumElements() == 2 &&
2853  "Unexpected operand size");
2854 
2855  // See if we're dealing with constant values.
2856  Constant *C1 = dyn_cast<Constant>(Op1);
2857  ConstantInt *CI11 =
2858  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2859  : nullptr;
2860 
2861  // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2862  if (CI11) {
2863  const APInt &V11 = CI11->getValue();
2864  APInt Len = V11.zextOrTrunc(6);
2865  APInt Idx = V11.lshr(8).zextOrTrunc(6);
2866  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2867  return replaceInstUsesWith(*II, V);
2868  }
2869 
2870  // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2871  // operand.
2872  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2873  II->setArgOperand(0, V);
2874  return II;
2875  }
2876  break;
2877  }
2878 
2879  case Intrinsic::x86_sse4a_insertqi: {
2880  // INSERTQI: Extract lowest Length bits from lower half of second source and
2881  // insert over first source starting at Index bit. The upper 64-bits are
2882  // undefined.
2883  Value *Op0 = II->getArgOperand(0);
2884  Value *Op1 = II->getArgOperand(1);
2885  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2886  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2887  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2888  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2889  VWidth1 == 2 && "Unexpected operand sizes");
2890 
2891  // See if we're dealing with constant values.
2892  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
2893  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
2894 
2895  // Attempt to simplify to a constant or shuffle vector.
2896  if (CILength && CIIndex) {
2897  APInt Len = CILength->getValue().zextOrTrunc(6);
2898  APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2899  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2900  return replaceInstUsesWith(*II, V);
2901  }
2902 
2903  // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2904  // operands.
2905  bool MadeChange = false;
2906  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2907  II->setArgOperand(0, V);
2908  MadeChange = true;
2909  }
2910  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2911  II->setArgOperand(1, V);
2912  MadeChange = true;
2913  }
2914  if (MadeChange)
2915  return II;
2916  break;
2917  }
2918 
2919  case Intrinsic::x86_sse41_pblendvb:
2920  case Intrinsic::x86_sse41_blendvps:
2921  case Intrinsic::x86_sse41_blendvpd:
2922  case Intrinsic::x86_avx_blendv_ps_256:
2923  case Intrinsic::x86_avx_blendv_pd_256:
2924  case Intrinsic::x86_avx2_pblendvb: {
2925  // Convert blendv* to vector selects if the mask is constant.
2926  // This optimization is convoluted because the intrinsic is defined as
2927  // getting a vector of floats or doubles for the ps and pd versions.
2928  // FIXME: That should be changed.
2929 
2930  Value *Op0 = II->getArgOperand(0);
2931  Value *Op1 = II->getArgOperand(1);
2932  Value *Mask = II->getArgOperand(2);
2933 
2934  // fold (blend A, A, Mask) -> A
2935  if (Op0 == Op1)
2936  return replaceInstUsesWith(CI, Op0);
2937 
2938  // Zero Mask - select 1st argument.
2939  if (isa<ConstantAggregateZero>(Mask))
2940  return replaceInstUsesWith(CI, Op0);
2941 
2942  // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
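    // Example (illustrative): a pblendvb whose mask constant has bytes
    // <0x80, 0x00, ...> takes the Op1 byte where the sign bit is set and the
    // Op0 byte elsewhere, i.e. 'select <i1 1, i1 0, ...>, Op1, Op0'.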
2943  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2944  Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
2945  return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2946  }
2947  break;
2948  }
2949 
2950  case Intrinsic::x86_ssse3_pshuf_b_128:
2951  case Intrinsic::x86_avx2_pshuf_b:
2952  case Intrinsic::x86_avx512_pshuf_b_512:
2953  if (Value *V = simplifyX86pshufb(*II, Builder))
2954  return replaceInstUsesWith(*II, V);
2955  break;
2956 
2957  case Intrinsic::x86_avx_vpermilvar_ps:
2958  case Intrinsic::x86_avx_vpermilvar_ps_256:
2959  case Intrinsic::x86_avx512_vpermilvar_ps_512:
2960  case Intrinsic::x86_avx_vpermilvar_pd:
2961  case Intrinsic::x86_avx_vpermilvar_pd_256:
2962  case Intrinsic::x86_avx512_vpermilvar_pd_512:
2963  if (Value *V = simplifyX86vpermilvar(*II, Builder))
2964  return replaceInstUsesWith(*II, V);
2965  break;
2966 
2967  case Intrinsic::x86_avx2_permd:
2968  case Intrinsic::x86_avx2_permps:
2969  case Intrinsic::x86_avx512_permvar_df_256:
2970  case Intrinsic::x86_avx512_permvar_df_512:
2971  case Intrinsic::x86_avx512_permvar_di_256:
2972  case Intrinsic::x86_avx512_permvar_di_512:
2973  case Intrinsic::x86_avx512_permvar_hi_128:
2974  case Intrinsic::x86_avx512_permvar_hi_256:
2975  case Intrinsic::x86_avx512_permvar_hi_512:
2976  case Intrinsic::x86_avx512_permvar_qi_128:
2977  case Intrinsic::x86_avx512_permvar_qi_256:
2978  case Intrinsic::x86_avx512_permvar_qi_512:
2979  case Intrinsic::x86_avx512_permvar_sf_512:
2980  case Intrinsic::x86_avx512_permvar_si_512:
2981  if (Value *V = simplifyX86vpermv(*II, Builder))
2982  return replaceInstUsesWith(*II, V);
2983  break;
2984 
2985  case Intrinsic::x86_avx_maskload_ps:
2986  case Intrinsic::x86_avx_maskload_pd:
2987  case Intrinsic::x86_avx_maskload_ps_256:
2988  case Intrinsic::x86_avx_maskload_pd_256:
2989  case Intrinsic::x86_avx2_maskload_d:
2990  case Intrinsic::x86_avx2_maskload_q:
2991  case Intrinsic::x86_avx2_maskload_d_256:
2992  case Intrinsic::x86_avx2_maskload_q_256:
2993  if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
2994  return I;
2995  break;
2996 
2997  case Intrinsic::x86_sse2_maskmov_dqu:
2998  case Intrinsic::x86_avx_maskstore_ps:
2999  case Intrinsic::x86_avx_maskstore_pd:
3000  case Intrinsic::x86_avx_maskstore_ps_256:
3001  case Intrinsic::x86_avx_maskstore_pd_256:
3002  case Intrinsic::x86_avx2_maskstore_d:
3003  case Intrinsic::x86_avx2_maskstore_q:
3004  case Intrinsic::x86_avx2_maskstore_d_256:
3005  case Intrinsic::x86_avx2_maskstore_q_256:
3006  if (simplifyX86MaskedStore(*II, *this))
3007  return nullptr;
3008  break;
3009 
3010  case Intrinsic::x86_xop_vpcomb:
3011  case Intrinsic::x86_xop_vpcomd:
3012  case Intrinsic::x86_xop_vpcomq:
3013  case Intrinsic::x86_xop_vpcomw:
3014  if (Value *V = simplifyX86vpcom(*II, Builder, true))
3015  return replaceInstUsesWith(*II, V);
3016  break;
3017 
3018  case Intrinsic::x86_xop_vpcomub:
3019  case Intrinsic::x86_xop_vpcomud:
3020  case Intrinsic::x86_xop_vpcomuq:
3021  case Intrinsic::x86_xop_vpcomuw:
3022  if (Value *V = simplifyX86vpcom(*II, Builder, false))
3023  return replaceInstUsesWith(*II, V);
3024  break;
3025 
3026  case Intrinsic::ppc_altivec_vperm:
3027  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
3028  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
 3029  // a vector shuffle for little endian, we must undo the transformation
3030  // performed on vec_perm in altivec.h. That is, we must complement
3031  // the permutation mask with respect to 31 and reverse the order of
3032  // V1 and V2.
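  // Rough intuition (illustrative, not from the source): vperm selects bytes
  // 0..31 from the concatenation of V1 and V2 using big-endian numbering, and
  // altivec.h's vec_perm pre-adjusts the mask on little-endian targets, so
  // taking 31 - Idx and swapping which operand feeds each half recovers the
  // shuffle the user originally wrote.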
3033  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
3034  assert(Mask->getType()->getVectorNumElements() == 16 &&
3035  "Bad type for intrinsic!");
3036 
3037  // Check that all of the elements are integer constants or undefs.
3038  bool AllEltsOk = true;
3039  for (unsigned i = 0; i != 16; ++i) {
3040  Constant *Elt = Mask->getAggregateElement(i);
3041  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
3042  AllEltsOk = false;
3043  break;
3044  }
3045  }
3046 
3047  if (AllEltsOk) {
3048  // Cast the input vectors to byte vectors.
3049  Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
3050  Mask->getType());
3051  Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
3052  Mask->getType());
3053  Value *Result = UndefValue::get(Op0->getType());
3054 
3055  // Only extract each element once.
3056  Value *ExtractedElts[32];
3057  memset(ExtractedElts, 0, sizeof(ExtractedElts));
3058 
3059  for (unsigned i = 0; i != 16; ++i) {
3060  if (isa<UndefValue>(Mask->getAggregateElement(i)))
3061  continue;
3062  unsigned Idx =
3063  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
3064  Idx &= 31; // Match the hardware behavior.
3065  if (DL.isLittleEndian())
3066  Idx = 31 - Idx;
3067 
3068  if (!ExtractedElts[Idx]) {
3069  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
3070  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
3071  ExtractedElts[Idx] =
3072  Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
3073  Builder.getInt32(Idx&15));
3074  }
3075 
3076  // Insert this value into the result vector.
3077  Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
3078  Builder.getInt32(i));
3079  }
3080  return CastInst::Create(Instruction::BitCast, Result, CI.getType());
3081  }
3082  }
3083  break;
3084 
3085  case Intrinsic::arm_neon_vld1: {
3086  unsigned MemAlign = getKnownAlignment(II->getArgOperand(0),
3087  DL, II, &AC, &DT);
3088  if (Value *V = simplifyNeonVld1(*II, MemAlign, Builder))
3089  return replaceInstUsesWith(*II, V);
3090  break;
3091  }
3092 
3093  case Intrinsic::arm_neon_vld2:
3094  case Intrinsic::arm_neon_vld3:
3095  case Intrinsic::arm_neon_vld4:
3096  case Intrinsic::arm_neon_vld2lane:
3097  case Intrinsic::arm_neon_vld3lane:
3098  case Intrinsic::arm_neon_vld4lane:
3099  case Intrinsic::arm_neon_vst1:
3100  case Intrinsic::arm_neon_vst2:
3101  case Intrinsic::arm_neon_vst3:
3102  case Intrinsic::arm_neon_vst4:
3103  case Intrinsic::arm_neon_vst2lane:
3104  case Intrinsic::arm_neon_vst3lane:
3105  case Intrinsic::arm_neon_vst4lane: {
3106  unsigned MemAlign =
3107  getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3108  unsigned AlignArg = II->getNumArgOperands() - 1;
3109  ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
3110  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
3111  II->setArgOperand(AlignArg,
3112  ConstantInt::get(Type::getInt32Ty(II->getContext()),
3113  MemAlign, false));
3114  return II;
3115  }
3116  break;
3117  }
3118 
3119  case Intrinsic::arm_neon_vtbl1:
3120  case Intrinsic::aarch64_neon_tbl1:
3121  if (Value *V = simplifyNeonTbl1(*II, Builder))
3122  return replaceInstUsesWith(*II, V);
3123  break;
3124 
3125  case Intrinsic::arm_neon_vmulls:
3126  case Intrinsic::arm_neon_vmullu:
3127  case Intrinsic::aarch64_neon_smull:
3128  case Intrinsic::aarch64_neon_umull: {
3129  Value *Arg0 = II->getArgOperand(0);
3130  Value *Arg1 = II->getArgOperand(1);
3131 
3132  // Handle mul by zero first:
3133  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3134  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3135  }
3136 
3137  // Check for constant LHS & RHS - in this case we just simplify.
3138  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
3139  II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
3140  VectorType *NewVT = cast<VectorType>(II->getType());
3141  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3142  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3143  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
3144  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
3145 
3146  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
3147  }
3148 
3149  // Couldn't simplify - canonicalize constant to the RHS.
3150  std::swap(Arg0, Arg1);
3151  }
3152 
3153  // Handle mul by one:
3154  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3155  if (ConstantInt *Splat =
3156  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3157  if (Splat->isOne())
3158  return CastInst::CreateIntegerCast(Arg0, II->getType(),
3159  /*isSigned=*/!Zext);
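  // e.g. (illustrative) vmull_u16(x, splat(1)) only widens its operand, so it
  // becomes a zext of x to the result vector type (sext for the signed forms).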
3160 
3161  break;
3162  }
3163  case Intrinsic::arm_neon_aesd:
3164  case Intrinsic::arm_neon_aese:
3165  case Intrinsic::aarch64_crypto_aesd:
3166  case Intrinsic::aarch64_crypto_aese: {
3167  Value *DataArg = II->getArgOperand(0);
3168  Value *KeyArg = II->getArgOperand(1);
3169 
3170  // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
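  // e.g. (illustrative) aese(xor(%data, %key), zeroinitializer) can become
  // aese(%data, %key), since AESE xors its two operands (the AddRoundKey step)
  // before SubBytes/ShiftRows.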
3171  Value *Data, *Key;
3172  if (match(KeyArg, m_ZeroInt()) &&
3173  match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3174  II->setArgOperand(0, Data);
3175  II->setArgOperand(1, Key);
3176  return II;
3177  }
3178  break;
3179  }
3180  case Intrinsic::amdgcn_rcp: {
3181  Value *Src = II->getArgOperand(0);
3182 
3183  // TODO: Move to ConstantFolding/InstSimplify?
3184  if (isa<UndefValue>(Src))
3185  return replaceInstUsesWith(CI, Src);
3186 
3187  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3188  const APFloat &ArgVal = C->getValueAPF();
3189  APFloat Val(ArgVal.getSemantics(), 1.0);
 3190  APFloat::opStatus Status = Val.divide(ArgVal,
 3191  APFloat::rmNearestTiesToEven);
3192  // Only do this if it was exact and therefore not dependent on the
3193  // rounding mode.
3194  if (Status == APFloat::opOK)
3195  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
3196  }
3197 
3198  break;
3199  }
3200  case Intrinsic::amdgcn_rsq: {
3201  Value *Src = II->getArgOperand(0);
3202 
3203  // TODO: Move to ConstantFolding/InstSimplify?
3204  if (isa<UndefValue>(Src))
3205  return replaceInstUsesWith(CI, Src);
3206  break;
3207  }
3208  case Intrinsic::amdgcn_frexp_mant:
3209  case Intrinsic::amdgcn_frexp_exp: {
3210  Value *Src = II->getArgOperand(0);
3211  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3212  int Exp;
 3213  APFloat Significand = frexp(C->getValueAPF(), Exp,
 3214  APFloat::rmNearestTiesToEven);
3215 
3216  if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
3217  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
3218  Significand));
3219  }
3220 
3221  // Match instruction special case behavior.
3222  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
3223  Exp = 0;
3224 
3225  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
3226  }
3227 
3228  if (isa<UndefValue>(Src))
3229  return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
3230 
3231  break;
3232  }
3233  case Intrinsic::amdgcn_class: {
3234  enum {
3235  S_NAN = 1 << 0, // Signaling NaN
3236  Q_NAN = 1 << 1, // Quiet NaN
3237  N_INFINITY = 1 << 2, // Negative infinity
3238  N_NORMAL = 1 << 3, // Negative normal
3239  N_SUBNORMAL = 1 << 4, // Negative subnormal
3240  N_ZERO = 1 << 5, // Negative zero
3241  P_ZERO = 1 << 6, // Positive zero
3242  P_SUBNORMAL = 1 << 7, // Positive subnormal
3243  P_NORMAL = 1 << 8, // Positive normal
3244  P_INFINITY = 1 << 9 // Positive infinity
3245  };
3246 
 3247  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
 3248  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
3249 
3250  Value *Src0 = II->getArgOperand(0);
3251  Value *Src1 = II->getArgOperand(1);
3252  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
3253  if (!CMask) {
3254  if (isa<UndefValue>(Src0))
3255  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3256 
3257  if (isa<UndefValue>(Src1))
3258  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3259  break;
3260  }
3261 
3262  uint32_t Mask = CMask->getZExtValue();
3263 
3264  // If all tests are made, it doesn't matter what the value is.
3265  if ((Mask & FullMask) == FullMask)
3266  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
3267 
3268  if ((Mask & FullMask) == 0)
3269  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3270 
3271  if (Mask == (S_NAN | Q_NAN)) {
3272  // Equivalent of isnan. Replace with standard fcmp.
3273  Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
3274  FCmp->takeName(II);
3275  return replaceInstUsesWith(*II, FCmp);
3276  }
3277 
3278  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
3279  if (!CVal) {
3280  if (isa<UndefValue>(Src0))
3281  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3282 
3283  // Clamp mask to used bits
3284  if ((Mask & FullMask) != Mask) {
3285  CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
3286  { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
3287  );
3288 
3289  NewCall->takeName(II);
3290  return replaceInstUsesWith(*II, NewCall);
3291  }
3292 
3293  break;
3294  }
3295 
3296  const APFloat &Val = CVal->getValueAPF();
3297 
3298  bool Result =
3299  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
3300  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
3301  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
3302  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
3303  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
3304  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
3305  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
3306  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
3307  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
3308  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
3309 
3310  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
3311  }
3312  case Intrinsic::amdgcn_cvt_pkrtz: {
3313  Value *Src0 = II->getArgOperand(0);
3314  Value *Src1 = II->getArgOperand(1);
3315  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3316  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3317  const fltSemantics &HalfSem
3318  = II->getType()->getScalarType()->getFltSemantics();
3319  bool LosesInfo;
3320  APFloat Val0 = C0->getValueAPF();
3321  APFloat Val1 = C1->getValueAPF();
3322  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3323  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3324 
3325  Constant *Folded = ConstantVector::get({
3326  ConstantFP::get(II->getContext(), Val0),
3327  ConstantFP::get(II->getContext(), Val1) });
3328  return replaceInstUsesWith(*II, Folded);
3329  }
3330  }
3331 
3332  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3333  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3334 
3335  break;
3336  }
3337  case Intrinsic::amdgcn_cvt_pknorm_i16:
3338  case Intrinsic::amdgcn_cvt_pknorm_u16:
3339  case Intrinsic::amdgcn_cvt_pk_i16:
3340  case Intrinsic::amdgcn_cvt_pk_u16: {
3341  Value *Src0 = II->getArgOperand(0);
3342  Value *Src1 = II->getArgOperand(1);
3343 
3344  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3345  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3346 
3347  break;
3348  }
3349  case Intrinsic::amdgcn_ubfe:
3350  case Intrinsic::amdgcn_sbfe: {
3351  // Decompose simple cases into standard shifts.
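  // e.g. (illustrative) on i32, ubfe(x, 0, 8) extracts the low 8 bits and
  // becomes (x << 24) lshr 24; sbfe uses an arithmetic shift instead so the
  // extracted field is sign-extended.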
3352  Value *Src = II->getArgOperand(0);
3353  if (isa<UndefValue>(Src))
3354  return replaceInstUsesWith(*II, Src);
3355 
3356  unsigned Width;
3357  Type *Ty = II->getType();
3358  unsigned IntSize = Ty->getIntegerBitWidth();
3359 
3360  ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
3361  if (CWidth) {
3362  Width = CWidth->getZExtValue();
3363  if ((Width & (IntSize - 1)) == 0)
3364  return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
3365 
3366  if (Width >= IntSize) {
3367  // Hardware ignores high bits, so remove those.
3368  II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
3369  Width & (IntSize - 1)));
3370  return II;
3371  }
3372  }
3373 
3374  unsigned Offset;
3375  ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
3376  if (COffset) {
3377  Offset = COffset->getZExtValue();
3378  if (Offset >= IntSize) {
3379  II->setArgOperand(1, ConstantInt::get(COffset->getType(),
3380  Offset & (IntSize - 1)));
3381  return II;
3382  }
3383  }
3384 
3385  bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
3386 
3387  // TODO: Also emit sub if only width is constant.
3388  if (!CWidth && COffset && Offset == 0) {
3389  Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
3390  Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
3391  ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
3392 
3393  Value *Shl = Builder.CreateShl(Src, ShiftVal);
3394  Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
3395  : Builder.CreateLShr(Shl, ShiftVal);
3396  RightShift->takeName(II);
3397  return replaceInstUsesWith(*II, RightShift);
3398  }
3399 
3400  if (!CWidth || !COffset)
3401  break;
3402 
3403  // TODO: This allows folding to undef when the hardware has specific
3404  // behavior?
3405  if (Offset + Width < IntSize) {
3406  Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
3407  Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
3408  : Builder.CreateLShr(Shl, IntSize - Width);
3409  RightShift->takeName(II);
3410  return replaceInstUsesWith(*II, RightShift);
3411  }
3412 
3413  Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
3414  : Builder.CreateLShr(Src, Offset);
3415 
3416  RightShift->takeName(II);
3417  return replaceInstUsesWith(*II, RightShift);
3418  }
3419  case Intrinsic::amdgcn_exp:
3420  case Intrinsic::amdgcn_exp_compr: {
3421  ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
3422  if (!En) // Illegal.
3423  break;
3424 
3425  unsigned EnBits = En->getZExtValue();
3426  if (EnBits == 0xf)
3427  break; // All inputs enabled.
3428 
3429  bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
3430  bool Changed = false;
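  // e.g. (illustrative) with en = 0b0011 on llvm.amdgcn.exp only src0 and
  // src1 are actually exported, so src2 and src3 may be replaced with undef
  // without changing the exported values.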
3431  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
3432  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
3433  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
3434  Value *Src = II->getArgOperand(I + 2);
3435  if (!isa<UndefValue>(Src)) {
3436  II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
3437  Changed = true;
3438  }
3439  }
3440  }
3441 
3442  if (Changed)
3443  return II;
3444 
3445  break;
3446  }
3447  case Intrinsic::amdgcn_fmed3: {
3448  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
3449  // for the shader.
3450 
3451  Value *Src0 = II->getArgOperand(0);
3452  Value *Src1 = II->getArgOperand(1);
3453  Value *Src2 = II->getArgOperand(2);
3454 
3455  // Checking for NaN before canonicalization provides better fidelity when
3456  // mapping other operations onto fmed3 since the order of operands is
3457  // unchanged.
3458  CallInst *NewCall = nullptr;
3459  if (match(Src0, m_NaN()) || isa<UndefValue>(Src0)) {
3460  NewCall = Builder.CreateMinNum(Src1, Src2);
3461  } else if (match(Src1, m_NaN()) || isa<UndefValue>(Src1)) {
3462  NewCall = Builder.CreateMinNum(Src0, Src2);
3463  } else if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
3464  NewCall = Builder.CreateMaxNum(Src0, Src1);
3465  }
3466 
3467  if (NewCall) {
3468  NewCall->copyFastMathFlags(II);
3469  NewCall->takeName(II);
3470  return replaceInstUsesWith(*II, NewCall);
3471  }
3472 
3473  bool Swap = false;
3474  // Canonicalize constants to RHS operands.
3475  //
3476  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
3477  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3478  std::swap(Src0, Src1);
3479  Swap = true;
3480  }
3481 
3482  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
3483  std::swap(Src1, Src2);
3484  Swap = true;
3485  }
3486 
3487  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3488  std::swap(Src0, Src1);
3489  Swap = true;
3490  }
3491 
3492  if (Swap) {
3493  II->setArgOperand(0, Src0);
3494  II->setArgOperand(1, Src1);
3495  II->setArgOperand(2, Src2);
3496  return II;
3497  }
3498 
3499  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3500  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3501  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
3502  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
3503  C2->getValueAPF());
3504  return replaceInstUsesWith(*II,
3505  ConstantFP::get(Builder.getContext(), Result));
3506  }
3507  }
3508  }
3509 
3510  break;
3511  }
3512  case Intrinsic::amdgcn_icmp:
3513  case Intrinsic::amdgcn_fcmp: {
3514  const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
3515  if (!CC)
3516  break;
3517 
3518  // Guard against invalid arguments.
3519  int64_t CCVal = CC->getZExtValue();
3520  bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
3521  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
3522  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
3523  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
3524  CCVal > CmpInst::LAST_FCMP_PREDICATE)))
3525  break;
3526 
3527  Value *Src0 = II->getArgOperand(0);
3528  Value *Src1 = II->getArgOperand(1);
3529 
3530  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
3531  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
3532  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
3533  if (CCmp->isNullValue()) {
3534  return replaceInstUsesWith(
3535  *II, ConstantExpr::getSExt(CCmp, II->getType()));
3536  }
3537 
3538  // The result of V_ICMP/V_FCMP assembly instructions (which this
3539  // intrinsic exposes) is one bit per thread, masked with the EXEC
3540  // register (which contains the bitmask of live threads). So a
3541  // comparison that always returns true is the same as a read of the
3542  // EXEC register.
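  // e.g. (illustrative) llvm.amdgcn.icmp(i32 0, i32 0, eq) is true in every
  // active lane, so the returned lane mask is exactly the contents of EXEC,
  // which llvm.read_register(metadata !"exec") produces directly.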
 3543  Value *NewF = Intrinsic::getDeclaration(
 3544  II->getModule(), Intrinsic::read_register, II->getType());
3545  Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
3546  MDNode *MD = MDNode::get(II->getContext(), MDArgs);
3547  Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
3548  CallInst *NewCall = Builder.CreateCall(NewF, Args);
 3549  NewCall->addAttribute(AttributeList::FunctionIndex,
 3550  Attribute::Convergent);
 3551  NewCall->takeName(II);
3552  return replaceInstUsesWith(*II, NewCall);
3553  }
3554 
3555  // Canonicalize constants to RHS.
3556  CmpInst::Predicate SwapPred
3557  = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
3558  II->setArgOperand(0, Src1);
3559  II->setArgOperand(1, Src0);
3560  II->setArgOperand(2, ConstantInt::get(CC->getType(),
3561  static_cast<int>(SwapPred)));
3562  return II;
3563  }
3564 
3565  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
3566  break;
3567 
3568  // Canonicalize compare eq with true value to compare != 0
3569  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
3570  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
3571  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
3572  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
3573  Value *ExtSrc;
3574  if (CCVal == CmpInst::ICMP_EQ &&
3575  ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
3576  (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
3577  ExtSrc->getType()->isIntegerTy(1)) {
3578  II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
3579  II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
3580  return II;
3581  }
3582 
3583  CmpInst::Predicate SrcPred;
3584  Value *SrcLHS;
3585  Value *SrcRHS;
3586 
3587  // Fold compare eq/ne with 0 from a compare result as the predicate to the
3588  // intrinsic. The typical use is a wave vote function in the library, which
3589  // will be fed from a user code condition compared with 0. Fold in the
3590  // redundant compare.
3591 
3592  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
3593  // -> llvm.amdgcn.[if]cmp(a, b, pred)
3594  //
3595  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
3596  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
3597  if (match(Src1, m_Zero()) &&
3598  match(Src0,
3599  m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
3600  if (CCVal == CmpInst::ICMP_EQ)
3601  SrcPred = CmpInst::getInversePredicate(SrcPred);
3602 
3603  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
3604  Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
3605 
3606  Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
3607  SrcLHS->getType());
3608  Value *Args[] = { SrcLHS, SrcRHS,
3609  ConstantInt::get(CC->getType(), SrcPred) };
3610  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3611  NewCall->takeName(II);
3612  return replaceInstUsesWith(*II, NewCall);
3613  }
3614 
3615  break;
3616  }
3617  case Intrinsic::amdgcn_wqm_vote: {
3618  // wqm_vote is identity when the argument is constant.
3619  if (!isa<Constant>(II->getArgOperand(0)))
3620  break;
3621 
3622  return replaceInstUsesWith(*II, II->getArgOperand(0));
3623  }
3624  case Intrinsic::amdgcn_kill: {
3625  const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0));
3626  if (!C || !C->getZExtValue())
3627  break;
3628 
3629  // amdgcn.kill(i1 1) is a no-op
3630  return eraseInstFromFunction(CI);
3631  }
3632  case Intrinsic::amdgcn_update_dpp: {
3633  Value *Old = II->getArgOperand(0);
3634 
3635  auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5));
3636  auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3));
3637  auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4));
3638  if (!BC || !RM || !BM ||
3639  BC->isZeroValue() ||
3640  RM->getZExtValue() != 0xF ||
3641  BM->getZExtValue() != 0xF ||
3642  isa<UndefValue>(Old))
3643  break;
3644 
3645  // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
3646  II->setOperand(0, UndefValue::get(Old->getType()));
3647  return II;
3648  }
3649  case Intrinsic::stackrestore: {
3650  // If the save is right next to the restore, remove the restore. This can
3651  // happen when variable allocas are DCE'd.
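  // e.g. (illustrative)
  //   %sp = call i8* @llvm.stacksave()
  //   call void @llvm.stackrestore(i8* %sp)
  // with nothing allocated in between, the restore has no effect and can be
  // erased.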
3652  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3653  if (SS->getIntrinsicID() == Intrinsic::stacksave) {
3654  // Skip over debug info.
3655  if (SS->getNextNonDebugInstruction() == II) {
3656  return eraseInstFromFunction(CI);
3657  }
3658  }
3659  }
3660 
3661  // Scan down this block to see if there is another stack restore in the
3662  // same block without an intervening call/alloca.
3663  BasicBlock::iterator BI(II);
3664  TerminatorInst *TI = II->getParent()->getTerminator();
3665  bool CannotRemove = false;
3666  for (++BI; &*BI != TI; ++BI) {
3667  if (isa<AllocaInst>(BI)) {
3668  CannotRemove = true;
3669  break;
3670  }
3671  if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
3672  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
3673  // If there is a stackrestore below this one, remove this one.
3674  if (II->getIntrinsicID() == Intrinsic::stackrestore)
3675  return eraseInstFromFunction(CI);
3676 
3677  // Bail if we cross over an intrinsic with side effects, such as
3678  // llvm.stacksave, llvm.read_register, or llvm.setjmp.
3679  if (II->mayHaveSideEffects()) {
3680  CannotRemove = true;
3681  break;
3682  }
3683  } else {
3684  // If we found a non-intrinsic call, we can't remove the stack
3685  // restore.
3686  CannotRemove = true;
3687  break;
3688  }
3689  }
3690  }
3691 
3692  // If the stack restore is in a return, resume, or unwind block and if there
3693  // are no allocas or calls between the restore and the return, nuke the
3694  // restore.
3695  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3696  return eraseInstFromFunction(CI);
3697  break;
3698  }
3699  case Intrinsic::lifetime_start:
3700  // Asan needs to poison memory to detect invalid access which is possible
3701  // even for empty lifetime range.
3702  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3703  II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
3704  break;
3705 
3706  if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
3707  Intrinsic::lifetime_end, *this))
3708  return nullptr;
3709  break;
3710  case Intrinsic::assume: {
3711  Value *IIOperand = II->getArgOperand(0);
3712  // Remove an assume if it is followed by an identical assume.
3713  // TODO: Do we need this? Unless there are conflicting assumptions, the
3714  // computeKnownBits(IIOperand) below here eliminates redundant assumes.
 3715  Instruction *Next = II->getNextNonDebugInstruction();
 3716  if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3717  return eraseInstFromFunction(CI);
3718 
3719  // Canonicalize assume(a && b) -> assume(a); assume(b);
3720  // Note: New assumption intrinsics created here are registered by
3721  // the InstCombineIRInserter object.
3722  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
3723  if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
3724  Builder.CreateCall(AssumeIntrinsic, A, II->getName());
3725  Builder.CreateCall(AssumeIntrinsic, B, II->getName());
3726  return eraseInstFromFunction(*II);
3727  }
3728  // assume(!(a || b)) -> assume(!a); assume(!b);
3729  if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
3730  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
3731  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
3732  return eraseInstFromFunction(*II);
3733  }
3734 
3735  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3736  // (if assume is valid at the load)
3737  CmpInst::Predicate Pred;
3738  Instruction *LHS;
3739  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
3740  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
3741  LHS->getType()->isPointerTy() &&
3742  isValidAssumeForContext(II, LHS, &DT)) {
3743  MDNode *MD = MDNode::get(II->getContext(), None);
 3744  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
 3745  return eraseInstFromFunction(*II);
3746 
3747  // TODO: apply nonnull return attributes to calls and invokes
3748  // TODO: apply range metadata for range check patterns?
3749  }
3750 
3751  // If there is a dominating assume with the same condition as this one,
3752  // then this one is redundant, and should be removed.
3753  KnownBits Known(1);
3754  computeKnownBits(IIOperand, Known, 0, II);
3755  if (Known.isAllOnes())
3756  return eraseInstFromFunction(*II);
3757 
3758  // Update the cache of affected values for this assumption (we might be
3759  // here because we just simplified the condition).
3760  AC.updateAffectedValues(II);
3761  break;
3762  }
3763  case Intrinsic::experimental_gc_relocate: {
3764  // Translate facts known about a pointer before relocating into
3765  // facts about the relocate value, while being careful to
3766  // preserve relocation semantics.
3767  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
3768 
3769  // Remove the relocation if unused, note that this check is required
3770  // to prevent the cases below from looping forever.
3771  if (II->use_empty())
3772  return eraseInstFromFunction(*II);
3773 
3774  // Undef is undef, even after relocation.
3775  // TODO: provide a hook for this in GCStrategy. This is clearly legal for
3776  // most practical collectors, but there was discussion in the review thread
3777  // about whether it was legal for all possible collectors.
3778  if (isa<UndefValue>(DerivedPtr))
3779  // Use undef of gc_relocate's type to replace it.
3780  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3781 
3782  if (auto *PT = dyn_cast<PointerType>(II->getType())) {
3783  // The relocation of null will be null for most any collector.
3784  // TODO: provide a hook for this in GCStrategy. There might be some
3785  // weird collector this property does not hold for.
3786  if (isa<ConstantPointerNull>(DerivedPtr))
3787  // Use null-pointer of gc_relocate's type to replace it.
3788  return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
3789 
3790  // isKnownNonNull -> nonnull attribute
3791  if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
3792  II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
3793  }
3794 
3795  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3796  // Canonicalize on the type from the uses to the defs
3797 
3798  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3799  break;
3800  }
3801 
3802  case Intrinsic::experimental_guard: {
3803  // Is this guard followed by another guard? We scan forward over a small
3804  // fixed window of instructions to handle common cases with conditions
3805  // computed between guards.
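  // e.g. (illustrative)
  //   call void (i1, ...) @llvm.experimental.guard(i1 %a) [ "deopt"() ]
  //   %b = icmp ult i32 %x, 10
  //   call void (i1, ...) @llvm.experimental.guard(i1 %b) [ "deopt"() ]
  // The icmp is speculatable, so it is hoisted above the first guard and the
  // two guards merge into a single guard on (%a & %b).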
3806  Instruction *NextInst = II->getNextNode();
3807  for (unsigned i = 0; i < GuardWideningWindow; i++) {
3808  // Note: Using context-free form to avoid compile time blow up
3809  if (!isSafeToSpeculativelyExecute(NextInst))
3810  break;
3811  NextInst = NextInst->getNextNode();
3812  }
3813  Value *NextCond = nullptr;
3814  if (match(NextInst,
3815  m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3816  Value *CurrCond = II->getArgOperand(0);
3817 
 3818  // Remove a guard that is immediately preceded by an identical guard.
3819  if (CurrCond == NextCond)
3820  return eraseInstFromFunction(*NextInst);
3821 
3822  // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3823  Instruction* MoveI = II->getNextNode();
3824  while (MoveI != NextInst) {
3825  auto *Temp = MoveI;
3826  MoveI = MoveI->getNextNode();
3827  Temp->moveBefore(II);
3828  }
3829  II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
3830  return eraseInstFromFunction(*NextInst);
3831  }
3832  break;
3833  }
3834  }
3835  return visitCallSite(II);
3836 }
3837 
3838 // Fence instruction simplification
 3839 Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
 3840  // Remove identical consecutive fences.
 3841  Instruction *Next = FI.getNextNonDebugInstruction();
3842  if (auto *NFI = dyn_cast<FenceInst>(Next))
3843  if (FI.isIdenticalTo(NFI))
3844  return eraseInstFromFunction(FI);
3845  return nullptr;
3846 }
3847 
3848 // InvokeInst simplification
 3849 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
 3850  return visitCallSite(&II);
3851 }
3852 
3853 /// If this cast does not affect the value passed through the varargs area, we
3854 /// can eliminate the use of the cast.
 3855 static bool isSafeToEliminateVarargsCast(CallSite CS,
 3856  const DataLayout &DL,
3857  const CastInst *const CI,
3858  const int ix) {
3859  if (!CI->isLosslessCast())
3860  return false;
3861 
3862  // If this is a GC intrinsic, avoid munging types. We need types for
3863  // statepoint reconstruction in SelectionDAG.
3864  // TODO: This is probably something which should be expanded to all
3865  // intrinsics since the entire point of intrinsics is that
3866  // they are understandable by the optimizer.
3867  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
3868  return false;
3869 
3870  // The size of ByVal or InAlloca arguments is derived from the type, so we
3871  // can't change to a type with a different size. If the size were
3872  // passed explicitly we could avoid this check.
3873  if (!CS.isByValOrInAllocaArgument(ix))
3874  return true;
3875 
3876  Type* SrcTy =
3877  cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
3878  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
3879  if (!SrcTy->isSized() || !DstTy->isSized())
3880  return false;
3881  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
3882  return false;
3883  return true;
3884 }
3885 
3886 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
3887  if (!CI->getCalledFunction()) return nullptr;
3888 
3889  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
3890  replaceInstUsesWith(*From, With);
3891  };
3892  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW);
3893  if (Value *With = Simplifier.optimizeCall(CI)) {
3894  ++NumSimplified;
3895  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
3896  }
3897 
3898  return nullptr;
3899 }
3900 
 3901 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
 3902  // Strip off at most one level of pointer casts, looking for an alloca. This
3903  // is good enough in practice and simpler than handling any number of casts.
3904  Value *Underlying = TrampMem->stripPointerCasts();
3905  if (Underlying != TrampMem &&
3906  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
3907  return nullptr;
3908  if (!isa<AllocaInst>(Underlying))
3909  return nullptr;
3910 
3911  IntrinsicInst *InitTrampoline = nullptr;
3912  for (User *U : TrampMem->users()) {
 3913  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
 3914  if (!II)
3915  return nullptr;
3916  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3917  if (InitTrampoline)
3918  // More than one init_trampoline writes to this value. Give up.
3919  return nullptr;
3920  InitTrampoline = II;
3921  continue;
3922  }
3923  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3924  // Allow any number of calls to adjust.trampoline.
3925  continue;
3926  return nullptr;
3927  }
3928 
3929  // No call to init.trampoline found.
3930  if (!InitTrampoline)
3931  return nullptr;
3932 
3933  // Check that the alloca is being used in the expected way.
3934  if (InitTrampoline->getOperand(0) != TrampMem)
3935  return nullptr;
3936 
3937  return InitTrampoline;
3938 }
3939 
 3940 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
 3941  Value *TrampMem) {
3942  // Visit all the previous instructions in the basic block, and try to find a
3943  // init.trampoline which has a direct path to the adjust.trampoline.
3944  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
3945  E = AdjustTramp->getParent()->begin();
3946  I != E;) {
3947  Instruction *Inst = &*--I;
3948  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
3949  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
3950  II->getOperand(0) == TrampMem)
3951  return II;
3952  if (Inst->mayWriteToMemory())
3953  return nullptr;
3954  }
3955  return nullptr;
3956 }
3957 
3958 // Given a call to llvm.adjust.trampoline, find and return the corresponding
3959 // call to llvm.init.trampoline if the call to the trampoline can be optimized
3960 // to a direct call to a function. Otherwise return NULL.
 3961 static IntrinsicInst *findInitTrampoline(Value *Callee) {
 3962  Callee = Callee->stripPointerCasts();
3963  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
3964  if (!AdjustTramp ||
3965  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
3966  return nullptr;
3967 
3968  Value *TrampMem = AdjustTramp->getOperand(0);
3969 
 3970  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
 3971  return IT;
3972  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
3973  return IT;
3974  return nullptr;
3975 }
3976 
3977 /// Improvements for call and invoke instructions.
3978 Instruction *InstCombiner::visitCallSite(CallSite CS) {
3979  if (isAllocLikeFn(CS.getInstruction(), &TLI))
3980  return visitAllocSite(*CS.getInstruction());
3981 
3982  bool Changed = false;
3983 
3984  // Mark any parameters that are known to be non-null with the nonnull
3985  // attribute. This is helpful for inlining calls to functions with null
3986  // checks on their arguments.
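  // e.g. (illustrative) passing the address of an alloca:
  //   %a = alloca i32
  //   call void @f(i32* %a)
  // can be rewritten as call void @f(i32* nonnull %a), which lets the inliner
  // drop a null check on that parameter inside @f.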
3987  SmallVector<unsigned, 4> ArgNos;
3988  unsigned ArgNo = 0;
3989 
3990  for (Value *V : CS.args()) {
3991  if (V->getType()->isPointerTy() &&
3992  !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
3993  isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
3994  ArgNos.push_back(ArgNo);
3995  ArgNo++;
3996  }
3997 
3998  assert(ArgNo == CS.arg_size() && "sanity check");
3999 
4000  if (!ArgNos.empty()) {
 4001  AttributeList AS = CS.getAttributes();
 4002  LLVMContext &Ctx = CS.getInstruction()->getContext();
4003  AS = AS.addParamAttribute(Ctx, ArgNos,
4004  Attribute::get(Ctx, Attribute::NonNull));
4005  CS.setAttributes(AS);
4006  Changed = true;
4007  }
4008 
4009  // If the callee is a pointer to a function, attempt to move any casts to the
4010  // arguments of the call/invoke.
4011  Value *Callee = CS.getCalledValue();
4012  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
4013  return nullptr;
4014 
4015  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
4016  // Remove the convergent attr on calls when the callee is not convergent.
4017  if (CS.isConvergent() && !CalleeF->isConvergent() &&
4018  !CalleeF->isIntrinsic()) {
4019  LLVM_DEBUG(dbgs() << "Removing convergent attr from instr "
4020  << CS.getInstruction() << "\n");
4021  CS.setNotConvergent();
4022  return CS.getInstruction();
4023  }
4024 
4025  // If the call and callee calling conventions don't match, this call must
4026  // be unreachable, as the call is undefined.
4027  if (CalleeF->getCallingConv() != CS.getCallingConv() &&
4028  // Only do this for calls to a function with a body. A prototype may
4029  // not actually end up matching the implementation's calling conv for a
4030  // variety of reasons (e.g. it may be written in assembly).
4031  !CalleeF->isDeclaration()) {
4032  Instruction *OldCall = CS.getInstruction();
4033  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
 4034  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
 4035  OldCall);
4036  // If OldCall does not return void then replaceAllUsesWith undef.
 4037  // This allows ValueHandlers and custom metadata to adjust themselves.
4038  if (!OldCall->getType()->isVoidTy())
4039  replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
4040  if (isa<CallInst>(OldCall))
4041  return eraseInstFromFunction(*OldCall);
4042 
4043  // We cannot remove an invoke, because it would change the CFG, just
4044  // change the callee to a null pointer.
4045  cast<InvokeInst>(OldCall)->setCalledFunction(
4046  Constant::getNullValue(CalleeF->getType()));
4047  return nullptr;
4048  }
4049  }
4050 
4051  if ((isa<ConstantPointerNull>(Callee) &&
 4052  !NullPointerIsDefined(CS.getInstruction()->getFunction())) ||
 4053  isa<UndefValue>(Callee)) {
4054  // If CS does not return void then replaceAllUsesWith undef.
 4055  // This allows ValueHandlers and custom metadata to adjust themselves.
4056  if (!CS.getInstruction()->getType()->isVoidTy())
 4057  replaceInstUsesWith(*CS.getInstruction(),
 4058  UndefValue::get(CS.getInstruction()->getType()));
4059 
4060  if (isa<InvokeInst>(CS.getInstruction())) {
4061  // Can't remove an invoke because we cannot change the CFG.
4062  return nullptr;
4063  }
4064 
4065  // This instruction is not reachable, just remove it. We insert a store to
4066  // undef so that we know that this code is not reachable, despite the fact
4067  // that we can't modify the CFG here.
4068  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
 4069  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
 4070  CS.getInstruction());
4071 
4072  return eraseInstFromFunction(*CS.getInstruction());
4073  }
4074 
4075  if (IntrinsicInst *II = findInitTrampoline(Callee))
4076  return transformCallThroughTrampoline(CS, II);
4077 
4078  PointerType *PTy = cast<PointerType>(Callee->getType());
4079  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4080  if (FTy->isVarArg()) {
4081  int ix = FTy->getNumParams();
4082  // See if we can optimize any arguments passed through the varargs area of
4083  // the call.
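  // e.g. (illustrative) a lossless cast feeding a varargs slot, such as
  //   %p = bitcast i32* %q to i8*
  //   call i32 (i8*, ...) @printf(i8* %fmt, i8* %p)
  // can pass %q directly, since the bits placed in the va_arg area are
  // unchanged.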
4084  for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
4085  E = CS.arg_end(); I != E; ++I, ++ix) {
4086  CastInst *CI = dyn_cast<CastInst>(*I);
4087  if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
4088  *I = CI->getOperand(0);
4089  Changed = true;
4090  }
4091  }
4092  }
4093 
4094  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
4095  // Inline asm calls cannot throw - mark them 'nounwind'.
4096  CS.setDoesNotThrow();
4097  Changed = true;
4098  }
4099 
4100  // Try to optimize the call if possible, we require DataLayout for most of
4101  // this. None of these calls are seen as possibly dead so go ahead and
4102  // delete the instruction now.
4103  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
4104  Instruction *I = tryOptimizeCall(CI);
4105  // If we changed something return the result, etc. Otherwise let
4106  // the fallthrough check.
4107  if (I) return eraseInstFromFunction(*I);
4108  }
4109 
4110  return Changed ? CS.getInstruction() : nullptr;
4111 }
4112 
4113 /// If the callee is a constexpr cast of a function, attempt to move the cast to
4114 /// the arguments of the call/invoke.
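 /// An illustrative example (not taken from the source):
 ///   call void bitcast (void (i8*)* @f to void (i32*)*)(i32* %p)
 /// becomes
 ///   %q = bitcast i32* %p to i8*
 ///   call void @f(i8* %q)
 /// when every conversion involved is a no-op bit or pointer cast.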
4115 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
 4116  auto *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
 4117  if (!Callee)
4118  return false;
4119 
4120  // If this is a call to a thunk function, don't remove the cast. Thunks are
4121  // used to transparently forward all incoming parameters and outgoing return
4122  // values, so it's important to leave the cast in place.
4123  if (Callee->hasFnAttribute("thunk"))
4124  return false;
4125 
4126  // If this is a musttail call, the callee's prototype must match the caller's
4127  // prototype with the exception of pointee types. The code below doesn't
4128  // implement that, so we can't do this transform.
4129  // TODO: Do the transform if it only requires adding pointer casts.
4130  if (CS.isMustTailCall())
4131  return false;
4132 
4133  Instruction *Caller = CS.getInstruction();
4134  const AttributeList &CallerPAL = CS.getAttributes();
4135 
4136  // Okay, this is a cast from a function to a different type. Unless doing so
4137  // would cause a type conversion of one of our arguments, change this call to
4138  // be a direct call with arguments casted to the appropriate types.
4139  FunctionType *FT = Callee->getFunctionType();
4140  Type *OldRetTy = Caller->getType();
4141  Type *NewRetTy = FT->getReturnType();
4142 
4143  // Check to see if we are changing the return type...
4144  if (OldRetTy != NewRetTy) {
4145 
4146  if (NewRetTy->isStructTy())
4147  return false; // TODO: Handle multiple return values.
4148 
4149  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4150  if (Callee->isDeclaration())
4151  return false; // Cannot transform this return value.
4152 
4153  if (!Caller->use_empty() &&
4154  // void -> non-void is handled specially
4155  !NewRetTy->isVoidTy())
4156  return false; // Cannot transform this return value.
4157  }
4158 
4159  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4160  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4161  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
4162  return false; // Attribute not compatible with transformed value.
4163  }
4164 
4165  // If the callsite is an invoke instruction, and the return value is used by
4166  // a PHI node in a successor, we cannot change the return type of the call
4167  // because there is no place to put the cast instruction (without breaking
4168  // the critical edge). Bail out in this case.
4169  if (!Caller->use_empty())
4170  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4171  for (User *U : II->users())
4172  if (PHINode *PN = dyn_cast<PHINode>(U))
4173  if (PN->getParent() == II->getNormalDest() ||
4174  PN->getParent() == II->getUnwindDest())
4175  return false;
4176  }
4177 
4178  unsigned NumActualArgs = CS.arg_size();
4179  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4180 
4181  // Prevent us turning:
4182  // declare void @takes_i32_inalloca(i32* inalloca)
4183  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4184  //
4185  // into:
4186  // call void @takes_i32_inalloca(i32* null)
4187  //
4188  // Similarly, avoid folding away bitcasts of byval calls.
4189  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4190  Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
4191  return false;
4192 
 4193  CallSite::arg_iterator AI = CS.arg_begin();
 4194  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4195  Type *ParamTy = FT->getParamType(i);
4196  Type *ActTy = (*AI)->getType();
4197 
4198  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4199  return false; // Cannot transform this parameter value.
4200 
4201  if (AttrBuilder(CallerPAL.getParamAttributes(i))
4202  .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
4203  return false; // Attribute not compatible with transformed value.
4204 
4205  if (CS.isInAllocaArgument(i))
4206  return false; // Cannot transform to and from inalloca.
4207 
4208  // If the parameter is passed as a byval argument, then we have to have a
4209  // sized type and the sized type has to have the same size as the old type.
4210  if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
4211  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
4212  if (!ParamPTy || !ParamPTy->getElementType()->isSized())
4213  return false;
4214 
4215  Type *CurElTy = ActTy->getPointerElementType();
4216  if (DL.getTypeAllocSize(CurElTy) !=
4217  DL.getTypeAllocSize(ParamPTy->getElementType()))
4218  return false;
4219  }
4220  }
4221 
4222  if (Callee->isDeclaration()) {
4223  // Do not delete arguments unless we have a function body.
4224  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
4225  return false;
4226 
4227  // If the callee is just a declaration, don't change the varargsness of the
4228  // call. We don't want to introduce a varargs call where one doesn't
4229  // already exist.
4230  PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
4231  if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
4232  return false;
4233 
4234  // If both the callee and the cast type are varargs, we still have to make
4235  // sure the number of fixed parameters are the same or we have the same
4236  // ABI issues as if we introduce a varargs call.
4237  if (FT->isVarArg() &&
4238  cast<FunctionType>(APTy->getElementType())->isVarArg() &&
4239  FT->getNumParams() !=
4240  cast<FunctionType>(APTy->getElementType())->getNumParams())
4241  return false;
4242  }
4243 
4244  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4245  !CallerPAL.isEmpty()) {
4246  // In this case we have more arguments than the new function type, but we
4247  // won't be dropping them. Check that these extra arguments have attributes
4248  // that are compatible with being a vararg call argument.
4249  unsigned SRetIdx;
4250  if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4251  SRetIdx > FT->getNumParams())
4252  return false;
4253  }
4254 
4255  // Okay, we decided that this is a safe thing to do: go ahead and start
4256  // inserting cast instructions as necessary.
 4257  SmallVector<Value *, 8> Args;
 4258  SmallVector<AttributeSet, 8> ArgAttrs;
 4259  Args.reserve(NumActualArgs);
4260  ArgAttrs.reserve(NumActualArgs);
4261 
4262  // Get any return attributes.
4263  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4264 
4265  // If the return value is not being used, the type may not be compatible
4266  // with the existing attributes. Wipe out any problematic attributes.
4267  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
4268 
4269  AI = CS.arg_begin();
4270  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4271  Type *ParamTy = FT->getParamType(i);
4272 
4273  Value *NewArg = *AI;
4274  if ((*AI)->getType() != ParamTy)
4275  NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4276  Args.push_back(NewArg);
4277 
4278  // Add any parameter attributes.
4279  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4280  }
4281 
4282  // If the function takes more arguments than the call was taking, add them
4283  // now.
4284  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
 4285  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
 4286  ArgAttrs.push_back(AttributeSet());
4287  }
4288 
4289  // If we are removing arguments to the function, emit an obnoxious warning.
4290  if (FT->getNumParams() < NumActualArgs) {
4291  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4292  if (FT->isVarArg()) {
4293  // Add all of the arguments in their promoted form to the arg list.
4294  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4295  Type *PTy = getPromotedType((*AI)->getType());
4296  Value *NewArg = *AI;
4297  if (PTy != (*AI)->getType()) {
4298  // Must promote to pass through va_arg area!
4299  Instruction::CastOps opcode =
4300  CastInst::getCastOpcode(*AI, false, PTy, false);
4301  NewArg = Builder.CreateCast(opcode, *AI, PTy);
4302  }
4303  Args.push_back(NewArg);
4304 
4305  // Add any parameter attributes.
4306  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4307  }
4308  }
4309  }
4310 
4311  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
4312 
4313  if (NewRetTy->isVoidTy())
4314  Caller->setName(""); // Void type should not have a name.
4315 
4316  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4317  "missing argument attributes");
4318  LLVMContext &Ctx = Callee->getContext();
4319  AttributeList NewCallerPAL = AttributeList::get(
4320  Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4321 
 4322  SmallVector<OperandBundleDef, 1> OpBundles;
 4323  CS.getOperandBundlesAsDefs(OpBundles);
4324 
4325  CallSite NewCS;
4326  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4327  NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
4328  II->getUnwindDest(), Args, OpBundles);
4329  } else {
4330  NewCS = Builder.CreateCall(Callee, Args, OpBundles);
4331  cast<CallInst>(NewCS.getInstruction())
4332  ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
4333  }
4334  NewCS->takeName(Caller);
4335  NewCS.setCallingConv(CS.getCallingConv());
4336  NewCS.setAttributes(NewCallerPAL);
4337 
4338  // Preserve the weight metadata for the new call instruction. The metadata
4339  // is used by SamplePGO to check callsite's hotness.
4340  uint64_t W;
4341  if (Caller->extractProfTotalWeight(W))
4342  NewCS->setProfWeight(W);
4343 
4344  // Insert a cast of the return type as necessary.
4345  Instruction *NC = NewCS.getInstruction();
4346  Value *NV = NC;
4347  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4348  if (!NV->getType()->isVoidTy()) {
4349  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4350  NC->setDebugLoc(Caller->getDebugLoc());
4351 
4352  // If this is an invoke instruction, we should insert it after the first
4353  // non-phi, instruction in the normal successor block.
4354  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4355  BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
4356  InsertNewInstBefore(NC, *I);
4357  } else {
4358  // Otherwise, it's a call, just insert cast right after the call.
4359  InsertNewInstBefore(NC, *Caller);
4360  }
4361  Worklist.AddUsersToWorkList(*Caller);
4362  } else {
4363  NV = UndefValue::get(Caller->getType());
4364  }
4365  }
4366 
4367  if (!Caller->use_empty())
4368  replaceInstUsesWith(*Caller, NV);
4369  else if (Caller->hasValueHandle()) {
4370  if (OldRetTy == NV->getType())
4371  ValueHandleBase::ValueIsRAUWd(Caller, NV);
4372  else
4373  // We cannot call ValueIsRAUWd with a different type, and the
4374  // actual tracked value will disappear.
 4375  ValueHandleBase::ValueIsDeleted(Caller);
 4376  }
4377 
4378  eraseInstFromFunction(*Caller);
4379  return true;
4380 }
4381 
4382 /// Turn a call to a function created by init_trampoline / adjust_trampoline
4383 /// intrinsic pair into a direct call to the underlying function.
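 /// Illustrative sketch (not from the source): if %tramp was produced by
 ///   call void @llvm.init.trampoline(i8* %mem, i8* bitcast (@nested), i8* %chain)
 /// then a call through the adjusted trampoline becomes a direct call to
 /// @nested with an extra 'nest' argument carrying %chain.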
4384 Instruction *
4385 InstCombiner::transformCallThroughTrampoline(CallSite CS,
4386  IntrinsicInst *Tramp) {
4387  Value *Callee = CS.getCalledValue();
4388  PointerType *PTy = cast<PointerType>(Callee->getType());
 4389  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
 4390  AttributeList Attrs = CS.getAttributes();
4391 
4392  // If the call already has the 'nest' attribute somewhere then give up -
4393  // otherwise 'nest' would occur twice after splicing in the chain.
4394  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4395  return nullptr;
4396 
4397  assert(Tramp &&
4398  "transformCallThroughTrampoline called with incorrect CallSite.");
4399 
4400  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
4401  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
4402 
4403  AttributeList NestAttrs = NestF->getAttributes();
4404  if (!NestAttrs.isEmpty()) {
4405  unsigned NestArgNo = 0;
4406  Type *NestTy = nullptr;
4407  AttributeSet NestAttr;
4408 
4409  // Look for a parameter marked with the 'nest' attribute.
4410  for (FunctionType::param_iterator I = NestFTy->param_begin(),
4411  E = NestFTy->param_end();
4412  I != E; ++NestArgNo, ++I) {
4413  AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
4414  if (AS.hasAttribute(Attribute::Nest)) {
4415  // Record the parameter type and any other attributes.
4416  NestTy = *I;
4417  NestAttr = AS;
4418  break;
4419  }
4420  }
4421 
4422  if (NestTy) {
4423  Instruction *Caller = CS.getInstruction();
4424  std::vector<Value*> NewArgs;
4425  std::vector<AttributeSet> NewArgAttrs;
4426  NewArgs.reserve(CS.arg_size() + 1);
4427  NewArgAttrs.reserve(CS.arg_size());
4428 
4429  // Insert the nest argument into the call argument list, which may
4430  // mean appending it. Likewise for attributes.
4431 
4432  {
4433  unsigned ArgNo = 0;
4434  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4435  do {
4436  if (ArgNo == NestArgNo) {
4437  // Add the chain argument and attributes.
4438  Value *NestVal = Tramp->getArgOperand(2);
4439  if (NestVal->getType() != NestTy)
4440  NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
4441  NewArgs.push_back(NestVal);
4442  NewArgAttrs.push_back(NestAttr);
4443  }
4444 
4445  if (I == E)
4446  break;
4447 
4448  // Add the original argument and attributes.
4449  NewArgs.push_back(*I);
4450  NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
4451 
4452  ++ArgNo;
4453  ++I;
4454  } while (true);
4455  }
4456 
4457  // The trampoline may have been bitcast to a bogus type (FTy).
4458  // Handle this by synthesizing a new function type, equal to FTy
4459  // with the chain parameter inserted.
4460 
4461  std::vector<Type*> NewTypes;
4462  NewTypes.reserve(FTy->getNumParams()+1);
4463 
4464  // Insert the chain's type into the list of parameter types, which may
4465  // mean appending it.
4466  {
4467  unsigned ArgNo = 0;
4468  FunctionType::param_iterator I = FTy->param_begin(),
4469  E = FTy->param_end();
4470 
4471  do {
4472  if (ArgNo == NestArgNo)
4473  // Add the chain's type.
4474  NewTypes.push_back(NestTy);
4475 
4476  if (I == E)
4477  break;
4478 
4479  // Add the original type.
4480  NewTypes.push_back(*I);
4481 
4482  ++ArgNo;
4483  ++I;
4484  } while (true);
4485  }
4486 
4487  // Replace the trampoline call with a direct call. Let the generic
4488  // code sort out any function type mismatches.
4489  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4490  FTy->isVarArg());
4491  Constant *NewCallee =
4492  NestF->getType() == PointerType::getUnqual(NewFTy) ?
4493  NestF : ConstantExpr::getBitCast(NestF,
4494  PointerType::getUnqual(NewFTy));
4495  AttributeList NewPAL =
4496  AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
4497  Attrs.getRetAttributes(), NewArgAttrs);
4498 
 4499  SmallVector<OperandBundleDef, 1> OpBundles;
 4500  CS.getOperandBundlesAsDefs(OpBundles);
4501 
4502  Instruction *NewCaller;
4503  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4504  NewCaller = InvokeInst::Create(NewCallee,
4505  II->getNormalDest(), II->getUnwindDest(),
4506  NewArgs, OpBundles);
4507  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4508  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4509  } else {
4510  NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
4511  cast<CallInst>(NewCaller)->setTailCallKind(
4512  cast<CallInst>(Caller)->getTailCallKind());
4513  cast<CallInst>(NewCaller)->setCallingConv(
4514  cast<CallInst>(Caller)->getCallingConv());
4515  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4516  }
4517  NewCaller->setDebugLoc(Caller->getDebugLoc());
4518 
4519  return NewCaller;
4520  }
4521  }
4522 
4523  // Replace the trampoline call with a direct call. Since there is no 'nest'
4524  // parameter, there is no need to adjust the argument list. Let the generic
4525  // code sort out any function type mismatches.
4526  Constant *NewCallee =
4527  NestF->getType() == PTy ? NestF :
4528  ConstantExpr::getBitCast(NestF, PTy);
4529  CS.setCalledFunction(NewCallee);
4530  return CS.getInstruction();
4531 }
Definition: Instructions.h:444
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:454
Instruction * visitVACopyInst(VACopyInst &I)
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombiner &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static ConstantAggregateZero * get(Type *Ty)
Definition: Constants.cpp:1309
static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC)
This class represents a function call, abstracting a target machine&#39;s calling convention.
This file contains the declarations for metadata subclasses.
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:647
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:243
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:91
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:617
iterator_range< IterTy > args() const
Definition: CallSite.h:215
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
bool hasValueHandle() const
Return true if there is a value handle associated with this value.
Definition: Value.h:485
unsigned less or equal
Definition: InstrTypes.h:911
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
unsigned less than
Definition: InstrTypes.h:910
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1322
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", Instruction *InsertBefore=nullptr, Instruction *MDFrom=nullptr)
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC)
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:714
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPTr cast instruction.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:921
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:817
bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr)
Return true if it is valid to use the assumptions provided by an assume intrinsic, I, at the point in the control-flow identified by the context instruction, CxtI.
STATISTIC(NumFunctions, "Total number of functions")
Metadata node.
Definition: Metadata.h:862
F(f)
static CallInst * Create(Value *Func, ArrayRef< Value *> Args, ArrayRef< OperandBundleDef > Bundles=None, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
const fltSemantics & getSemantics() const
Definition: APFloat.h:1155
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
BinaryOp_match< LHS, RHS, Instruction::FSub > m_FSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:657
An instruction for reading from memory.
Definition: Instructions.h:168
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:177
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:882
static Constant * getCompare(unsigned short pred, Constant *C1, Constant *C2, bool OnlyIfReduced=false)
Return an ICmp or FCmp comparison operator constant expression.
Definition: Constants.cpp:1903
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:166
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
static OverflowCheckFlavor IntrinsicIDToOverflowCheckFlavor(unsigned ID)
Returns the OverflowCheckFlavor corresponding to a overflow_with_op intrinsic.
void reserve(size_type N)
Definition: SmallVector.h:371
Value * getLength() const
void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
static Instruction * simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC)
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:361
Instruction * visitVAStartInst(VAStartInst &I)
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:534
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1502
Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
Definition: IRBuilder.h:2022
bool isGCRelocate(ImmutableCallSite CS)
Definition: Statepoint.cpp:43
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
const CallInst * isFreeCall(const Value *I, const TargetLibraryInfo *TLI)
isFreeCall - Returns non-null if the value is a call to the builtin free()
static Constant * getNullValue(Type *Ty)
Constructor to create a &#39;0&#39; constant of arbitrary type.
Definition: Constants.cpp:268
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition: KnownBits.h:136
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op...
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one. ...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
opStatus divide(const APFloat &RHS, roundingMode RM)
Definition: APFloat.h:968
static Instruction * SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
Instruction * visitInvokeInst(InvokeInst &II)
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1590
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:514
static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1, const APFloat &Src2)
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
Definition: PatternMatch.h:730
Type * getPointerElementType() const
Definition: Type.h:376
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
Definition: InstrTypes.h:983
OverflowCheckFlavor
Specific patterns of overflow check idioms that we match.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:592
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:992
AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const
Add an argument attribute to the list.
Definition: Attributes.h:397
static Value * simplifyNeonTbl1(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Convert a table lookup to shufflevector if the mask is constant.
IterTy arg_end() const
Definition: CallSite.h:575
Instruction * eraseInstFromFunction(Instruction &I)
Combiner aware instruction erasure.
CastClass_match< OpTy, Instruction::Trunc > m_Trunc(const OpTy &Op)
Matches Trunc.
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:731
The core instruction combiner logic.
static bool isSafeToEliminateVarargsCast(const CallSite CS, const DataLayout &DL, const CastInst *const CI, const int ix)
If this cast does not affect the value passed through the varargs area, we can eliminate the use of t...
This file contains the simple types necessary to represent the attributes associated with functions a...
InstrTy * getInstruction() const
Definition: CallSite.h:92
static Constant * getSExt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1628
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:295
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:977
This file implements a class to represent arbitrary precision integral constant values and operations...
All zero aggregate value.
Definition: Constants.h:337
static Value * simplifyX86vpermv(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
Metadata * LowAndHigh[]
ValTy * getCalledValue() const
Return the pointer to function that is being called.
Definition: CallSite.h:100
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
DominatorTree & getDominatorTree() const
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:191
Key
PAL metadata keys.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:85
Class to represent function types.
Definition: DerivedTypes.h:103
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1629
bool isInfinity() const
Definition: APFloat.h:1144
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:1581
cstfp_pred_ty< is_nan > m_NaN()
Match an arbitrary NaN constant.
Definition: PatternMatch.h:424
This represents the llvm.va_start intrinsic.
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Matches FPExt.
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4444
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
AttributeSet getParamAttributes(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
bool isVarArg() const
Definition: DerivedTypes.h:123
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the call or the callee has the given attribute.
Definition: CallSite.h:377
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:200
Value * CreateVectorSplat(unsigned NumElts, Value *V, const Twine &Name="")
Return a vector value that contains.
Definition: IRBuilder.h:2076
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:138
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
Definition: PatternMatch.h:442
AttrBuilder & remove(const AttrBuilder &B)
Remove the attributes from the builder.
static Value * simplifyX86pack(IntrinsicInst &II, bool IsSigned)
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:210
cmpResult
IEEE-754R 5.11: Floating Point Comparison Relations.
Definition: APFloat.h:166
An instruction for storing to memory.
Definition: Instructions.h:310
bool extractProfTotalWeight(uint64_t &TotalVal) const
Retrieve total raw weight values of a branch.
Definition: Metadata.cpp:1340
SelectClass_match< Cond, LHS, RHS > m_Select(const Cond &C, const LHS &L, const RHS &R)
static void ValueIsRAUWd(Value *Old, Value *New)
Definition: Value.cpp:894
static Value * simplifyX86vpcom(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder, bool IsSigned)
Decode XOP integer vector comparison intrinsics.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:301
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:408
static Value * simplifyX86movmsk(const IntrinsicInst &II)
amdgpu Simplify well known AMD library false Value * Callee
Function * getDeclaration(Module *M, ID id, ArrayRef< Type *> Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1007
This class represents a truncation of integer types.
Type * getElementType() const
Return the element type of the array/vector.
Definition: Constants.cpp:2369
Value * getOperand(unsigned i) const
Definition: User.h:170
Class to represent pointers.
Definition: DerivedTypes.h:467
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
Definition: Attributes.cpp:576
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:338
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:304
Value * getOperand(unsigned i_nocapture) const
const DataLayout & getDataLayout() const
static MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition: Metadata.cpp:106
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:1750
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:141
bool hasAttrSomewhere(Attribute::AttrKind Kind, unsigned *Index=nullptr) const
Return true if the specified attribute is set for at least one parameter or for the return value...
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:63
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata *> MDs)
Definition: Metadata.h:1164
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:363
void setAttributes(AttributeList PAL)
Set the parameter attributes of the call.
Definition: CallSite.h:333
bool doesNotThrow() const
Determine if the call cannot unwind.
Instruction * visitFenceInst(FenceInst &FI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:410
static Instruction * simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:149
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Subclasses of this class are all able to terminate a basic block.
Definition: InstrTypes.h:55
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:511
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:287
bool isNegative() const
Definition: APFloat.h:1147
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1378
Value * CreateAShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1092
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
ConstantInt * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:724
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:287
CallInst * CreateIntrinsic(Intrinsic::ID ID, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with no operands.
Definition: IRBuilder.cpp:741
bool isNaN() const
Definition: APFloat.h:1145
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:42
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:1901
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:224
unsigned getNumParams() const
Return the number of fixed parameters this function type requires.
Definition: DerivedTypes.h:139
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:264
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:306
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:499
const Instruction * getNextNonDebugInstruction() const
Return a pointer to the next non-debug instruction in the same basic block as &#39;this&#39;, or nullptr if no such instruction exists.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
bool isFast() const
Determine whether all fast-math-flags are set.
std::underlying_type< E >::type Underlying(E Val)
Check that Val is in range for E, and return Val cast to E&#39;s underlying type.
Definition: BitmaskEnum.h:91
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
void setCalledFunction(Value *Fn)
Set the function called.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:885
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, CastClass_match< OpTy, Instruction::SExt > > m_ZExtOrSExt(const OpTy &Op)
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:78
void setCallingConv(CallingConv::ID CC)
Set the calling convention of the call.
Definition: CallSite.h:316
bool isGCResult(ImmutableCallSite CS)
Definition: Statepoint.cpp:53
This class represents any memset intrinsic.
static FunctionType * get(Type *Result, ArrayRef< Type *> Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:297
void setArgOperand(unsigned i, Value *v)
self_iterator getIterator()
Definition: ilist_node.h:82
Class to represent integer types.
Definition: DerivedTypes.h:40
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:360
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1921
bool isIntN(unsigned N) const
Check if this APInt has an N-bits unsigned integer value.
Definition: APInt.h:449
void setNotConvergent()
Definition: CallSite.h:527
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:60
void setAlignment(unsigned Align)
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:322
static Value * simplifyX86varShift(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1392
const AMDGPUAS & AS
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:539
iterator_range< User::op_iterator > arg_operands()
Iteration adapter for range-for loops.
size_t size() const
Definition: SmallVector.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1226
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
static InvokeInst * Create(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value *> Args, const Twine &NameStr, Instruction *InsertBefore=nullptr)
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
signed greater than
Definition: InstrTypes.h:912
static Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
Definition: Constants.cpp:305
static Value * simplifyX86extrq(IntrinsicInst &II, Value *Op0, ConstantInt *CILength, ConstantInt *CIIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding or conversion to a shuffle...
const APFloat & getValueAPF() const
Definition: Constants.h:299
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:51
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:506
static BinaryOperator * CreateFNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:163
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:240
static CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
Iterator for intrusive lists based on ilist_node.
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:176
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
static PointerType * getInt1PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:216
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition: Constants.h:251
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the generic address space (address sp...
Definition: DerivedTypes.h:482
This is the shared class of boolean and integer constants.
Definition: Constants.h:84
static Value * simplifyX86vpermilvar(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert vpermilvar* to shufflevector if the mask is constant.
iterator end()
Definition: BasicBlock.h:266
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:130
IterTy arg_begin() const
Definition: CallSite.h:571
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
Value * CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1934
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:521
Type::subtype_iterator param_iterator
Definition: DerivedTypes.h:126
bool overlaps(const AttrBuilder &B) const
Return true if the builder has any attribute that&#39;s in the specified builder.
static Value * simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign, InstCombiner::BuilderTy &Builder)
Convert a vector load intrinsic into a simple llvm load instruction.
static Instruction * simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC)
void setDoesNotThrow()
Definition: CallSite.h:508
signed less than
Definition: InstrTypes.h:914
Type * getReturnType() const
Definition: DerivedTypes.h:124
CallInst * CreateMaskedStore(Value *Val, Value *Ptr, unsigned Align, Value *Mask)
Create a call to Masked Store intrinsic.
Definition: IRBuilder.cpp:491
APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM)
Equivalent of C standard library function.
Definition: APFloat.h:1205
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1948
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:621
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:635
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:684
#define NC
Definition: regutils.h:42
CallInst * CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask, Value *PassThru=nullptr, const Twine &Name="")
Create a call to Masked Load intrinsic.
Definition: IRBuilder.cpp:470
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1286
const Value * stripPointerCastsAndInvariantGroups() const
Strip off pointer casts, all-zero GEPs, aliases and invariant group info.
Definition: Value.cpp:551
Value * SimplifyCall(ImmutableCallSite CS, const SimplifyQuery &Q)
Given a callsite, fold the result or return null.
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
Definition: Function.cpp:1426
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:577
bool isDenormal() const
Definition: APFloat.h:1148
void setOperand(unsigned i, Value *Val)
Definition: User.h:175
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:924
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
signed less or equal
Definition: InstrTypes.h:915
void setOperand(unsigned i_nocapture, Value *Val_nocapture)
Class to represent vector types.
Definition: DerivedTypes.h:393
void setVolatile(bool V)
Specify whether this is a volatile store or not.
Definition: Instructions.h:346
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:56
Class for arbitrary precision integers.
Definition: APInt.h:69
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), Instruction *InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
iterator_range< user_iterator > users()
Definition: Value.h:399
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1051
static Value * simplifyX86pshufb(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Attempt to convert pshufb* to shufflevector if the mask is constant.
static cl::opt< bool > FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden, cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."), cl::init(false))
amdgpu Simplify well known AMD library false Value Value * Arg
IntegerType * getInt1Ty()
Fetch the type representing a single bit.
Definition: IRBuilder.h:332
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate IT block based on arch"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow deprecated IT based on ARMv8"), clEnumValN(NoRestrictedIT, "arm-no-restrict-it", "Allow IT blocks based on ARMv7")))
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:428
specific_fpval m_FPOne()
Match a float 1.0 or vector with all elements equal to 1.0.
Definition: PatternMatch.h:540
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass&#39;s ...
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Definition: Instructions.h:368
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:546
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:185
static Value * simplifyMinnumMaxnum(const IntrinsicInst &II)
static Value * simplifyMaskedLoad(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
static Instruction * simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:290
static bool maskIsAllOneOrUndef(Value *Mask)
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:176
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
OverflowResult
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
unsigned getNumArgOperands() const
Return the number of call arguments.
StringRef getValueAsString() const
Return the attribute&#39;s value as a string.
Definition: Attributes.cpp:195
unsigned greater or equal
Definition: InstrTypes.h:909
void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Definition: CallSite.h:582
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:224
static Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: CallSite.h:505
bool isNormal() const
Definition: APFloat.h:1151
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI, bool LookThroughBitCast=false)
Tests if a value is a call or invoke to a library function that allocates memory (either malloc...
void setDoesNotThrow()
Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
Definition: IRBuilder.h:2049
Value * optimizeCall(CallInst *CI)
optimizeCall - Take the given call instruction and return a more optimal value to replace the instruc...
static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID, unsigned EndID, InstCombiner &IC)
unsigned getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:260
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
Type * getValueType() const
Definition: GlobalValue.h:275
static IntrinsicInst * findInitTrampoline(Value *Callee)
bool isByValOrInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed by value or in an alloca.
Definition: CallSite.h:608
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:81
AssumptionCache & getAssumptionCache() const
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:455
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1112
static Value * simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, APInt APLength, APInt APIndex, InstCombiner::BuilderTy &Builder)
Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant folding or conversion to a shu...
bool isStatepoint(ImmutableCallSite CS)
Definition: Statepoint.cpp:27
static Constant * getNegativeIsTrueBoolVec(ConstantDataVector *V)
Return a constant boolean vector that has true elements in all positions where the input constant dat...
This represents the llvm.va_copy intrinsic.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:544
Value * getArgOperand(unsigned i) const
getArgOperand/setArgOperand - Return/set the i-th call argument.
static Value * simplifyX86round(IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
bool isSafeToSpeculativelyExecute(const Value *V, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
LoadInst * CreateAlignedLoad(Value *Ptr, unsigned Align, const char *Name)
Provided to resolve &#39;CreateAlignedLoad(Ptr, Align, "...")&#39; correctly, instead of converting the strin...
Definition: IRBuilder.h:1328
static Instruction * foldCtpop(IntrinsicInst &II, InstCombiner &IC)
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
void setAlignment(unsigned Align)
This file provides internal interfaces used to implement the InstCombine.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:352
constexpr char Size[]
Key for Kernel::Arg::Metadata::mSize.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
AttrBuilder typeIncompatible(Type *Ty)
Which attributes cannot be applied to a type.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:81
AttributeSet getFnAttributes() const
The function attributes are returned.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:87
Attribute getFnAttribute(Attribute::AttrKind Kind) const