LLVM 8.0.0svn
InstCombineCalls.cpp
1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the visitCall and visitInvoke functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InstCombineInternal.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/Twine.h"
24 #include "llvm/Analysis/AssumptionCache.h"
25 #include "llvm/Analysis/InstructionSimplify.h"
26 #include "llvm/Analysis/Loads.h"
27 #include "llvm/Analysis/MemoryBuiltins.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/Attributes.h"
30 #include "llvm/IR/BasicBlock.h"
31 #include "llvm/IR/CallSite.h"
32 #include "llvm/IR/Constant.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalVariable.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/IntrinsicInst.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/LLVMContext.h"
44 #include "llvm/IR/Metadata.h"
45 #include "llvm/IR/PatternMatch.h"
46 #include "llvm/IR/Statepoint.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/IR/ValueHandle.h"
51 #include "llvm/Support/AtomicOrdering.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Compiler.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/ErrorHandling.h"
57 #include "llvm/Support/KnownBits.h"
58 #include "llvm/Support/MathExtras.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
61 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
62 #include <algorithm>
63 #include <cassert>
64 #include <cstdint>
65 #include <cstring>
66 #include <utility>
67 #include <vector>
68 
69 using namespace llvm;
70 using namespace PatternMatch;
71 
72 #define DEBUG_TYPE "instcombine"
73 
74 STATISTIC(NumSimplified, "Number of library calls simplified");
75 
76 static cl::opt<unsigned> GuardWideningWindow(
77  "instcombine-guard-widening-window",
78  cl::init(3),
79  cl::desc("How wide an instruction window to bypass looking for "
80  "another guard"));
81 
82 /// Return the specified type promoted as it would be to pass through a va_arg
83 /// area.
84 static Type *getPromotedType(Type *Ty) {
85  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
86  if (ITy->getBitWidth() < 32)
87  return Type::getInt32Ty(Ty->getContext());
88  }
89  return Ty;
90 }
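// Editor's note (illustrative, not part of the original source): the helper
// above only widens narrow integers, e.g.
//   getPromotedType(i8)  -> i32,  getPromotedType(i16)  -> i32
//   getPromotedType(i64) -> i64,  getPromotedType(float) -> float (unchanged)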
91 
92 /// Return a constant boolean vector that has true elements in all positions
93 /// where the input constant data vector has an element with the sign bit set.
94 static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
95  SmallVector<Constant *, 32> BoolVec;
96  IntegerType *BoolTy = Type::getInt1Ty(V->getContext());
97  for (unsigned I = 0, E = V->getNumElements(); I != E; ++I) {
98  Constant *Elt = V->getElementAsConstant(I);
99  assert((isa<ConstantInt>(Elt) || isa<ConstantFP>(Elt)) &&
100  "Unexpected constant data vector element type");
101  bool Sign = V->getElementType()->isIntegerTy()
102  ? cast<ConstantInt>(Elt)->isNegative()
103  : cast<ConstantFP>(Elt)->isNegative();
104  BoolVec.push_back(ConstantInt::get(BoolTy, Sign));
105  }
106  return ConstantVector::get(BoolVec);
107 }
108 
109 Instruction *InstCombiner::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
110  unsigned DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
111  unsigned CopyDstAlign = MI->getDestAlignment();
112  if (CopyDstAlign < DstAlign){
113  MI->setDestAlignment(DstAlign);
114  return MI;
115  }
116 
117  unsigned SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
118  unsigned CopySrcAlign = MI->getSourceAlignment();
119  if (CopySrcAlign < SrcAlign) {
120  MI->setSourceAlignment(SrcAlign);
121  return MI;
122  }
123 
124  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
125  // load/store.
126  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
127  if (!MemOpLength) return nullptr;
128 
129  // Source and destination pointer types are always "i8*" for the intrinsic. See
130  // if the size is something we can handle with a single primitive load/store.
131  // A single load+store correctly handles overlapping memory in the memmove
132  // case.
133  uint64_t Size = MemOpLength->getLimitedValue();
134  assert(Size && "0-sized memory transferring should be removed already.");
135 
136  if (Size > 8 || (Size&(Size-1)))
137  return nullptr; // If not 1/2/4/8 bytes, exit.
138 
139  // Use an integer load+store unless we can find something better.
140  unsigned SrcAddrSp =
141  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
142  unsigned DstAddrSp =
143  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
144 
145  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
146  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
147  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
148 
149  // If the memcpy has metadata describing the members, see if we can get the
150  // TBAA tag describing our copy.
151  MDNode *CopyMD = nullptr;
152  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
153  CopyMD = M;
154  } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
155  if (M->getNumOperands() == 3 && M->getOperand(0) &&
156  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
157  mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
158  M->getOperand(1) &&
159  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
160  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
161  Size &&
162  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
163  CopyMD = cast<MDNode>(M->getOperand(2));
164  }
165 
166  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
167  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
168  LoadInst *L = Builder.CreateLoad(Src);
169  // Alignment from the mem intrinsic will be better, so use it.
170  L->setAlignment(CopySrcAlign);
171  if (CopyMD)
172  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
173  MDNode *LoopMemParallelMD =
174  MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
175  if (LoopMemParallelMD)
176  L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
177 
178  StoreInst *S = Builder.CreateStore(L, Dest);
179  // Alignment from the mem intrinsic will be better, so use it.
180  S->setAlignment(CopyDstAlign);
181  if (CopyMD)
182  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
183  if (LoopMemParallelMD)
184  S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
185 
186  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
187  // non-atomics can be volatile
188  L->setVolatile(MT->isVolatile());
189  S->setVolatile(MT->isVolatile());
190  }
191  if (isa<AtomicMemTransferInst>(MI)) {
192  // atomics have to be unordered
193  L->setOrdering(AtomicOrdering::Unordered);
194  S->setOrdering(AtomicOrdering::Unordered);
195  }
196 
197  // Set the size of the copy to 0; it will be deleted on the next iteration.
198  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
199  return MI;
200 }
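// Editor's note: an illustrative sketch of the transform above (value names
// and alignments are invented for the example, not taken from a test):
//   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 8, i1 false)
// becomes, before the now zero-length memcpy is erased on the next iteration,
//   %s = bitcast i8* %src to i64*
//   %d = bitcast i8* %dst to i64*
//   %v = load i64, i64* %s, align 1
//   store i64 %v, i64* %d, align 1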
201 
202 Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
203  unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
204  if (MI->getDestAlignment() < Alignment) {
205  MI->setDestAlignment(Alignment);
206  return MI;
207  }
208 
209  // Extract the length and alignment and fill if they are constant.
210  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
211  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
212  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
213  return nullptr;
214  uint64_t Len = LenC->getLimitedValue();
215  Alignment = MI->getDestAlignment();
216  assert(Len && "0-sized memory setting should be removed already.");
217 
218  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
219  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
220  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
221 
222  Value *Dest = MI->getDest();
223  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
224  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
225  Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
226 
227  // Alignment 0 is identity for alignment 1 for memset, but not store.
228  if (Alignment == 0) Alignment = 1;
229 
230  // Extract the fill value and store.
231  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
232  StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
233  MI->isVolatile());
234  S->setAlignment(Alignment);
235  if (isa<AtomicMemSetInst>(MI))
236  S->setOrdering(AtomicOrdering::Unordered);
237 
238  // Set the size of the copy to 0; it will be deleted on the next iteration.
239  MI->setLength(Constant::getNullValue(LenC->getType()));
240  return MI;
241  }
242 
243  return nullptr;
244 }
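// Editor's note: illustrative sketch of the constant-memset fold above
// (names and alignment are made up for the example):
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 42, i64 4, i1 false)
// becomes roughly
//   %p32 = bitcast i8* %p to i32*
//   store i32 707406378, i32* %p32, align 1   ; 0x2A2A2A2A = byte 42 splatted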
245 
246 static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
247  InstCombiner::BuilderTy &Builder) {
248  bool IsAddition;
249 
250  switch (II.getIntrinsicID()) {
251  default: llvm_unreachable("Unexpected intrinsic!");
252  case Intrinsic::x86_sse2_padds_b:
253  case Intrinsic::x86_sse2_padds_w:
254  case Intrinsic::x86_avx2_padds_b:
255  case Intrinsic::x86_avx2_padds_w:
256  case Intrinsic::x86_avx512_padds_b_512:
257  case Intrinsic::x86_avx512_padds_w_512:
258  IsAddition = true;
259  break;
260  case Intrinsic::x86_sse2_psubs_b:
261  case Intrinsic::x86_sse2_psubs_w:
262  case Intrinsic::x86_avx2_psubs_b:
263  case Intrinsic::x86_avx2_psubs_w:
264  case Intrinsic::x86_avx512_psubs_b_512:
265  case Intrinsic::x86_avx512_psubs_w_512:
266  IsAddition = false;
267  break;
268  }
269 
270  auto *Arg0 = dyn_cast<Constant>(II.getOperand(0));
271  auto *Arg1 = dyn_cast<Constant>(II.getOperand(1));
272  auto VT = cast<VectorType>(II.getType());
273  auto SVT = VT->getElementType();
274  unsigned NumElems = VT->getNumElements();
275 
276  if (!Arg0 || !Arg1)
277  return nullptr;
278 
279  SmallVector<Constant *, 64> Result;
280 
281  APInt MaxValue = APInt::getSignedMaxValue(SVT->getIntegerBitWidth());
282  APInt MinValue = APInt::getSignedMinValue(SVT->getIntegerBitWidth());
283  for (unsigned i = 0; i < NumElems; ++i) {
284  auto *Elt0 = Arg0->getAggregateElement(i);
285  auto *Elt1 = Arg1->getAggregateElement(i);
286  if (isa<UndefValue>(Elt0) || isa<UndefValue>(Elt1)) {
287  Result.push_back(UndefValue::get(SVT));
288  continue;
289  }
290 
291  if (!isa<ConstantInt>(Elt0) || !isa<ConstantInt>(Elt1))
292  return nullptr;
293 
294  const APInt &Val0 = cast<ConstantInt>(Elt0)->getValue();
295  const APInt &Val1 = cast<ConstantInt>(Elt1)->getValue();
296  bool Overflow = false;
297  APInt ResultElem = IsAddition ? Val0.sadd_ov(Val1, Overflow)
298  : Val0.ssub_ov(Val1, Overflow);
299  if (Overflow)
300  ResultElem = Val0.isNegative() ? MinValue : MaxValue;
301  Result.push_back(Constant::getIntegerValue(SVT, ResultElem));
302  }
303 
304  return ConstantVector::get(Result);
305 }
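// Editor's note: illustrative example of the constant fold above for
// @llvm.x86.sse2.padds.b (signed saturating add):
//   <i8 100, i8 -100, ...> + <i8 100, i8 -100, ...>
//     -> <i8 127, i8 -128, ...>   ; 200 saturates to 127, -200 to -128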
306 
307 static Value *simplifyX86immShift(const IntrinsicInst &II,
308  InstCombiner::BuilderTy &Builder) {
309  bool LogicalShift = false;
310  bool ShiftLeft = false;
311 
312  switch (II.getIntrinsicID()) {
313  default: llvm_unreachable("Unexpected intrinsic!");
314  case Intrinsic::x86_sse2_psra_d:
315  case Intrinsic::x86_sse2_psra_w:
316  case Intrinsic::x86_sse2_psrai_d:
317  case Intrinsic::x86_sse2_psrai_w:
318  case Intrinsic::x86_avx2_psra_d:
319  case Intrinsic::x86_avx2_psra_w:
320  case Intrinsic::x86_avx2_psrai_d:
321  case Intrinsic::x86_avx2_psrai_w:
322  case Intrinsic::x86_avx512_psra_q_128:
323  case Intrinsic::x86_avx512_psrai_q_128:
324  case Intrinsic::x86_avx512_psra_q_256:
325  case Intrinsic::x86_avx512_psrai_q_256:
326  case Intrinsic::x86_avx512_psra_d_512:
327  case Intrinsic::x86_avx512_psra_q_512:
328  case Intrinsic::x86_avx512_psra_w_512:
329  case Intrinsic::x86_avx512_psrai_d_512:
330  case Intrinsic::x86_avx512_psrai_q_512:
331  case Intrinsic::x86_avx512_psrai_w_512:
332  LogicalShift = false; ShiftLeft = false;
333  break;
334  case Intrinsic::x86_sse2_psrl_d:
335  case Intrinsic::x86_sse2_psrl_q:
336  case Intrinsic::x86_sse2_psrl_w:
337  case Intrinsic::x86_sse2_psrli_d:
338  case Intrinsic::x86_sse2_psrli_q:
339  case Intrinsic::x86_sse2_psrli_w:
340  case Intrinsic::x86_avx2_psrl_d:
341  case Intrinsic::x86_avx2_psrl_q:
342  case Intrinsic::x86_avx2_psrl_w:
343  case Intrinsic::x86_avx2_psrli_d:
344  case Intrinsic::x86_avx2_psrli_q:
345  case Intrinsic::x86_avx2_psrli_w:
346  case Intrinsic::x86_avx512_psrl_d_512:
347  case Intrinsic::x86_avx512_psrl_q_512:
348  case Intrinsic::x86_avx512_psrl_w_512:
349  case Intrinsic::x86_avx512_psrli_d_512:
350  case Intrinsic::x86_avx512_psrli_q_512:
351  case Intrinsic::x86_avx512_psrli_w_512:
352  LogicalShift = true; ShiftLeft = false;
353  break;
354  case Intrinsic::x86_sse2_psll_d:
355  case Intrinsic::x86_sse2_psll_q:
356  case Intrinsic::x86_sse2_psll_w:
357  case Intrinsic::x86_sse2_pslli_d:
358  case Intrinsic::x86_sse2_pslli_q:
359  case Intrinsic::x86_sse2_pslli_w:
360  case Intrinsic::x86_avx2_psll_d:
361  case Intrinsic::x86_avx2_psll_q:
362  case Intrinsic::x86_avx2_psll_w:
363  case Intrinsic::x86_avx2_pslli_d:
364  case Intrinsic::x86_avx2_pslli_q:
365  case Intrinsic::x86_avx2_pslli_w:
366  case Intrinsic::x86_avx512_psll_d_512:
367  case Intrinsic::x86_avx512_psll_q_512:
368  case Intrinsic::x86_avx512_psll_w_512:
369  case Intrinsic::x86_avx512_pslli_d_512:
370  case Intrinsic::x86_avx512_pslli_q_512:
371  case Intrinsic::x86_avx512_pslli_w_512:
372  LogicalShift = true; ShiftLeft = true;
373  break;
374  }
375  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
376 
377  // Simplify if count is constant.
378  auto Arg1 = II.getArgOperand(1);
379  auto CAZ = dyn_cast<ConstantAggregateZero>(Arg1);
380  auto CDV = dyn_cast<ConstantDataVector>(Arg1);
381  auto CInt = dyn_cast<ConstantInt>(Arg1);
382  if (!CAZ && !CDV && !CInt)
383  return nullptr;
384 
385  APInt Count(64, 0);
386  if (CDV) {
387  // SSE2/AVX2 uses all the first 64-bits of the 128-bit vector
388  // operand to compute the shift amount.
389  auto VT = cast<VectorType>(CDV->getType());
390  unsigned BitWidth = VT->getElementType()->getPrimitiveSizeInBits();
391  assert((64 % BitWidth) == 0 && "Unexpected packed shift size");
392  unsigned NumSubElts = 64 / BitWidth;
393 
394  // Concatenate the sub-elements to create the 64-bit value.
395  for (unsigned i = 0; i != NumSubElts; ++i) {
396  unsigned SubEltIdx = (NumSubElts - 1) - i;
397  auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx));
398  Count <<= BitWidth;
399  Count |= SubElt->getValue().zextOrTrunc(64);
400  }
401  }
402  else if (CInt)
403  Count = CInt->getValue();
404 
405  auto Vec = II.getArgOperand(0);
406  auto VT = cast<VectorType>(Vec->getType());
407  auto SVT = VT->getElementType();
408  unsigned VWidth = VT->getNumElements();
409  unsigned BitWidth = SVT->getPrimitiveSizeInBits();
410 
411  // If shift-by-zero then just return the original value.
412  if (Count.isNullValue())
413  return Vec;
414 
415  // Handle cases when Shift >= BitWidth.
416  if (Count.uge(BitWidth)) {
417  // If LogicalShift - just return zero.
418  if (LogicalShift)
419  return ConstantAggregateZero::get(VT);
420 
421  // If ArithmeticShift - clamp Shift to (BitWidth - 1).
422  Count = APInt(64, BitWidth - 1);
423  }
424 
425  // Get a constant vector of the same type as the first operand.
426  auto ShiftAmt = ConstantInt::get(SVT, Count.zextOrTrunc(BitWidth));
427  auto ShiftVec = Builder.CreateVectorSplat(VWidth, ShiftAmt);
428 
429  if (ShiftLeft)
430  return Builder.CreateShl(Vec, ShiftVec);
431 
432  if (LogicalShift)
433  return Builder.CreateLShr(Vec, ShiftVec);
434 
435  return Builder.CreateAShr(Vec, ShiftVec);
436 }
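// Editor's note: illustrative examples of the constant-count folds above for
// the <4 x i32> forms:
//   psrli.d(%v, 0)  -> %v                        (shift by zero)
//   psrli.d(%v, 32) -> zeroinitializer           (logical shift >= bitwidth)
//   psrai.d(%v, 40) -> ashr %v, <i32 31, ...>    (arithmetic shift clamped)
//   psrli.d(%v, 5)  -> lshr %v, <i32 5, i32 5, i32 5, i32 5>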
437 
438 // Attempt to simplify AVX2 per-element shift intrinsics to a generic IR shift.
439 // Unlike the generic IR shifts, the intrinsics have defined behaviour for out
440 // of range shift amounts (logical - set to zero, arithmetic - splat sign bit).
441 static Value *simplifyX86varShift(const IntrinsicInst &II,
442  InstCombiner::BuilderTy &Builder) {
443  bool LogicalShift = false;
444  bool ShiftLeft = false;
445 
446  switch (II.getIntrinsicID()) {
447  default: llvm_unreachable("Unexpected intrinsic!");
448  case Intrinsic::x86_avx2_psrav_d:
449  case Intrinsic::x86_avx2_psrav_d_256:
450  case Intrinsic::x86_avx512_psrav_q_128:
451  case Intrinsic::x86_avx512_psrav_q_256:
452  case Intrinsic::x86_avx512_psrav_d_512:
453  case Intrinsic::x86_avx512_psrav_q_512:
454  case Intrinsic::x86_avx512_psrav_w_128:
455  case Intrinsic::x86_avx512_psrav_w_256:
456  case Intrinsic::x86_avx512_psrav_w_512:
457  LogicalShift = false;
458  ShiftLeft = false;
459  break;
460  case Intrinsic::x86_avx2_psrlv_d:
461  case Intrinsic::x86_avx2_psrlv_d_256:
462  case Intrinsic::x86_avx2_psrlv_q:
463  case Intrinsic::x86_avx2_psrlv_q_256:
464  case Intrinsic::x86_avx512_psrlv_d_512:
465  case Intrinsic::x86_avx512_psrlv_q_512:
466  case Intrinsic::x86_avx512_psrlv_w_128:
467  case Intrinsic::x86_avx512_psrlv_w_256:
468  case Intrinsic::x86_avx512_psrlv_w_512:
469  LogicalShift = true;
470  ShiftLeft = false;
471  break;
472  case Intrinsic::x86_avx2_psllv_d:
473  case Intrinsic::x86_avx2_psllv_d_256:
474  case Intrinsic::x86_avx2_psllv_q:
475  case Intrinsic::x86_avx2_psllv_q_256:
476  case Intrinsic::x86_avx512_psllv_d_512:
477  case Intrinsic::x86_avx512_psllv_q_512:
478  case Intrinsic::x86_avx512_psllv_w_128:
479  case Intrinsic::x86_avx512_psllv_w_256:
480  case Intrinsic::x86_avx512_psllv_w_512:
481  LogicalShift = true;
482  ShiftLeft = true;
483  break;
484  }
485  assert((LogicalShift || !ShiftLeft) && "Only logical shifts can shift left");
486 
487  // Simplify if all shift amounts are constant/undef.
488  auto *CShift = dyn_cast<Constant>(II.getArgOperand(1));
489  if (!CShift)
490  return nullptr;
491 
492  auto Vec = II.getArgOperand(0);
493  auto VT = cast<VectorType>(II.getType());
494  auto SVT = VT->getVectorElementType();
495  int NumElts = VT->getNumElements();
496  int BitWidth = SVT->getIntegerBitWidth();
497 
498  // Collect each element's shift amount.
499  // We also collect special cases: UNDEF = -1, OUT-OF-RANGE = BitWidth.
500  bool AnyOutOfRange = false;
501  SmallVector<int, 8> ShiftAmts;
502  for (int I = 0; I < NumElts; ++I) {
503  auto *CElt = CShift->getAggregateElement(I);
504  if (CElt && isa<UndefValue>(CElt)) {
505  ShiftAmts.push_back(-1);
506  continue;
507  }
508 
509  auto *COp = dyn_cast_or_null<ConstantInt>(CElt);
510  if (!COp)
511  return nullptr;
512 
513  // Handle out of range shifts.
514  // If LogicalShift - set to BitWidth (special case).
515  // If ArithmeticShift - set to (BitWidth - 1) (sign splat).
516  APInt ShiftVal = COp->getValue();
517  if (ShiftVal.uge(BitWidth)) {
518  AnyOutOfRange = LogicalShift;
519  ShiftAmts.push_back(LogicalShift ? BitWidth : BitWidth - 1);
520  continue;
521  }
522 
523  ShiftAmts.push_back((int)ShiftVal.getZExtValue());
524  }
525 
526  // If all elements out of range or UNDEF, return vector of zeros/undefs.
527  // ArithmeticShift should only hit this if they are all UNDEF.
528  auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
529  if (llvm::all_of(ShiftAmts, OutOfRange)) {
530  SmallVector<Constant *, 8> ConstantVec;
531  for (int Idx : ShiftAmts) {
532  if (Idx < 0) {
533  ConstantVec.push_back(UndefValue::get(SVT));
534  } else {
535  assert(LogicalShift && "Logical shift expected");
536  ConstantVec.push_back(ConstantInt::getNullValue(SVT));
537  }
538  }
539  return ConstantVector::get(ConstantVec);
540  }
541 
542  // We can't handle only some out of range values with generic logical shifts.
543  if (AnyOutOfRange)
544  return nullptr;
545 
546  // Build the shift amount constant vector.
547  SmallVector<Constant *, 8> ShiftVecAmts;
548  for (int Idx : ShiftAmts) {
549  if (Idx < 0)
550  ShiftVecAmts.push_back(UndefValue::get(SVT));
551  else
552  ShiftVecAmts.push_back(ConstantInt::get(SVT, Idx));
553  }
554  auto ShiftVec = ConstantVector::get(ShiftVecAmts);
555 
556  if (ShiftLeft)
557  return Builder.CreateShl(Vec, ShiftVec);
558 
559  if (LogicalShift)
560  return Builder.CreateLShr(Vec, ShiftVec);
561 
562  return Builder.CreateAShr(Vec, ShiftVec);
563 }
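// Editor's note: illustrative example of the fold above; with all shift
// amounts constant and in range,
//   @llvm.x86.avx2.psrlv.d(%v, <i32 1, i32 2, i32 3, i32 4>)
//     -> lshr <4 x i32> %v, <i32 1, i32 2, i32 3, i32 4>
// If only some lanes of a logical shift are out of range, the intrinsic is
// left alone, because lshr would give an undefined result instead of zero.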
564 
565 static Value *simplifyX86pack(IntrinsicInst &II, bool IsSigned) {
566  Value *Arg0 = II.getArgOperand(0);
567  Value *Arg1 = II.getArgOperand(1);
568  Type *ResTy = II.getType();
569 
570  // Fast all undef handling.
571  if (isa<UndefValue>(Arg0) && isa<UndefValue>(Arg1))
572  return UndefValue::get(ResTy);
573 
574  Type *ArgTy = Arg0->getType();
575  unsigned NumLanes = ResTy->getPrimitiveSizeInBits() / 128;
576  unsigned NumDstElts = ResTy->getVectorNumElements();
577  unsigned NumSrcElts = ArgTy->getVectorNumElements();
578  assert(NumDstElts == (2 * NumSrcElts) && "Unexpected packing types");
579 
580  unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
581  unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;
582  unsigned DstScalarSizeInBits = ResTy->getScalarSizeInBits();
583  assert(ArgTy->getScalarSizeInBits() == (2 * DstScalarSizeInBits) &&
584  "Unexpected packing types");
585 
586  // Constant folding.
587  auto *Cst0 = dyn_cast<Constant>(Arg0);
588  auto *Cst1 = dyn_cast<Constant>(Arg1);
589  if (!Cst0 || !Cst1)
590  return nullptr;
591 
592  SmallVector<Constant *, 32> Vals;
593  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
594  for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
595  unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
596  auto *Cst = (Elt >= NumSrcEltsPerLane) ? Cst1 : Cst0;
597  auto *COp = Cst->getAggregateElement(SrcIdx);
598  if (COp && isa<UndefValue>(COp)) {
599  Vals.push_back(UndefValue::get(ResTy->getScalarType()));
600  continue;
601  }
602 
603  auto *CInt = dyn_cast_or_null<ConstantInt>(COp);
604  if (!CInt)
605  return nullptr;
606 
607  APInt Val = CInt->getValue();
608  assert(Val.getBitWidth() == ArgTy->getScalarSizeInBits() &&
609  "Unexpected constant bitwidth");
610 
611  if (IsSigned) {
612  // PACKSS: Truncate signed value with signed saturation.
613  // Source values less than dst minint are saturated to minint.
614  // Source values greater than dst maxint are saturated to maxint.
615  if (Val.isSignedIntN(DstScalarSizeInBits))
616  Val = Val.trunc(DstScalarSizeInBits);
617  else if (Val.isNegative())
618  Val = APInt::getSignedMinValue(DstScalarSizeInBits);
619  else
620  Val = APInt::getSignedMaxValue(DstScalarSizeInBits);
621  } else {
622  // PACKUS: Truncate signed value with unsigned saturation.
623  // Source values less than zero are saturated to zero.
624  // Source values greater than dst maxuint are saturated to maxuint.
625  if (Val.isIntN(DstScalarSizeInBits))
626  Val = Val.trunc(DstScalarSizeInBits);
627  else if (Val.isNegative())
628  Val = APInt::getNullValue(DstScalarSizeInBits);
629  else
630  Val = APInt::getAllOnesValue(DstScalarSizeInBits);
631  }
632 
633  Vals.push_back(ConstantInt::get(ResTy->getScalarType(), Val));
634  }
635  }
636 
637  return ConstantVector::get(Vals);
638 }
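// Editor's note: illustrative example of the saturation above. For packsswb,
// an i16 source element of 300 folds to i8 127, -300 folds to i8 -128, and
// 42 stays 42; for packuswb the same inputs fold to 255, 0, and 42.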
639 
640 // Replace X86-specific intrinsics with generic floor-ceil where applicable.
641 static Value *simplifyX86round(IntrinsicInst &II,
642  InstCombiner::BuilderTy &Builder) {
643  ConstantInt *Arg = nullptr;
644  Intrinsic::ID IntrinsicID = II.getIntrinsicID();
645 
646  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
647  IntrinsicID == Intrinsic::x86_sse41_round_sd)
648  Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
649  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
650  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
651  Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
652  else
653  Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
654  if (!Arg)
655  return nullptr;
656  unsigned RoundControl = Arg->getZExtValue();
657 
658  Arg = nullptr;
659  unsigned SAE = 0;
660  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
661  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
662  Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
663  else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
664  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
665  Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
666  else
667  SAE = 4;
668  if (!SAE) {
669  if (!Arg)
670  return nullptr;
671  SAE = Arg->getZExtValue();
672  }
673 
674  if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
675  return nullptr;
676 
677  Value *Src, *Dst, *Mask;
678  bool IsScalar = false;
679  if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
680  IntrinsicID == Intrinsic::x86_sse41_round_sd ||
681  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
682  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
683  IsScalar = true;
684  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
685  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
686  Mask = II.getArgOperand(3);
687  Value *Zero = Constant::getNullValue(Mask->getType());
688  Mask = Builder.CreateAnd(Mask, 1);
689  Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
690  Dst = II.getArgOperand(2);
691  } else
692  Dst = II.getArgOperand(0);
693  Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
694  } else {
695  Src = II.getArgOperand(0);
696  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
697  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
698  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
699  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
700  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
701  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
702  Dst = II.getArgOperand(2);
703  Mask = II.getArgOperand(3);
704  } else {
705  Dst = Src;
706  Mask = ConstantInt::getAllOnesValue(
707  Builder.getIntNTy(Src->getType()->getVectorNumElements()));
708  }
709  }
710 
711  Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
712  Value *Res = Builder.CreateUnaryIntrinsic(ID, Src, &II);
713  if (!IsScalar) {
714  if (auto *C = dyn_cast<Constant>(Mask))
715  if (C->isAllOnesValue())
716  return Res;
717  auto *MaskTy = VectorType::get(
718  Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
719  Mask = Builder.CreateBitCast(Mask, MaskTy);
720  unsigned Width = Src->getType()->getVectorNumElements();
721  if (MaskTy->getVectorNumElements() > Width) {
722  uint32_t Indices[4];
723  for (unsigned i = 0; i != Width; ++i)
724  Indices[i] = i;
725  Mask = Builder.CreateShuffleVector(Mask, Mask,
726  makeArrayRef(Indices, Width));
727  }
728  return Builder.CreateSelect(Mask, Res, Dst);
729  }
730  if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
731  IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
732  Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
733  Res = Builder.CreateSelect(Mask, Res, Dst);
734  Dst = II.getArgOperand(0);
735  }
736  return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
737 }
738 
739 static Value *simplifyX86movmsk(const IntrinsicInst &II) {
740  Value *Arg = II.getArgOperand(0);
741  Type *ResTy = II.getType();
742  Type *ArgTy = Arg->getType();
743 
744  // movmsk(undef) -> zero as we must ensure the upper bits are zero.
745  if (isa<UndefValue>(Arg))
746  return Constant::getNullValue(ResTy);
747 
748  // We can't easily peek through x86_mmx types.
749  if (!ArgTy->isVectorTy())
750  return nullptr;
751 
752  auto *C = dyn_cast<Constant>(Arg);
753  if (!C)
754  return nullptr;
755 
756  // Extract signbits of the vector input and pack into integer result.
757  APInt Result(ResTy->getPrimitiveSizeInBits(), 0);
758  for (unsigned I = 0, E = ArgTy->getVectorNumElements(); I != E; ++I) {
759  auto *COp = C->getAggregateElement(I);
760  if (!COp)
761  return nullptr;
762  if (isa<UndefValue>(COp))
763  continue;
764 
765  auto *CInt = dyn_cast<ConstantInt>(COp);
766  auto *CFp = dyn_cast<ConstantFP>(COp);
767  if (!CInt && !CFp)
768  return nullptr;
769 
770  if ((CInt && CInt->isNegative()) || (CFp && CFp->isNegative()))
771  Result.setBit(I);
772  }
773 
774  return Constant::getIntegerValue(ResTy, Result);
775 }
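// Editor's note: illustrative example of the constant fold above:
//   @llvm.x86.sse.movmsk.ps(<float -1.0, float 2.0, float -3.0, float 4.0>)
//     -> i32 5   ; bits 0 and 2 set, since elements 0 and 2 are negative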
776 
777 static Value *simplifyX86insertps(const IntrinsicInst &II,
778  InstCombiner::BuilderTy &Builder) {
779  auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
780  if (!CInt)
781  return nullptr;
782 
783  VectorType *VecTy = cast<VectorType>(II.getType());
784  assert(VecTy->getNumElements() == 4 && "insertps with wrong vector type");
785 
786  // The immediate permute control byte looks like this:
787  // [3:0] - zero mask for each 32-bit lane
788  // [5:4] - select one 32-bit destination lane
789  // [7:6] - select one 32-bit source lane
790 
791  uint8_t Imm = CInt->getZExtValue();
792  uint8_t ZMask = Imm & 0xf;
793  uint8_t DestLane = (Imm >> 4) & 0x3;
794  uint8_t SourceLane = (Imm >> 6) & 0x3;
795 
796  ConstantAggregateZero *ZeroVector = ConstantAggregateZero::get(VecTy);
797 
798  // If all zero mask bits are set, this was just a weird way to
799  // generate a zero vector.
800  if (ZMask == 0xf)
801  return ZeroVector;
802 
803  // Initialize by passing all of the first source bits through.
804  uint32_t ShuffleMask[4] = { 0, 1, 2, 3 };
805 
806  // We may replace the second operand with the zero vector.
807  Value *V1 = II.getArgOperand(1);
808 
809  if (ZMask) {
810  // If the zero mask is being used with a single input or the zero mask
811  // overrides the destination lane, this is a shuffle with the zero vector.
812  if ((II.getArgOperand(0) == II.getArgOperand(1)) ||
813  (ZMask & (1 << DestLane))) {
814  V1 = ZeroVector;
815  // We may still move 32-bits of the first source vector from one lane
816  // to another.
817  ShuffleMask[DestLane] = SourceLane;
818  // The zero mask may override the previous insert operation.
819  for (unsigned i = 0; i < 4; ++i)
820  if ((ZMask >> i) & 0x1)
821  ShuffleMask[i] = i + 4;
822  } else {
823  // TODO: Model this case as 2 shuffles or a 'logical and' plus shuffle?
824  return nullptr;
825  }
826  } else {
827  // Replace the selected destination lane with the selected source lane.
828  ShuffleMask[DestLane] = SourceLane + 4;
829  }
830 
831  return Builder.CreateShuffleVector(II.getArgOperand(0), V1, ShuffleMask);
832 }
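// Editor's note: illustrative example; with imm = 0x10 (source lane 0,
// destination lane 1, no zero mask) the call above becomes
//   shufflevector %arg0, %arg1, <i32 0, i32 4, i32 2, i32 3>
// i.e. element 0 of the second operand is inserted into lane 1 of the first.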
833 
834 /// Attempt to simplify SSE4A EXTRQ/EXTRQI instructions using constant folding
835 /// or conversion to a shuffle vector.
836 static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
837  ConstantInt *CILength, ConstantInt *CIIndex,
838  InstCombiner::BuilderTy &Builder) {
839  auto LowConstantHighUndef = [&](uint64_t Val) {
840  Type *IntTy64 = Type::getInt64Ty(II.getContext());
841  Constant *Args[] = {ConstantInt::get(IntTy64, Val),
842  UndefValue::get(IntTy64)};
843  return ConstantVector::get(Args);
844  };
845 
846  // See if we're dealing with constant values.
847  Constant *C0 = dyn_cast<Constant>(Op0);
848  ConstantInt *CI0 =
849  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
850  : nullptr;
851 
852  // Attempt to constant fold.
853  if (CILength && CIIndex) {
854  // From AMD documentation: "The bit index and field length are each six
855  // bits in length; other bits of the field are ignored."
856  APInt APIndex = CIIndex->getValue().zextOrTrunc(6);
857  APInt APLength = CILength->getValue().zextOrTrunc(6);
858 
859  unsigned Index = APIndex.getZExtValue();
860 
861  // From AMD documentation: "a value of zero in the field length is
862  // defined as length of 64".
863  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
864 
865  // From AMD documentation: "If the sum of the bit index + length field
866  // is greater than 64, the results are undefined".
867  unsigned End = Index + Length;
868 
869  // Note that both field index and field length are 8-bit quantities.
870  // Since variables 'Index' and 'Length' are unsigned values
871  // obtained from zero-extending field index and field length
872  // respectively, their sum should never wrap around.
873  if (End > 64)
874  return UndefValue::get(II.getType());
875 
876  // If we are extracting whole bytes, we can convert this to a shuffle.
877  // Lowering can recognize EXTRQI shuffle masks.
878  if ((Length % 8) == 0 && (Index % 8) == 0) {
879  // Convert bit indices to byte indices.
880  Length /= 8;
881  Index /= 8;
882 
883  Type *IntTy8 = Type::getInt8Ty(II.getContext());
884  Type *IntTy32 = Type::getInt32Ty(II.getContext());
885  VectorType *ShufTy = VectorType::get(IntTy8, 16);
886 
887  SmallVector<Constant *, 16> ShuffleMask;
888  for (int i = 0; i != (int)Length; ++i)
889  ShuffleMask.push_back(
890  Constant::getIntegerValue(IntTy32, APInt(32, i + Index)));
891  for (int i = Length; i != 8; ++i)
892  ShuffleMask.push_back(
893  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
894  for (int i = 8; i != 16; ++i)
895  ShuffleMask.push_back(UndefValue::get(IntTy32));
896 
897  Value *SV = Builder.CreateShuffleVector(
898  Builder.CreateBitCast(Op0, ShufTy),
899  ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask));
900  return Builder.CreateBitCast(SV, II.getType());
901  }
902 
903  // Constant Fold - shift Index'th bit to lowest position and mask off
904  // Length bits.
905  if (CI0) {
906  APInt Elt = CI0->getValue();
907  Elt.lshrInPlace(Index);
908  Elt = Elt.zextOrTrunc(Length);
909  return LowConstantHighUndef(Elt.getZExtValue());
910  }
911 
912  // If we were an EXTRQ call, we'll save registers if we convert to EXTRQI.
913  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_extrq) {
914  Value *Args[] = {Op0, CILength, CIIndex};
915  Module *M = II.getModule();
916  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_extrqi);
917  return Builder.CreateCall(F, Args);
918  }
919  }
920 
921  // Constant Fold - extraction from zero is always {zero, undef}.
922  if (CI0 && CI0->isZero())
923  return LowConstantHighUndef(0);
924 
925  return nullptr;
926 }
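// Editor's note: illustrative example of the byte-aligned case above. For
// EXTRQI with Length = 16 bits and Index = 8 bits, source bytes 1-2 are
// shuffled into bytes 0-1 of the low i64, bytes 2-7 are taken from the zero
// vector, and the high i64 of the result is left undef.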
927 
928 /// Attempt to simplify SSE4A INSERTQ/INSERTQI instructions using constant
929 /// folding or conversion to a shuffle vector.
930 static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
931  APInt APLength, APInt APIndex,
932  InstCombiner::BuilderTy &Builder) {
933  // From AMD documentation: "The bit index and field length are each six bits
934  // in length; other bits of the field are ignored."
935  APIndex = APIndex.zextOrTrunc(6);
936  APLength = APLength.zextOrTrunc(6);
937 
938  // Attempt to constant fold.
939  unsigned Index = APIndex.getZExtValue();
940 
941  // From AMD documentation: "a value of zero in the field length is
942  // defined as length of 64".
943  unsigned Length = APLength == 0 ? 64 : APLength.getZExtValue();
944 
945  // From AMD documentation: "If the sum of the bit index + length field
946  // is greater than 64, the results are undefined".
947  unsigned End = Index + Length;
948 
949  // Note that both field index and field length are 8-bit quantities.
950  // Since variables 'Index' and 'Length' are unsigned values
951  // obtained from zero-extending field index and field length
952  // respectively, their sum should never wrap around.
953  if (End > 64)
954  return UndefValue::get(II.getType());
955 
956  // If we are inserting whole bytes, we can convert this to a shuffle.
957  // Lowering can recognize INSERTQI shuffle masks.
958  if ((Length % 8) == 0 && (Index % 8) == 0) {
959  // Convert bit indices to byte indices.
960  Length /= 8;
961  Index /= 8;
962 
963  Type *IntTy8 = Type::getInt8Ty(II.getContext());
964  Type *IntTy32 = Type::getInt32Ty(II.getContext());
965  VectorType *ShufTy = VectorType::get(IntTy8, 16);
966 
967  SmallVector<Constant *, 16> ShuffleMask;
968  for (int i = 0; i != (int)Index; ++i)
969  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
970  for (int i = 0; i != (int)Length; ++i)
971  ShuffleMask.push_back(
972  Constant::getIntegerValue(IntTy32, APInt(32, i + 16)));
973  for (int i = Index + Length; i != 8; ++i)
974  ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i)));
975  for (int i = 8; i != 16; ++i)
976  ShuffleMask.push_back(UndefValue::get(IntTy32));
977 
978  Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy),
979  Builder.CreateBitCast(Op1, ShufTy),
980  ConstantVector::get(ShuffleMask));
981  return Builder.CreateBitCast(SV, II.getType());
982  }
983 
984  // See if we're dealing with constant values.
985  Constant *C0 = dyn_cast<Constant>(Op0);
986  Constant *C1 = dyn_cast<Constant>(Op1);
987  ConstantInt *CI00 =
988  C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
989  : nullptr;
990  ConstantInt *CI10 =
991  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
992  : nullptr;
993 
994  // Constant Fold - insert bottom Length bits starting at the Index'th bit.
995  if (CI00 && CI10) {
996  APInt V00 = CI00->getValue();
997  APInt V10 = CI10->getValue();
998  APInt Mask = APInt::getLowBitsSet(64, Length).shl(Index);
999  V00 = V00 & ~Mask;
1000  V10 = V10.zextOrTrunc(Length).zextOrTrunc(64).shl(Index);
1001  APInt Val = V00 | V10;
1002  Type *IntTy64 = Type::getInt64Ty(II.getContext());
1003  Constant *Args[] = {ConstantInt::get(IntTy64, Val.getZExtValue()),
1004  UndefValue::get(IntTy64)};
1005  return ConstantVector::get(Args);
1006  }
1007 
1008  // If we were an INSERTQ call, we'll save demanded elements if we convert to
1009  // INSERTQI.
1010  if (II.getIntrinsicID() == Intrinsic::x86_sse4a_insertq) {
1011  Type *IntTy8 = Type::getInt8Ty(II.getContext());
1012  Constant *CILength = ConstantInt::get(IntTy8, Length, false);
1013  Constant *CIIndex = ConstantInt::get(IntTy8, Index, false);
1014 
1015  Value *Args[] = {Op0, Op1, CILength, CIIndex};
1016  Module *M = II.getModule();
1017  Value *F = Intrinsic::getDeclaration(M, Intrinsic::x86_sse4a_insertqi);
1018  return Builder.CreateCall(F, Args);
1019  }
1020 
1021  return nullptr;
1022 }
1023 
1024 /// Attempt to convert pshufb* to shufflevector if the mask is constant.
1025 static Value *simplifyX86pshufb(const IntrinsicInst &II,
1026  InstCombiner::BuilderTy &Builder) {
1027  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1028  if (!V)
1029  return nullptr;
1030 
1031  auto *VecTy = cast<VectorType>(II.getType());
1032  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1033  unsigned NumElts = VecTy->getNumElements();
1034  assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
1035  "Unexpected number of elements in shuffle mask!");
1036 
1037  // Construct a shuffle mask from constant integers or UNDEFs.
1038  Constant *Indexes[64] = {nullptr};
1039 
1040  // Each byte in the shuffle control mask forms an index to permute the
1041  // corresponding byte in the destination operand.
1042  for (unsigned I = 0; I < NumElts; ++I) {
1043  Constant *COp = V->getAggregateElement(I);
1044  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1045  return nullptr;
1046 
1047  if (isa<UndefValue>(COp)) {
1048  Indexes[I] = UndefValue::get(MaskEltTy);
1049  continue;
1050  }
1051 
1052  int8_t Index = cast<ConstantInt>(COp)->getValue().getZExtValue();
1053 
1054  // If the most significant bit (bit[7]) of each byte of the shuffle
1055  // control mask is set, then zero is written in the result byte.
1056  // The zero vector is in the right-hand side of the resulting
1057  // shufflevector.
1058 
1059  // The value of each index for the high 128-bit lane is the least
1060  // significant 4 bits of the respective shuffle control byte.
1061  Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0);
1062  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1063  }
1064 
1065  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1066  auto V1 = II.getArgOperand(0);
1067  auto V2 = Constant::getNullValue(VecTy);
1068  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1069 }
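// Editor's note: illustrative example; a constant control byte with the sign
// bit set selects from the zero vector, e.g. for a 16-byte pshufb
//   control <i8 0, i8 1, i8 -1, i8 3, ...>
//     -> shufflevector %v, zeroinitializer, <i32 0, i32 1, i32 16, i32 3, ...>
// (index 16 is byte 0 of the all-zero second operand).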
1070 
1071 /// Attempt to convert vpermilvar* to shufflevector if the mask is constant.
1072 static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
1073  InstCombiner::BuilderTy &Builder) {
1074  Constant *V = dyn_cast<Constant>(II.getArgOperand(1));
1075  if (!V)
1076  return nullptr;
1077 
1078  auto *VecTy = cast<VectorType>(II.getType());
1079  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1080  unsigned NumElts = VecTy->getVectorNumElements();
1081  bool IsPD = VecTy->getScalarType()->isDoubleTy();
1082  unsigned NumLaneElts = IsPD ? 2 : 4;
1083  assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
1084 
1085  // Construct a shuffle mask from constant integers or UNDEFs.
1086  Constant *Indexes[16] = {nullptr};
1087 
1088  // The intrinsics only read one or two bits; clear the rest.
1089  for (unsigned I = 0; I < NumElts; ++I) {
1090  Constant *COp = V->getAggregateElement(I);
1091  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1092  return nullptr;
1093 
1094  if (isa<UndefValue>(COp)) {
1095  Indexes[I] = UndefValue::get(MaskEltTy);
1096  continue;
1097  }
1098 
1099  APInt Index = cast<ConstantInt>(COp)->getValue();
1100  Index = Index.zextOrTrunc(32).getLoBits(2);
1101 
1102  // The PD variants use bit 1 to select the per-lane element index, so
1103  // shift down to convert to generic shuffle mask index.
1104  if (IsPD)
1105  Index.lshrInPlace(1);
1106 
1107  // The _256 variants are a bit trickier since the mask bits always index
1108  // into the corresponding 128-bit half. In order to convert to a generic
1109  // shuffle, we have to make that explicit.
1110  Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
1111 
1112  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1113  }
1114 
1115  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts));
1116  auto V1 = II.getArgOperand(0);
1117  auto V2 = UndefValue::get(V1->getType());
1118  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1119 }
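// Editor's note: illustrative example; vpermilvar.ps with a constant control
// of <i32 3, i32 2, i32 1, i32 0> becomes
//   shufflevector %v, undef, <i32 3, i32 2, i32 1, i32 0>
// For the 256-bit variants the lane base is added, so a control value of 3
// in the upper 128-bit lane turns into shuffle index 7.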
1120 
1121 /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant.
1122 static Value *simplifyX86vpermv(const IntrinsicInst &II,
1123  InstCombiner::BuilderTy &Builder) {
1124  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
1125  if (!V)
1126  return nullptr;
1127 
1128  auto *VecTy = cast<VectorType>(II.getType());
1129  auto *MaskEltTy = Type::getInt32Ty(II.getContext());
1130  unsigned Size = VecTy->getNumElements();
1131  assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
1132  "Unexpected shuffle mask size");
1133 
1134  // Construct a shuffle mask from constant integers or UNDEFs.
1135  Constant *Indexes[64] = {nullptr};
1136 
1137  for (unsigned I = 0; I < Size; ++I) {
1138  Constant *COp = V->getAggregateElement(I);
1139  if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
1140  return nullptr;
1141 
1142  if (isa<UndefValue>(COp)) {
1143  Indexes[I] = UndefValue::get(MaskEltTy);
1144  continue;
1145  }
1146 
1147  uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
1148  Index &= Size - 1;
1149  Indexes[I] = ConstantInt::get(MaskEltTy, Index);
1150  }
1151 
1152  auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size));
1153  auto V1 = II.getArgOperand(0);
1154  auto V2 = UndefValue::get(VecTy);
1155  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1156 }
1157 
1158 /// Decode XOP integer vector comparison intrinsics.
1159 static Value *simplifyX86vpcom(const IntrinsicInst &II,
1160  InstCombiner::BuilderTy &Builder,
1161  bool IsSigned) {
1162  if (auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2))) {
1163  uint64_t Imm = CInt->getZExtValue() & 0x7;
1164  VectorType *VecTy = cast<VectorType>(II.getType());
1165  CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
1166 
1167  switch (Imm) {
1168  case 0x0:
1169  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
1170  break;
1171  case 0x1:
1172  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
1173  break;
1174  case 0x2:
1175  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
1176  break;
1177  case 0x3:
1178  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
1179  break;
1180  case 0x4:
1181  Pred = ICmpInst::ICMP_EQ; break;
1182  case 0x5:
1183  Pred = ICmpInst::ICMP_NE; break;
1184  case 0x6:
1185  return ConstantInt::getSigned(VecTy, 0); // FALSE
1186  case 0x7:
1187  return ConstantInt::getSigned(VecTy, -1); // TRUE
1188  }
1189 
1190  if (Value *Cmp = Builder.CreateICmp(Pred, II.getArgOperand(0),
1191  II.getArgOperand(1)))
1192  return Builder.CreateSExtOrTrunc(Cmp, VecTy);
1193  }
1194  return nullptr;
1195 }
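// Editor's note: illustrative example; for the unsigned form with an
// immediate of 2 ("greater than") the intrinsic becomes
//   %c = icmp ugt <4 x i32> %a, %b
//   sext <4 x i1> %c to <4 x i32>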
1196 
1197 static bool maskIsAllOneOrUndef(Value *Mask) {
1198  auto *ConstMask = dyn_cast<Constant>(Mask);
1199  if (!ConstMask)
1200  return false;
1201  if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask))
1202  return true;
1203  for (unsigned I = 0, E = ConstMask->getType()->getVectorNumElements(); I != E;
1204  ++I) {
1205  if (auto *MaskElt = ConstMask->getAggregateElement(I))
1206  if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt))
1207  continue;
1208  return false;
1209  }
1210  return true;
1211 }
1212 
1213 static Value *simplifyMaskedLoad(const IntrinsicInst &II,
1214  InstCombiner::BuilderTy &Builder) {
1215  // If the mask is all ones or undefs, this is a plain vector load of the 1st
1216  // argument.
1217  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
1218  Value *LoadPtr = II.getArgOperand(0);
1219  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(1))->getZExtValue();
1220  return Builder.CreateAlignedLoad(LoadPtr, Alignment, "unmaskedload");
1221  }
1222 
1223  return nullptr;
1224 }
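// Editor's note: illustrative example; a masked load whose mask is all-ones
// (or undef) is just an ordinary load, e.g.
//   @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %p, i32 4, all-ones mask, %passthru)
//     -> load <4 x i32>, <4 x i32>* %p, align 4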
1225 
1226 static Instruction *simplifyMaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1227  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1228  if (!ConstMask)
1229  return nullptr;
1230 
1231  // If the mask is all zeros, this instruction does nothing.
1232  if (ConstMask->isNullValue())
1233  return IC.eraseInstFromFunction(II);
1234 
1235  // If the mask is all ones, this is a plain vector store of the 1st argument.
1236  if (ConstMask->isAllOnesValue()) {
1237  Value *StorePtr = II.getArgOperand(1);
1238  unsigned Alignment = cast<ConstantInt>(II.getArgOperand(2))->getZExtValue();
1239  return new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
1240  }
1241 
1242  return nullptr;
1243 }
1244 
1245 static Instruction *simplifyMaskedGather(IntrinsicInst &II, InstCombiner &IC) {
1246  // If the mask is all zeros, return the "passthru" argument of the gather.
1247  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
1248  if (ConstMask && ConstMask->isNullValue())
1249  return IC.replaceInstUsesWith(II, II.getArgOperand(3));
1250 
1251  return nullptr;
1252 }
1253 
1254 /// This function transforms launder.invariant.group and strip.invariant.group
1255 /// like:
1256 /// launder(launder(%x)) -> launder(%x) (the result is not the argument)
1257 /// launder(strip(%x)) -> launder(%x)
1258 /// strip(strip(%x)) -> strip(%x) (the result is not the argument)
1259 /// strip(launder(%x)) -> strip(%x)
1260 /// This is legal because it preserves the most recent information about
1261 /// the presence or absence of invariant.group.
1262 static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
1263  InstCombiner &IC) {
1264  auto *Arg = II.getArgOperand(0);
1265  auto *StrippedArg = Arg->stripPointerCasts();
1266  auto *StrippedInvariantGroupsArg = Arg->stripPointerCastsAndInvariantGroups();
1267  if (StrippedArg == StrippedInvariantGroupsArg)
1268  return nullptr; // No launders/strips to remove.
1269 
1270  Value *Result = nullptr;
1271 
1272  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
1273  Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
1274  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
1275  Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
1276  else
1277  llvm_unreachable(
1278  "simplifyInvariantGroupIntrinsic only handles launder and strip");
1279  if (Result->getType()->getPointerAddressSpace() !=
1280  II.getType()->getPointerAddressSpace())
1281  Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
1282  if (Result->getType() != II.getType())
1283  Result = IC.Builder.CreateBitCast(Result, II.getType());
1284 
1285  return cast<Instruction>(Result);
1286 }
1287 
1288 static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
1289  // If the mask is all zeros, a scatter does nothing.
1290  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
1291  if (ConstMask && ConstMask->isNullValue())
1292  return IC.eraseInstFromFunction(II);
1293 
1294  return nullptr;
1295 }
1296 
1297 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
1298  assert((II.getIntrinsicID() == Intrinsic::cttz ||
1299  II.getIntrinsicID() == Intrinsic::ctlz) &&
1300  "Expected cttz or ctlz intrinsic");
1301  Value *Op0 = II.getArgOperand(0);
1302 
1303  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
1304 
1305  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
1306  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
1307  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
1308  : Known.countMaxLeadingZeros();
1309  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
1310  : Known.countMinLeadingZeros();
1311 
1312  // If all bits above (ctlz) or below (cttz) the first known one are known
1313  // zero, this value is constant.
1314  // FIXME: This should be in InstSimplify because we're replacing an
1315  // instruction with a constant.
1316  if (PossibleZeros == DefiniteZeros) {
1317  auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
1318  return IC.replaceInstUsesWith(II, C);
1319  }
1320 
1321  // If the input to cttz/ctlz is known to be non-zero,
1322  // then change the 'ZeroIsUndef' parameter to 'true'
1323  // because we know the zero behavior can't affect the result.
1324  if (!Known.One.isNullValue() ||
1325  isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
1326  &IC.getDominatorTree())) {
1327  if (!match(II.getArgOperand(1), m_One())) {
1328  II.setOperand(1, IC.Builder.getTrue());
1329  return &II;
1330  }
1331  }
1332 
1333  // Add range metadata since known bits can't completely reflect what we know.
1334  // TODO: Handle splat vectors.
1335  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1336  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1337  Metadata *LowAndHigh[] = {
1338  ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
1339  ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
1340  II.setMetadata(LLVMContext::MD_range,
1341  MDNode::get(II.getContext(), LowAndHigh));
1342  return &II;
1343  }
1344 
1345  return nullptr;
1346 }
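// Editor's note: illustrative behaviour of the fold above. If the low bit of
// the cttz operand is known to be one, the call folds to the constant 0; if
// the operand is merely known non-zero, only the is_zero_undef flag is set to
// true; otherwise a !range [DefiniteZeros, PossibleZeros + 1) annotation is
// attached.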
1347 
1348 static Instruction *foldCtpop(IntrinsicInst &II, InstCombiner &IC) {
1349  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
1350  "Expected ctpop intrinsic");
1351  Value *Op0 = II.getArgOperand(0);
1352  // FIXME: Try to simplify vectors of integers.
1353  auto *IT = dyn_cast<IntegerType>(Op0->getType());
1354  if (!IT)
1355  return nullptr;
1356 
1357  unsigned BitWidth = IT->getBitWidth();
1358  KnownBits Known(BitWidth);
1359  IC.computeKnownBits(Op0, Known, 0, &II);
1360 
1361  unsigned MinCount = Known.countMinPopulation();
1362  unsigned MaxCount = Known.countMaxPopulation();
1363 
1364  // Add range metadata since known bits can't completely reflect what we know.
1365  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
1366  Metadata *LowAndHigh[] = {
1367  ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
1368  ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
1369  II.setMetadata(LLVMContext::MD_range,
1370  MDNode::get(II.getContext(), LowAndHigh));
1371  return &II;
1372  }
1373 
1374  return nullptr;
1375 }
1376 
1377 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1378 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1379 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1380 static Instruction *simplifyX86MaskedLoad(IntrinsicInst &II, InstCombiner &IC) {
1381  Value *Ptr = II.getOperand(0);
1382  Value *Mask = II.getOperand(1);
1383  Constant *ZeroVec = Constant::getNullValue(II.getType());
1384 
1385  // Special case a zero mask since that's not a ConstantDataVector.
1386  // This masked load instruction creates a zero vector.
1387  if (isa<ConstantAggregateZero>(Mask))
1388  return IC.replaceInstUsesWith(II, ZeroVec);
1389 
1390  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1391  if (!ConstMask)
1392  return nullptr;
1393 
1394  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1395  // to allow target-independent optimizations.
1396 
1397  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1398  // the LLVM intrinsic definition for the pointer argument.
1399  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1400  PointerType *VecPtrTy = PointerType::get(II.getType(), AddrSpace);
1401  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1402 
1403  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1404  // on each element's most significant bit (the sign bit).
1405  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1406 
1407  // The pass-through vector for an x86 masked load is a zero vector.
1408  CallInst *NewMaskedLoad =
1409  IC.Builder.CreateMaskedLoad(PtrCast, 1, BoolMask, ZeroVec);
1410  return IC.replaceInstUsesWith(II, NewMaskedLoad);
1411 }
1412 
1413 // TODO: If the x86 backend knew how to convert a bool vector mask back to an
1414 // XMM register mask efficiently, we could transform all x86 masked intrinsics
1415 // to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
1416 static bool simplifyX86MaskedStore(IntrinsicInst &II, InstCombiner &IC) {
1417  Value *Ptr = II.getOperand(0);
1418  Value *Mask = II.getOperand(1);
1419  Value *Vec = II.getOperand(2);
1420 
1421  // Special case a zero mask since that's not a ConstantDataVector:
1422  // this masked store instruction does nothing.
1423  if (isa<ConstantAggregateZero>(Mask)) {
1424  IC.eraseInstFromFunction(II);
1425  return true;
1426  }
1427 
1428  // The SSE2 version is too weird (e.g., unaligned but non-temporal) to do
1429  // anything else at this level.
1430  if (II.getIntrinsicID() == Intrinsic::x86_sse2_maskmov_dqu)
1431  return false;
1432 
1433  auto *ConstMask = dyn_cast<ConstantDataVector>(Mask);
1434  if (!ConstMask)
1435  return false;
1436 
1437  // The mask is constant. Convert this x86 intrinsic to the LLVM intrinsic
1438  // to allow target-independent optimizations.
1439 
1440  // First, cast the x86 intrinsic scalar pointer to a vector pointer to match
1441  // the LLVM intrinsic definition for the pointer argument.
1442  unsigned AddrSpace = cast<PointerType>(Ptr->getType())->getAddressSpace();
1443  PointerType *VecPtrTy = PointerType::get(Vec->getType(), AddrSpace);
1444  Value *PtrCast = IC.Builder.CreateBitCast(Ptr, VecPtrTy, "castvec");
1445 
1446  // Second, convert the x86 XMM integer vector mask to a vector of bools based
1447  // on each element's most significant bit (the sign bit).
1448  Constant *BoolMask = getNegativeIsTrueBoolVec(ConstMask);
1449 
1450  IC.Builder.CreateMaskedStore(Vec, PtrCast, 1, BoolMask);
1451 
1452  // 'Replace uses' doesn't work for stores. Erase the original masked store.
1453  IC.eraseInstFromFunction(II);
1454  return true;
1455 }
1456 
1457 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
1458 //
1459 // A single NaN input is folded to minnum, so we rely on that folding for
1460 // handling NaNs.
1461 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
1462  const APFloat &Src2) {
1463  APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
1464 
1465  APFloat::cmpResult Cmp0 = Max3.compare(Src0);
1466  assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
1467  if (Cmp0 == APFloat::cmpEqual)
1468  return maxnum(Src1, Src2);
1469 
1470  APFloat::cmpResult Cmp1 = Max3.compare(Src1);
1471  assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
1472  if (Cmp1 == APFloat::cmpEqual)
1473  return maxnum(Src0, Src2);
1474 
1475  return maxnum(Src0, Src1);
1476 }
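// Editor's note: worked example of the helper above: fmed3AMDGCN(1.0, 5.0, 3.0)
// computes Max3 = 5.0, which equals Src1, so it returns maxnum(1.0, 3.0) = 3.0,
// the median of the three inputs.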
1477 
1478 /// Convert a table lookup to shufflevector if the mask is constant.
1479 /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
1480 /// which case we could lower the shufflevector with rev64 instructions
1481 /// as it's actually a byte reverse.
1482 static Value *simplifyNeonTbl1(const IntrinsicInst &II,
1483  InstCombiner::BuilderTy &Builder) {
1484  // Bail out if the mask is not a constant.
1485  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
1486  if (!C)
1487  return nullptr;
1488 
1489  auto *VecTy = cast<VectorType>(II.getType());
1490  unsigned NumElts = VecTy->getNumElements();
1491 
1492  // Only perform this transformation for <8 x i8> vector types.
1493  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
1494  return nullptr;
1495 
1496  uint32_t Indexes[8];
1497 
1498  for (unsigned I = 0; I < NumElts; ++I) {
1499  Constant *COp = C->getAggregateElement(I);
1500 
1501  if (!COp || !isa<ConstantInt>(COp))
1502  return nullptr;
1503 
1504  Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
1505 
1506  // Make sure the mask indices are in range.
1507  if (Indexes[I] >= NumElts)
1508  return nullptr;
1509  }
1510 
1511  auto *ShuffleMask = ConstantDataVector::get(II.getContext(),
1512  makeArrayRef(Indexes));
1513  auto *V1 = II.getArgOperand(0);
1514  auto *V2 = Constant::getNullValue(V1->getType());
1515  return Builder.CreateShuffleVector(V1, V2, ShuffleMask);
1516 }
1517 
1518 /// Convert a vector load intrinsic into a simple llvm load instruction.
1519 /// This is beneficial when the underlying object being addressed comes
1520 /// from a constant, since we get constant-folding for free.
1521 static Value *simplifyNeonVld1(const IntrinsicInst &II,
1522  unsigned MemAlign,
1523  InstCombiner::BuilderTy &Builder) {
1524  auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1));
1525 
1526  if (!IntrAlign)
1527  return nullptr;
1528 
1529  unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign ?
1530  MemAlign : IntrAlign->getLimitedValue();
1531 
1532  if (!isPowerOf2_32(Alignment))
1533  return nullptr;
1534 
1535  auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0),
1536  PointerType::get(II.getType(), 0));
1537  return Builder.CreateAlignedLoad(BCastInst, Alignment);
1538 }
1539 
1540 // Returns true iff the 2 intrinsics have the same operands, limiting the
1541 // comparison to the first NumOperands.
1542 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
1543  unsigned NumOperands) {
1544  assert(I.getNumArgOperands() >= NumOperands && "Not enough operands");
1545  assert(E.getNumArgOperands() >= NumOperands && "Not enough operands");
1546  for (unsigned i = 0; i < NumOperands; i++)
1547  if (I.getArgOperand(i) != E.getArgOperand(i))
1548  return false;
1549  return true;
1550 }
1551 
1552 // Remove trivially empty start/end intrinsic ranges, i.e. a start
1553 // immediately followed by an end (ignoring debuginfo or other
1554 // start/end intrinsics in between). As this handles only the most trivial
1555 // cases, tracking the nesting level is not needed:
1556 //
1557 // call @llvm.foo.start(i1 0) ; &I
1558 // call @llvm.foo.start(i1 0)
1559 // call @llvm.foo.end(i1 0) ; This one will not be skipped: it will be removed
1560 // call @llvm.foo.end(i1 0)
1561 static bool removeTriviallyEmptyRange(IntrinsicInst &I, unsigned StartID,
1562  unsigned EndID, InstCombiner &IC) {
1563  assert(I.getIntrinsicID() == StartID &&
1564  "Start intrinsic does not have expected ID");
1565  BasicBlock::iterator BI(I), BE(I.getParent()->end());
1566  for (++BI; BI != BE; ++BI) {
1567  if (auto *E = dyn_cast<IntrinsicInst>(BI)) {
1568  if (isa<DbgInfoIntrinsic>(E) || E->getIntrinsicID() == StartID)
1569  continue;
1570  if (E->getIntrinsicID() == EndID &&
1571  haveSameOperands(I, *E, E->getNumArgOperands())) {
1572  IC.eraseInstFromFunction(*E);
1573  IC.eraseInstFromFunction(I);
1574  return true;
1575  }
1576  }
1577  break;
1578  }
1579 
1580  return false;
1581 }
1582 
1583 // Convert NVVM intrinsics to target-generic LLVM code where possible.
1584 static Instruction *SimplifyNVVMIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
1585  // Each NVVM intrinsic we can simplify can be replaced with one of:
1586  //
1587  // * an LLVM intrinsic,
1588  // * an LLVM cast operation,
1589  // * an LLVM binary operation, or
1590  // * ad-hoc LLVM IR for the particular operation.
1591 
1592  // Some transformations are only valid when the module's
1593  // flush-denormals-to-zero (ftz) setting is true/false, whereas other
1594  // transformations are valid regardless of the module's ftz setting.
1595  enum FtzRequirementTy {
1596  FTZ_Any, // Any ftz setting is ok.
1597  FTZ_MustBeOn, // Transformation is valid only if ftz is on.
1598  FTZ_MustBeOff, // Transformation is valid only if ftz is off.
1599  };
1600  // Classes of NVVM intrinsics that can't be replaced one-to-one with a
1601  // target-generic intrinsic, cast op, or binary op but that we can nonetheless
1602  // simplify.
1603  enum SpecialCase {
1604  SPC_Reciprocal,
1605  };
1606 
1607  // SimplifyAction is a poor-man's variant (plus an additional flag) that
1608  // represents how to replace an NVVM intrinsic with target-generic LLVM IR.
1609  struct SimplifyAction {
1610  // Invariant: At most one of these Optionals has a value.
1611  Optional<Intrinsic::ID> IID;
1612  Optional<Instruction::CastOps> CastOp;
1613  Optional<Instruction::BinaryOps> BinaryOp;
1614  Optional<SpecialCase> Special;
1615 
1616  FtzRequirementTy FtzRequirement = FTZ_Any;
1617 
1618  SimplifyAction() = default;
1619 
1620  SimplifyAction(Intrinsic::ID IID, FtzRequirementTy FtzReq)
1621  : IID(IID), FtzRequirement(FtzReq) {}
1622 
1623  // Cast operations don't have anything to do with FTZ, so we skip that
1624  // argument.
1625  SimplifyAction(Instruction::CastOps CastOp) : CastOp(CastOp) {}
1626 
1627  SimplifyAction(Instruction::BinaryOps BinaryOp, FtzRequirementTy FtzReq)
1628  : BinaryOp(BinaryOp), FtzRequirement(FtzReq) {}
1629 
1630  SimplifyAction(SpecialCase Special, FtzRequirementTy FtzReq)
1631  : Special(Special), FtzRequirement(FtzReq) {}
1632  };
1633 
1634  // Try to generate a SimplifyAction describing how to replace our
1635  // IntrinsicInstr with target-generic LLVM IR.
1636  const SimplifyAction Action = [II]() -> SimplifyAction {
1637  switch (II->getIntrinsicID()) {
1638  // NVVM intrinsics that map directly to LLVM intrinsics.
1639  case Intrinsic::nvvm_ceil_d:
1640  return {Intrinsic::ceil, FTZ_Any};
1641  case Intrinsic::nvvm_ceil_f:
1642  return {Intrinsic::ceil, FTZ_MustBeOff};
1643  case Intrinsic::nvvm_ceil_ftz_f:
1644  return {Intrinsic::ceil, FTZ_MustBeOn};
1645  case Intrinsic::nvvm_fabs_d:
1646  return {Intrinsic::fabs, FTZ_Any};
1647  case Intrinsic::nvvm_fabs_f:
1648  return {Intrinsic::fabs, FTZ_MustBeOff};
1649  case Intrinsic::nvvm_fabs_ftz_f:
1650  return {Intrinsic::fabs, FTZ_MustBeOn};
1651  case Intrinsic::nvvm_floor_d:
1652  return {Intrinsic::floor, FTZ_Any};
1653  case Intrinsic::nvvm_floor_f:
1654  return {Intrinsic::floor, FTZ_MustBeOff};
1655  case Intrinsic::nvvm_floor_ftz_f:
1656  return {Intrinsic::floor, FTZ_MustBeOn};
1657  case Intrinsic::nvvm_fma_rn_d:
1658  return {Intrinsic::fma, FTZ_Any};
1659  case Intrinsic::nvvm_fma_rn_f:
1660  return {Intrinsic::fma, FTZ_MustBeOff};
1661  case Intrinsic::nvvm_fma_rn_ftz_f:
1662  return {Intrinsic::fma, FTZ_MustBeOn};
1663  case Intrinsic::nvvm_fmax_d:
1664  return {Intrinsic::maxnum, FTZ_Any};
1665  case Intrinsic::nvvm_fmax_f:
1666  return {Intrinsic::maxnum, FTZ_MustBeOff};
1667  case Intrinsic::nvvm_fmax_ftz_f:
1668  return {Intrinsic::maxnum, FTZ_MustBeOn};
1669  case Intrinsic::nvvm_fmin_d:
1670  return {Intrinsic::minnum, FTZ_Any};
1671  case Intrinsic::nvvm_fmin_f:
1672  return {Intrinsic::minnum, FTZ_MustBeOff};
1673  case Intrinsic::nvvm_fmin_ftz_f:
1674  return {Intrinsic::minnum, FTZ_MustBeOn};
1675  case Intrinsic::nvvm_round_d:
1676  return {Intrinsic::round, FTZ_Any};
1677  case Intrinsic::nvvm_round_f:
1678  return {Intrinsic::round, FTZ_MustBeOff};
1679  case Intrinsic::nvvm_round_ftz_f:
1680  return {Intrinsic::round, FTZ_MustBeOn};
1681  case Intrinsic::nvvm_sqrt_rn_d:
1682  return {Intrinsic::sqrt, FTZ_Any};
1683  case Intrinsic::nvvm_sqrt_f:
1684  // nvvm_sqrt_f is a special case. For most intrinsics, foo_ftz_f is the
1685  // ftz version, and foo_f is the non-ftz version. But nvvm_sqrt_f adopts
1686  // the ftz-ness of the surrounding code. sqrt_rn_f and sqrt_rn_ftz_f are
1687  // the versions with explicit ftz-ness.
1688  return {Intrinsic::sqrt, FTZ_Any};
1689  case Intrinsic::nvvm_sqrt_rn_f:
1690  return {Intrinsic::sqrt, FTZ_MustBeOff};
1691  case Intrinsic::nvvm_sqrt_rn_ftz_f:
1692  return {Intrinsic::sqrt, FTZ_MustBeOn};
1693  case Intrinsic::nvvm_trunc_d:
1694  return {Intrinsic::trunc, FTZ_Any};
1695  case Intrinsic::nvvm_trunc_f:
1696  return {Intrinsic::trunc, FTZ_MustBeOff};
1697  case Intrinsic::nvvm_trunc_ftz_f:
1698  return {Intrinsic::trunc, FTZ_MustBeOn};
1699 
1700  // NVVM intrinsics that map to LLVM cast operations.
1701  //
1702  // Note that llvm's target-generic conversion operators correspond to the rz
1703  // (round to zero) versions of the nvvm conversion intrinsics, even though
1704  // most everything else here uses the rn (round to nearest even) nvvm ops.
1705  case Intrinsic::nvvm_d2i_rz:
1706  case Intrinsic::nvvm_f2i_rz:
1707  case Intrinsic::nvvm_d2ll_rz:
1708  case Intrinsic::nvvm_f2ll_rz:
1709  return {Instruction::FPToSI};
1710  case Intrinsic::nvvm_d2ui_rz:
1711  case Intrinsic::nvvm_f2ui_rz:
1712  case Intrinsic::nvvm_d2ull_rz:
1713  case Intrinsic::nvvm_f2ull_rz:
1714  return {Instruction::FPToUI};
1715  case Intrinsic::nvvm_i2d_rz:
1716  case Intrinsic::nvvm_i2f_rz:
1717  case Intrinsic::nvvm_ll2d_rz:
1718  case Intrinsic::nvvm_ll2f_rz:
1719  return {Instruction::SIToFP};
1720  case Intrinsic::nvvm_ui2d_rz:
1721  case Intrinsic::nvvm_ui2f_rz:
1722  case Intrinsic::nvvm_ull2d_rz:
1723  case Intrinsic::nvvm_ull2f_rz:
1724  return {Instruction::UIToFP};
1725 
1726  // NVVM intrinsics that map to LLVM binary ops.
1727  case Intrinsic::nvvm_add_rn_d:
1728  return {Instruction::FAdd, FTZ_Any};
1729  case Intrinsic::nvvm_add_rn_f:
1730  return {Instruction::FAdd, FTZ_MustBeOff};
1731  case Intrinsic::nvvm_add_rn_ftz_f:
1732  return {Instruction::FAdd, FTZ_MustBeOn};
1733  case Intrinsic::nvvm_mul_rn_d:
1734  return {Instruction::FMul, FTZ_Any};
1735  case Intrinsic::nvvm_mul_rn_f:
1736  return {Instruction::FMul, FTZ_MustBeOff};
1737  case Intrinsic::nvvm_mul_rn_ftz_f:
1738  return {Instruction::FMul, FTZ_MustBeOn};
1739  case Intrinsic::nvvm_div_rn_d:
1740  return {Instruction::FDiv, FTZ_Any};
1741  case Intrinsic::nvvm_div_rn_f:
1742  return {Instruction::FDiv, FTZ_MustBeOff};
1743  case Intrinsic::nvvm_div_rn_ftz_f:
1744  return {Instruction::FDiv, FTZ_MustBeOn};
1745 
1746  // The remainder of cases are NVVM intrinsics that map to LLVM idioms, but
1747  // need special handling.
1748  //
1749  // We seem to be missing intrinsics for rcp.approx.{ftz.}f32, which is just
1750  // as well.
1751  case Intrinsic::nvvm_rcp_rn_d:
1752  return {SPC_Reciprocal, FTZ_Any};
1753  case Intrinsic::nvvm_rcp_rn_f:
1754  return {SPC_Reciprocal, FTZ_MustBeOff};
1755  case Intrinsic::nvvm_rcp_rn_ftz_f:
1756  return {SPC_Reciprocal, FTZ_MustBeOn};
1757 
1758  // We do not currently simplify intrinsics that give an approximate answer.
1759  // These include:
1760  //
1761  // - nvvm_cos_approx_{f,ftz_f}
1762  // - nvvm_ex2_approx_{d,f,ftz_f}
1763  // - nvvm_lg2_approx_{d,f,ftz_f}
1764  // - nvvm_sin_approx_{f,ftz_f}
1765  // - nvvm_sqrt_approx_{f,ftz_f}
1766  // - nvvm_rsqrt_approx_{d,f,ftz_f}
1767  // - nvvm_div_approx_{ftz_d,ftz_f,f}
1768  // - nvvm_rcp_approx_ftz_d
1769  //
1770  // Ideally we'd encode them as e.g. "fast call @llvm.cos", where "fast"
1771  // means that fastmath is enabled in the intrinsic. Unfortunately only
1772  // binary operators (currently) have a fastmath bit in SelectionDAG, so this
1773  // information gets lost and we can't select on it.
1774  //
2775  // TODO: div and rcp are lowered to a binary op, so in theory we could
2776  // lower them to a "fast fdiv".
1777 
1778  default:
1779  return {};
1780  }
1781  }();
1782 
1783  // If Action.FtzRequirementTy is not satisfied by the module's ftz state, we
1784  // can bail out now. (Notice that in the case that IID is not an NVVM
1785  // intrinsic, we don't have to look up any module metadata, as
1786  // FtzRequirementTy will be FTZ_Any.)
1787  if (Action.FtzRequirement != FTZ_Any) {
1788  bool FtzEnabled =
1789  II->getFunction()->getFnAttribute("nvptx-f32ftz").getValueAsString() ==
1790  "true";
1791 
1792  if (FtzEnabled != (Action.FtzRequirement == FTZ_MustBeOn))
1793  return nullptr;
1794  }
1795 
1796  // Simplify to target-generic intrinsic.
1797  if (Action.IID) {
1798  SmallVector<Value *, 4> Args(II->arg_operands());
1799  // All the target-generic intrinsics currently of interest to us have one
1800  // type argument, equal to that of the nvvm intrinsic's argument.
1801  Type *Tys[] = {II->getArgOperand(0)->getType()};
1802  return CallInst::Create(
1803  Intrinsic::getDeclaration(II->getModule(), *Action.IID, Tys), Args);
1804  }
1805 
1806  // Simplify to target-generic binary op.
1807  if (Action.BinaryOp)
1808  return BinaryOperator::Create(*Action.BinaryOp, II->getArgOperand(0),
1809  II->getArgOperand(1), II->getName());
1810 
1811  // Simplify to target-generic cast op.
1812  if (Action.CastOp)
1813  return CastInst::Create(*Action.CastOp, II->getArgOperand(0), II->getType(),
1814  II->getName());
1815 
1816  // All that's left are the special cases.
1817  if (!Action.Special)
1818  return nullptr;
1819 
1820  switch (*Action.Special) {
1821  case SPC_Reciprocal:
1822  // Simplify reciprocal.
1823  return BinaryOperator::Create(
1824  Instruction::FDiv, ConstantFP::get(II->getArgOperand(0)->getType(), 1),
1825  II->getArgOperand(0), II->getName());
1826  }
1827  llvm_unreachable("All SpecialCase enumerators should be handled in switch.");
1828 }
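// Illustrative examples of the mapping above (hypothetical IR):
//   call double @llvm.nvvm.sqrt.rn.d(double %x)  -->  call double @llvm.sqrt.f64(double %x)
//   call double @llvm.nvvm.rcp.rn.d(double %x)   -->  fdiv double 1.0, %x
// Both are FTZ_Any cases, so the "nvptx-f32ftz" attribute is not consulted.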
1829 
1830 Instruction *InstCombiner::visitVAStartInst(VAStartInst &I) {
1831  removeTriviallyEmptyRange(I, Intrinsic::vastart, Intrinsic::vaend, *this);
1832  return nullptr;
1833 }
1834 
1835 Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
1836  removeTriviallyEmptyRange(I, Intrinsic::vacopy, Intrinsic::vaend, *this);
1837  return nullptr;
1838 }
1839 
1840 /// CallInst simplification. This mostly only handles folding of intrinsic
1841 /// instructions. For normal calls, it allows visitCallSite to do the heavy
1842 /// lifting.
1843 Instruction *InstCombiner::visitCallInst(CallInst &CI) {
1844  if (Value *V = SimplifyCall(&CI, SQ.getWithInstruction(&CI)))
1845  return replaceInstUsesWith(CI, V);
1846 
1847  if (isFreeCall(&CI, &TLI))
1848  return visitFree(CI);
1849 
1850  // If the caller function is nounwind, mark the call as nounwind, even if the
1851  // callee isn't.
1852  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1853  CI.setDoesNotThrow();
1854  return &CI;
1855  }
1856 
1857  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1858  if (!II) return visitCallSite(&CI);
1859 
1860  // Intrinsics cannot occur in an invoke, so handle them here instead of in
1861  // visitCallSite.
1862  if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1863  bool Changed = false;
1864 
1865  // memmove/cpy/set of zero bytes is a noop.
1866  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1867  if (NumBytes->isNullValue())
1868  return eraseInstFromFunction(CI);
1869 
1870  if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes))
1871  if (CI->getZExtValue() == 1) {
1872  // Replace the instruction with just byte operations. We would
1873  // transform other cases to loads/stores, but we don't know if
1874  // alignment is sufficient.
1875  }
1876  }
1877 
1878  // No other transformations apply to volatile transfers.
1879  if (auto *M = dyn_cast<MemIntrinsic>(MI))
1880  if (M->isVolatile())
1881  return nullptr;
1882 
1883  // If we have a memmove and the source operation is a constant global,
1884  // then the source and dest pointers can't alias, so we can change this
1885  // into a call to memcpy.
1886  if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1887  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1888  if (GVSrc->isConstant()) {
1889  Module *M = CI.getModule();
1890  Intrinsic::ID MemCpyID =
1891  isa<AtomicMemMoveInst>(MMI)
1892  ? Intrinsic::memcpy_element_unordered_atomic
1893  : Intrinsic::memcpy;
1894  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1895  CI.getArgOperand(1)->getType(),
1896  CI.getArgOperand(2)->getType() };
1897  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1898  Changed = true;
1899  }
1900  }
1901 
1902  if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1903  // memmove(x,x,size) -> noop.
1904  if (MTI->getSource() == MTI->getDest())
1905  return eraseInstFromFunction(CI);
1906  }
1907 
1908  // If we can determine a pointer alignment that is bigger than currently
1909  // set, update the alignment.
1910  if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1911  if (Instruction *I = SimplifyAnyMemTransfer(MTI))
1912  return I;
1913  } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1914  if (Instruction *I = SimplifyAnyMemSet(MSI))
1915  return I;
1916  }
1917 
1918  if (Changed) return II;
1919  }
1920 
1921  if (Instruction *I = SimplifyNVVMIntrinsic(II, *this))
1922  return I;
1923 
1924  auto SimplifyDemandedVectorEltsLow = [this](Value *Op, unsigned Width,
1925  unsigned DemandedWidth) {
1926  APInt UndefElts(Width, 0);
1927  APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
1928  return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
1929  };
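  // For example (illustrative), the scalar conversion intrinsics handled
  // below call this helper with DemandedWidth = 1, so only lane 0 of the
  // source vector is demanded and unused upper lanes may become undef.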
1930 
1931  switch (II->getIntrinsicID()) {
1932  default: break;
1933  case Intrinsic::objectsize:
1934  if (ConstantInt *N =
1935  lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
1936  return replaceInstUsesWith(CI, N);
1937  return nullptr;
1938  case Intrinsic::bswap: {
1939  Value *IIOperand = II->getArgOperand(0);
1940  Value *X = nullptr;
1941 
1942  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
1943  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1944  unsigned C = X->getType()->getPrimitiveSizeInBits() -
1945  IIOperand->getType()->getPrimitiveSizeInBits();
1946  Value *CV = ConstantInt::get(X->getType(), C);
1947  Value *V = Builder.CreateLShr(X, CV);
1948  return new TruncInst(V, IIOperand->getType());
1949  }
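    // For example (illustrative types): with an i64 %x truncated to i32,
    // bswap(trunc(bswap(%x))) becomes trunc(lshr(%x, 64 - 32)).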
1950  break;
1951  }
1952  case Intrinsic::masked_load:
1953  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, Builder))
1954  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1955  break;
1956  case Intrinsic::masked_store:
1957  return simplifyMaskedStore(*II, *this);
1958  case Intrinsic::masked_gather:
1959  return simplifyMaskedGather(*II, *this);
1960  case Intrinsic::masked_scatter:
1961  return simplifyMaskedScatter(*II, *this);
1962  case Intrinsic::launder_invariant_group:
1963  case Intrinsic::strip_invariant_group:
1964  if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
1965  return replaceInstUsesWith(*II, SkippedBarrier);
1966  break;
1967  case Intrinsic::powi:
1968  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1969  // 0 and 1 are handled in instsimplify
1970 
1971  // powi(x, -1) -> 1/x
1972  if (Power->isMinusOne())
1973  return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
1974  II->getArgOperand(0));
1975  // powi(x, 2) -> x*x
1976  if (Power->equalsInt(2))
1977  return BinaryOperator::CreateFMul(II->getArgOperand(0),
1978  II->getArgOperand(0));
1979  }
1980  break;
1981 
1982  case Intrinsic::cttz:
1983  case Intrinsic::ctlz:
1984  if (auto *I = foldCttzCtlz(*II, *this))
1985  return I;
1986  break;
1987 
1988  case Intrinsic::ctpop:
1989  if (auto *I = foldCtpop(*II, *this))
1990  return I;
1991  break;
1992 
1993  case Intrinsic::uadd_with_overflow:
1994  case Intrinsic::sadd_with_overflow:
1995  case Intrinsic::umul_with_overflow:
1996  case Intrinsic::smul_with_overflow:
1997  if (isa<Constant>(II->getArgOperand(0)) &&
1998  !isa<Constant>(II->getArgOperand(1))) {
1999  // Canonicalize constants into the RHS.
2000  Value *LHS = II->getArgOperand(0);
2001  II->setArgOperand(0, II->getArgOperand(1));
2002  II->setArgOperand(1, LHS);
2003  return II;
2004  }
2005  LLVM_FALLTHROUGH;
2006 
2007  case Intrinsic::usub_with_overflow:
2008  case Intrinsic::ssub_with_overflow: {
2009  OverflowCheckFlavor OCF =
2010  IntrinsicIDToOverflowCheckFlavor(II->getIntrinsicID());
2011  assert(OCF != OCF_INVALID && "unexpected!");
2012 
2013  Value *OperationResult = nullptr;
2014  Constant *OverflowResult = nullptr;
2015  if (OptimizeOverflowCheck(OCF, II->getArgOperand(0), II->getArgOperand(1),
2016  *II, OperationResult, OverflowResult))
2017  return CreateOverflowTuple(II, OperationResult, OverflowResult);
2018 
2019  break;
2020  }
2021 
2022  case Intrinsic::minnum:
2023  case Intrinsic::maxnum: {
2024  Value *Arg0 = II->getArgOperand(0);
2025  Value *Arg1 = II->getArgOperand(1);
2026  // Canonicalize constants to the RHS.
2027  if (isa<ConstantFP>(Arg0) && !isa<ConstantFP>(Arg1)) {
2028  II->setArgOperand(0, Arg1);
2029  II->setArgOperand(1, Arg0);
2030  return II;
2031  }
2032 
2033  Value *X, *Y;
2034  if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2035  (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2036  // If both operands are negated, invert the call and negate the result:
2037  // minnum(-X, -Y) --> -(maxnum(X, Y))
2038  // maxnum(-X, -Y) --> -(minnum(X, Y))
2039  Intrinsic::ID NewIID = II->getIntrinsicID() == Intrinsic::maxnum ?
2040  Intrinsic::minnum : Intrinsic::maxnum;
2041  Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2042  Instruction *FNeg = BinaryOperator::CreateFNeg(NewCall);
2043  FNeg->copyIRFlags(II);
2044  return FNeg;
2045  }
2046  break;
2047  }
2048  case Intrinsic::fmuladd: {
2049  // Canonicalize fast fmuladd to the separate fmul + fadd.
2050  if (II->isFast()) {
2051  BuilderTy::FastMathFlagGuard Guard(Builder);
2052  Builder.setFastMathFlags(II->getFastMathFlags());
2053  Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
2054  II->getArgOperand(1));
2055  Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
2056  Add->takeName(II);
2057  return replaceInstUsesWith(*II, Add);
2058  }
2059 
2060  LLVM_FALLTHROUGH;
2061  }
2062  case Intrinsic::fma: {
2063  Value *Src0 = II->getArgOperand(0);
2064  Value *Src1 = II->getArgOperand(1);
2065 
2066  // Canonicalize constant multiply operand to Src1.
2067  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
2068  II->setArgOperand(0, Src1);
2069  II->setArgOperand(1, Src0);
2070  std::swap(Src0, Src1);
2071  }
2072 
2073  // fma fneg(x), fneg(y), z -> fma x, y, z
2074  Value *X, *Y;
2075  if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
2076  II->setArgOperand(0, X);
2077  II->setArgOperand(1, Y);
2078  return II;
2079  }
2080 
2081  // fma fabs(x), fabs(x), z -> fma x, x, z
2082  if (match(Src0, m_FAbs(m_Value(X))) &&
2083  match(Src1, m_FAbs(m_Specific(X)))) {
2084  II->setArgOperand(0, X);
2085  II->setArgOperand(1, X);
2086  return II;
2087  }
2088 
2089  // fma x, 1, z -> fadd x, z
2090  if (match(Src1, m_FPOne())) {
2091  auto *FAdd = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
2092  FAdd->copyFastMathFlags(II);
2093  return FAdd;
2094  }
2095 
2096  break;
2097  }
2098  case Intrinsic::fabs: {
2099  Value *Cond;
2100  Constant *LHS, *RHS;
2101  if (match(II->getArgOperand(0),
2102  m_Select(m_Value(Cond), m_Constant(LHS), m_Constant(RHS)))) {
2103  CallInst *Call0 = Builder.CreateCall(II->getCalledFunction(), {LHS});
2104  CallInst *Call1 = Builder.CreateCall(II->getCalledFunction(), {RHS});
2105  return SelectInst::Create(Cond, Call0, Call1);
2106  }
2107 
2108  LLVM_FALLTHROUGH;
2109  }
2110  case Intrinsic::ceil:
2111  case Intrinsic::floor:
2112  case Intrinsic::round:
2113  case Intrinsic::nearbyint:
2114  case Intrinsic::rint:
2115  case Intrinsic::trunc: {
2116  Value *ExtSrc;
2117  if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
2118  // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2119  Value *NarrowII =
2120  Builder.CreateUnaryIntrinsic(II->getIntrinsicID(), ExtSrc, II);
2121  return new FPExtInst(NarrowII, II->getType());
2122  }
2123  break;
2124  }
2125  case Intrinsic::cos:
2126  case Intrinsic::amdgcn_cos: {
2127  Value *X;
2128  Value *Src = II->getArgOperand(0);
2129  if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
2130  // cos(-x) -> cos(x)
2131  // cos(fabs(x)) -> cos(x)
2132  II->setArgOperand(0, X);
2133  return II;
2134  }
2135  break;
2136  }
2137  case Intrinsic::sin: {
2138  Value *X;
2139  if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
2140  // sin(-x) --> -sin(x)
2141  Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
2142  Instruction *FNeg = BinaryOperator::CreateFNeg(NewSin);
2143  FNeg->copyFastMathFlags(II);
2144  return FNeg;
2145  }
2146  break;
2147  }
2148  case Intrinsic::ppc_altivec_lvx:
2149  case Intrinsic::ppc_altivec_lvxl:
2150  // Turn PPC lvx -> load if the pointer is known aligned.
2151  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2152  &DT) >= 16) {
2153  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2154  PointerType::getUnqual(II->getType()));
2155  return new LoadInst(Ptr);
2156  }
2157  break;
2158  case Intrinsic::ppc_vsx_lxvw4x:
2159  case Intrinsic::ppc_vsx_lxvd2x: {
2160  // Turn PPC VSX loads into normal loads.
2161  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2162  PointerType::getUnqual(II->getType()));
2163  return new LoadInst(Ptr, Twine(""), false, 1);
2164  }
2165  case Intrinsic::ppc_altivec_stvx:
2166  case Intrinsic::ppc_altivec_stvxl:
2167  // Turn stvx -> store if the pointer is known aligned.
2168  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2169  &DT) >= 16) {
2170  Type *OpPtrTy =
2171  PointerType::getUnqual(II->getArgOperand(0)->getType());
2172  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2173  return new StoreInst(II->getArgOperand(0), Ptr);
2174  }
2175  break;
2176  case Intrinsic::ppc_vsx_stxvw4x:
2177  case Intrinsic::ppc_vsx_stxvd2x: {
2178  // Turn PPC VSX stores into normal stores.
2179  Type *OpPtrTy = PointerType::getUnqual(II->getArgOperand(0)->getType());
2180  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2181  return new StoreInst(II->getArgOperand(0), Ptr, false, 1);
2182  }
2183  case Intrinsic::ppc_qpx_qvlfs:
2184  // Turn PPC QPX qvlfs -> load if the pointer is known aligned.
2185  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
2186  &DT) >= 16) {
2187  Type *VTy = VectorType::get(Builder.getFloatTy(),
2188  II->getType()->getVectorNumElements());
2189  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2190  PointerType::getUnqual(VTy));
2191  Value *Load = Builder.CreateLoad(Ptr);
2192  return new FPExtInst(Load, II->getType());
2193  }
2194  break;
2195  case Intrinsic::ppc_qpx_qvlfd:
2196  // Turn PPC QPX qvlfd -> load if the pointer is known aligned.
2197  if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
2198  &DT) >= 32) {
2199  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(0),
2200  PointerType::getUnqual(II->getType()));
2201  return new LoadInst(Ptr);
2202  }
2203  break;
2204  case Intrinsic::ppc_qpx_qvstfs:
2205  // Turn PPC QPX qvstfs -> store if the pointer is known aligned.
2206  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
2207  &DT) >= 16) {
2208  Type *VTy = VectorType::get(Builder.getFloatTy(),
2209  II->getArgOperand(0)->getType()->getVectorNumElements());
2210  Value *TOp = Builder.CreateFPTrunc(II->getArgOperand(0), VTy);
2211  Type *OpPtrTy = PointerType::getUnqual(VTy);
2212  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2213  return new StoreInst(TOp, Ptr);
2214  }
2215  break;
2216  case Intrinsic::ppc_qpx_qvstfd:
2217  // Turn PPC QPX qvstfd -> store if the pointer is known aligned.
2218  if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
2219  &DT) >= 32) {
2220  Type *OpPtrTy =
2221  PointerType::getUnqual(II->getArgOperand(0)->getType());
2222  Value *Ptr = Builder.CreateBitCast(II->getArgOperand(1), OpPtrTy);
2223  return new StoreInst(II->getArgOperand(0), Ptr);
2224  }
2225  break;
2226 
2227  case Intrinsic::x86_bmi_bextr_32:
2228  case Intrinsic::x86_bmi_bextr_64:
2229  case Intrinsic::x86_tbm_bextri_u32:
2230  case Intrinsic::x86_tbm_bextri_u64:
2231  // If the RHS is a constant we can try some simplifications.
2232  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2233  uint64_t Shift = C->getZExtValue();
2234  uint64_t Length = (Shift >> 8) & 0xff;
2235  Shift &= 0xff;
2236  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2237  // If the length is 0 or the shift is out of range, replace with zero.
2238  if (Length == 0 || Shift >= BitWidth)
2239  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2240  // If the LHS is also a constant, we can completely constant fold this.
2241  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2242  uint64_t Result = InC->getZExtValue() >> Shift;
2243  if (Length > BitWidth)
2244  Length = BitWidth;
2245  Result &= maskTrailingOnes<uint64_t>(Length);
2246  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2247  }
2248  // TODO should we turn this into 'and' if shift is 0? Or 'shl' if we
2249  // are only masking bits that a shift already cleared?
2250  }
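    // Worked example (illustrative constants): bextr(0x12345678, 0x0804)
    // gives Shift = 4 and Length = 8, so it constant-folds to
    // (0x12345678 >> 4) & 0xff = 0x67.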
2251  break;
2252 
2253  case Intrinsic::x86_bmi_bzhi_32:
2254  case Intrinsic::x86_bmi_bzhi_64:
2255  // If the RHS is a constant we can try some simplifications.
2256  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2257  uint64_t Index = C->getZExtValue() & 0xff;
2258  unsigned BitWidth = II->getType()->getIntegerBitWidth();
2259  if (Index >= BitWidth)
2260  return replaceInstUsesWith(CI, II->getArgOperand(0));
2261  if (Index == 0)
2262  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), 0));
2263  // If the LHS is also a constant, we can completely constant fold this.
2264  if (auto *InC = dyn_cast<ConstantInt>(II->getArgOperand(0))) {
2265  uint64_t Result = InC->getZExtValue();
2266  Result &= maskTrailingOnes<uint64_t>(Index);
2267  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Result));
2268  }
2269  // TODO should we convert this to an AND if the RHS is constant?
2270  }
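    // Worked example (illustrative constants): bzhi(0x12345678, 8) keeps the
    // low 8 bits and constant-folds to 0x78, while an index of 32 or more
    // returns the first operand unchanged.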
2271  break;
2272 
2273  case Intrinsic::x86_vcvtph2ps_128:
2274  case Intrinsic::x86_vcvtph2ps_256: {
2275  auto Arg = II->getArgOperand(0);
2276  auto ArgType = cast<VectorType>(Arg->getType());
2277  auto RetType = cast<VectorType>(II->getType());
2278  unsigned ArgWidth = ArgType->getNumElements();
2279  unsigned RetWidth = RetType->getNumElements();
2280  assert(RetWidth <= ArgWidth && "Unexpected input/return vector widths");
2281  assert(ArgType->isIntOrIntVectorTy() &&
2282  ArgType->getScalarSizeInBits() == 16 &&
2283  "CVTPH2PS input type should be 16-bit integer vector");
2284  assert(RetType->getScalarType()->isFloatTy() &&
2285  "CVTPH2PS output type should be 32-bit float vector");
2286 
2287  // Constant folding: Convert to generic half to single conversion.
2288  if (isa<ConstantAggregateZero>(Arg))
2289  return replaceInstUsesWith(*II, ConstantAggregateZero::get(RetType));
2290 
2291  if (isa<ConstantDataVector>(Arg)) {
2292  auto VectorHalfAsShorts = Arg;
2293  if (RetWidth < ArgWidth) {
2294  SmallVector<uint32_t, 8> SubVecMask;
2295  for (unsigned i = 0; i != RetWidth; ++i)
2296  SubVecMask.push_back((int)i);
2297  VectorHalfAsShorts = Builder.CreateShuffleVector(
2298  Arg, UndefValue::get(ArgType), SubVecMask);
2299  }
2300 
2301  auto VectorHalfType =
2302  VectorType::get(Type::getHalfTy(II->getContext()), RetWidth);
2303  auto VectorHalfs =
2304  Builder.CreateBitCast(VectorHalfAsShorts, VectorHalfType);
2305  auto VectorFloats = Builder.CreateFPExt(VectorHalfs, RetType);
2306  return replaceInstUsesWith(*II, VectorFloats);
2307  }
2308 
2309  // We only use the lowest lanes of the argument.
2310  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, ArgWidth, RetWidth)) {
2311  II->setArgOperand(0, V);
2312  return II;
2313  }
2314  break;
2315  }
2316 
2317  case Intrinsic::x86_sse_cvtss2si:
2318  case Intrinsic::x86_sse_cvtss2si64:
2319  case Intrinsic::x86_sse_cvttss2si:
2320  case Intrinsic::x86_sse_cvttss2si64:
2321  case Intrinsic::x86_sse2_cvtsd2si:
2322  case Intrinsic::x86_sse2_cvtsd2si64:
2323  case Intrinsic::x86_sse2_cvttsd2si:
2324  case Intrinsic::x86_sse2_cvttsd2si64:
2325  case Intrinsic::x86_avx512_vcvtss2si32:
2326  case Intrinsic::x86_avx512_vcvtss2si64:
2327  case Intrinsic::x86_avx512_vcvtss2usi32:
2328  case Intrinsic::x86_avx512_vcvtss2usi64:
2329  case Intrinsic::x86_avx512_vcvtsd2si32:
2330  case Intrinsic::x86_avx512_vcvtsd2si64:
2331  case Intrinsic::x86_avx512_vcvtsd2usi32:
2332  case Intrinsic::x86_avx512_vcvtsd2usi64:
2333  case Intrinsic::x86_avx512_cvttss2si:
2334  case Intrinsic::x86_avx512_cvttss2si64:
2335  case Intrinsic::x86_avx512_cvttss2usi:
2336  case Intrinsic::x86_avx512_cvttss2usi64:
2337  case Intrinsic::x86_avx512_cvttsd2si:
2338  case Intrinsic::x86_avx512_cvttsd2si64:
2339  case Intrinsic::x86_avx512_cvttsd2usi:
2340  case Intrinsic::x86_avx512_cvttsd2usi64: {
2341  // These intrinsics only demand the 0th element of their input vectors. If
2342  // we can simplify the input based on that, do so now.
2343  Value *Arg = II->getArgOperand(0);
2344  unsigned VWidth = Arg->getType()->getVectorNumElements();
2345  if (Value *V = SimplifyDemandedVectorEltsLow(Arg, VWidth, 1)) {
2346  II->setArgOperand(0, V);
2347  return II;
2348  }
2349  break;
2350  }
2351 
2352  case Intrinsic::x86_sse41_round_ps:
2353  case Intrinsic::x86_sse41_round_pd:
2354  case Intrinsic::x86_avx_round_ps_256:
2355  case Intrinsic::x86_avx_round_pd_256:
2356  case Intrinsic::x86_avx512_mask_rndscale_ps_128:
2357  case Intrinsic::x86_avx512_mask_rndscale_ps_256:
2358  case Intrinsic::x86_avx512_mask_rndscale_ps_512:
2359  case Intrinsic::x86_avx512_mask_rndscale_pd_128:
2360  case Intrinsic::x86_avx512_mask_rndscale_pd_256:
2361  case Intrinsic::x86_avx512_mask_rndscale_pd_512:
2362  case Intrinsic::x86_avx512_mask_rndscale_ss:
2363  case Intrinsic::x86_avx512_mask_rndscale_sd:
2364  if (Value *V = simplifyX86round(*II, Builder))
2365  return replaceInstUsesWith(*II, V);
2366  break;
2367 
2368  case Intrinsic::x86_mmx_pmovmskb:
2369  case Intrinsic::x86_sse_movmsk_ps:
2370  case Intrinsic::x86_sse2_movmsk_pd:
2371  case Intrinsic::x86_sse2_pmovmskb_128:
2372  case Intrinsic::x86_avx_movmsk_pd_256:
2373  case Intrinsic::x86_avx_movmsk_ps_256:
2374  case Intrinsic::x86_avx2_pmovmskb:
2375  if (Value *V = simplifyX86movmsk(*II))
2376  return replaceInstUsesWith(*II, V);
2377  break;
2378 
2379  case Intrinsic::x86_sse_comieq_ss:
2380  case Intrinsic::x86_sse_comige_ss:
2381  case Intrinsic::x86_sse_comigt_ss:
2382  case Intrinsic::x86_sse_comile_ss:
2383  case Intrinsic::x86_sse_comilt_ss:
2384  case Intrinsic::x86_sse_comineq_ss:
2385  case Intrinsic::x86_sse_ucomieq_ss:
2386  case Intrinsic::x86_sse_ucomige_ss:
2387  case Intrinsic::x86_sse_ucomigt_ss:
2388  case Intrinsic::x86_sse_ucomile_ss:
2389  case Intrinsic::x86_sse_ucomilt_ss:
2390  case Intrinsic::x86_sse_ucomineq_ss:
2391  case Intrinsic::x86_sse2_comieq_sd:
2392  case Intrinsic::x86_sse2_comige_sd:
2393  case Intrinsic::x86_sse2_comigt_sd:
2394  case Intrinsic::x86_sse2_comile_sd:
2395  case Intrinsic::x86_sse2_comilt_sd:
2396  case Intrinsic::x86_sse2_comineq_sd:
2397  case Intrinsic::x86_sse2_ucomieq_sd:
2398  case Intrinsic::x86_sse2_ucomige_sd:
2399  case Intrinsic::x86_sse2_ucomigt_sd:
2400  case Intrinsic::x86_sse2_ucomile_sd:
2401  case Intrinsic::x86_sse2_ucomilt_sd:
2402  case Intrinsic::x86_sse2_ucomineq_sd:
2403  case Intrinsic::x86_avx512_vcomi_ss:
2404  case Intrinsic::x86_avx512_vcomi_sd:
2405  case Intrinsic::x86_avx512_mask_cmp_ss:
2406  case Intrinsic::x86_avx512_mask_cmp_sd: {
2407  // These intrinsics only demand the 0th element of their input vectors. If
2408  // we can simplify the input based on that, do so now.
2409  bool MadeChange = false;
2410  Value *Arg0 = II->getArgOperand(0);
2411  Value *Arg1 = II->getArgOperand(1);
2412  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2413  if (Value *V = SimplifyDemandedVectorEltsLow(Arg0, VWidth, 1)) {
2414  II->setArgOperand(0, V);
2415  MadeChange = true;
2416  }
2417  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
2418  II->setArgOperand(1, V);
2419  MadeChange = true;
2420  }
2421  if (MadeChange)
2422  return II;
2423  break;
2424  }
2425  case Intrinsic::x86_avx512_cmp_pd_128:
2426  case Intrinsic::x86_avx512_cmp_pd_256:
2427  case Intrinsic::x86_avx512_cmp_pd_512:
2428  case Intrinsic::x86_avx512_cmp_ps_128:
2429  case Intrinsic::x86_avx512_cmp_ps_256:
2430  case Intrinsic::x86_avx512_cmp_ps_512: {
2431  // Folding cmp(sub(a,b),0) -> cmp(a,b) and cmp(0,sub(a,b)) -> cmp(b,a)
2432  Value *Arg0 = II->getArgOperand(0);
2433  Value *Arg1 = II->getArgOperand(1);
2434  bool Arg0IsZero = match(Arg0, m_PosZeroFP());
2435  if (Arg0IsZero)
2436  std::swap(Arg0, Arg1);
2437  Value *A, *B;
2438  // This fold requires only NINF (no +/- infinities), since inf minus
2439  // inf is nan.
2440  // NSZ(No Signed Zeros) is not needed because zeros of any sign are
2441  // equal for both compares.
2442  // NNAN is not needed because nans compare the same for both compares.
2443  // The compare intrinsic uses the above assumptions and therefore
2444  // doesn't require additional flags.
2445  if ((match(Arg0, m_OneUse(m_FSub(m_Value(A), m_Value(B)))) &&
2446  match(Arg1, m_PosZeroFP()) && isa<Instruction>(Arg0) &&
2447  cast<Instruction>(Arg0)->getFastMathFlags().noInfs())) {
2448  if (Arg0IsZero)
2449  std::swap(A, B);
2450  II->setArgOperand(0, A);
2451  II->setArgOperand(1, B);
2452  return II;
2453  }
2454  break;
2455  }
2456 
2457  case Intrinsic::x86_avx512_add_ps_512:
2458  case Intrinsic::x86_avx512_div_ps_512:
2459  case Intrinsic::x86_avx512_mul_ps_512:
2460  case Intrinsic::x86_avx512_sub_ps_512:
2461  case Intrinsic::x86_avx512_add_pd_512:
2462  case Intrinsic::x86_avx512_div_pd_512:
2463  case Intrinsic::x86_avx512_mul_pd_512:
2464  case Intrinsic::x86_avx512_sub_pd_512:
2465  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2466  // IR operations.
2467  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2468  if (R->getValue() == 4) {
2469  Value *Arg0 = II->getArgOperand(0);
2470  Value *Arg1 = II->getArgOperand(1);
2471 
2472  Value *V;
2473  switch (II->getIntrinsicID()) {
2474  default: llvm_unreachable("Case stmts out of sync!");
2475  case Intrinsic::x86_avx512_add_ps_512:
2476  case Intrinsic::x86_avx512_add_pd_512:
2477  V = Builder.CreateFAdd(Arg0, Arg1);
2478  break;
2479  case Intrinsic::x86_avx512_sub_ps_512:
2480  case Intrinsic::x86_avx512_sub_pd_512:
2481  V = Builder.CreateFSub(Arg0, Arg1);
2482  break;
2483  case Intrinsic::x86_avx512_mul_ps_512:
2484  case Intrinsic::x86_avx512_mul_pd_512:
2485  V = Builder.CreateFMul(Arg0, Arg1);
2486  break;
2487  case Intrinsic::x86_avx512_div_ps_512:
2488  case Intrinsic::x86_avx512_div_pd_512:
2489  V = Builder.CreateFDiv(Arg0, Arg1);
2490  break;
2491  }
2492 
2493  return replaceInstUsesWith(*II, V);
2494  }
2495  }
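    // For example (illustrative IR): with a rounding-mode argument of 4,
    //   call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
    // becomes a plain 'fadd <16 x float> %a, %b'.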
2496  break;
2497 
2498  case Intrinsic::x86_avx512_mask_add_ss_round:
2499  case Intrinsic::x86_avx512_mask_div_ss_round:
2500  case Intrinsic::x86_avx512_mask_mul_ss_round:
2501  case Intrinsic::x86_avx512_mask_sub_ss_round:
2502  case Intrinsic::x86_avx512_mask_add_sd_round:
2503  case Intrinsic::x86_avx512_mask_div_sd_round:
2504  case Intrinsic::x86_avx512_mask_mul_sd_round:
2505  case Intrinsic::x86_avx512_mask_sub_sd_round:
2506  // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
2507  // IR operations.
2508  if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
2509  if (R->getValue() == 4) {
2510  // Extract the element as scalars.
2511  Value *Arg0 = II->getArgOperand(0);
2512  Value *Arg1 = II->getArgOperand(1);
2513  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
2514  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
2515 
2516  Value *V;
2517  switch (II->getIntrinsicID()) {
2518  default: llvm_unreachable("Case stmts out of sync!");
2519  case Intrinsic::x86_avx512_mask_add_ss_round:
2520  case Intrinsic::x86_avx512_mask_add_sd_round:
2521  V = Builder.CreateFAdd(LHS, RHS);
2522  break;
2523  case Intrinsic::x86_avx512_mask_sub_ss_round:
2524  case Intrinsic::x86_avx512_mask_sub_sd_round:
2525  V = Builder.CreateFSub(LHS, RHS);
2526  break;
2527  case Intrinsic::x86_avx512_mask_mul_ss_round:
2528  case Intrinsic::x86_avx512_mask_mul_sd_round:
2529  V = Builder.CreateFMul(LHS, RHS);
2530  break;
2531  case Intrinsic::x86_avx512_mask_div_ss_round:
2532  case Intrinsic::x86_avx512_mask_div_sd_round:
2533  V = Builder.CreateFDiv(LHS, RHS);
2534  break;
2535  }
2536 
2537  // Handle the masking aspect of the intrinsic.
2538  Value *Mask = II->getArgOperand(3);
2539  auto *C = dyn_cast<ConstantInt>(Mask);
2540  // We don't need a select if we know the mask bit is a 1.
2541  if (!C || !C->getValue()[0]) {
2542  // Cast the mask to an i1 vector and then extract the lowest element.
2543  auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
2544  cast<IntegerType>(Mask->getType())->getBitWidth());
2545  Mask = Builder.CreateBitCast(Mask, MaskTy);
2546  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2547  // Extract the lowest element from the passthru operand.
2548  Value *Passthru = Builder.CreateExtractElement(II->getArgOperand(2),
2549  (uint64_t)0);
2550  V = Builder.CreateSelect(Mask, V, Passthru);
2551  }
2552 
2553  // Insert the result back into the original argument 0.
2554  V = Builder.CreateInsertElement(Arg0, V, (uint64_t)0);
2555 
2556  return replaceInstUsesWith(*II, V);
2557  }
2558  }
2559  LLVM_FALLTHROUGH;
2560 
2561  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
2562  case Intrinsic::x86_avx512_mask_max_ss_round:
2563  case Intrinsic::x86_avx512_mask_min_ss_round:
2564  case Intrinsic::x86_avx512_mask_max_sd_round:
2565  case Intrinsic::x86_avx512_mask_min_sd_round:
2566  case Intrinsic::x86_sse_cmp_ss:
2567  case Intrinsic::x86_sse_min_ss:
2568  case Intrinsic::x86_sse_max_ss:
2569  case Intrinsic::x86_sse2_cmp_sd:
2570  case Intrinsic::x86_sse2_min_sd:
2571  case Intrinsic::x86_sse2_max_sd:
2572  case Intrinsic::x86_xop_vfrcz_ss:
2573  case Intrinsic::x86_xop_vfrcz_sd: {
2574  unsigned VWidth = II->getType()->getVectorNumElements();
2575  APInt UndefElts(VWidth, 0);
2576  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2577  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2578  if (V != II)
2579  return replaceInstUsesWith(*II, V);
2580  return II;
2581  }
2582  break;
2583  }
2584  case Intrinsic::x86_sse41_round_ss:
2585  case Intrinsic::x86_sse41_round_sd: {
2586  unsigned VWidth = II->getType()->getVectorNumElements();
2587  APInt UndefElts(VWidth, 0);
2588  APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
2589  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
2590  if (V != II)
2591  return replaceInstUsesWith(*II, V);
2592  return II;
2593  } else if (Value *V = simplifyX86round(*II, Builder))
2594  return replaceInstUsesWith(*II, V);
2595  break;
2596  }
2597 
2598  // Constant fold add/sub with saturation intrinsics.
2599  case Intrinsic::x86_sse2_padds_b:
2600  case Intrinsic::x86_sse2_padds_w:
2601  case Intrinsic::x86_sse2_psubs_b:
2602  case Intrinsic::x86_sse2_psubs_w:
2603  case Intrinsic::x86_avx2_padds_b:
2604  case Intrinsic::x86_avx2_padds_w:
2605  case Intrinsic::x86_avx2_psubs_b:
2606  case Intrinsic::x86_avx2_psubs_w:
2607  case Intrinsic::x86_avx512_padds_b_512:
2608  case Intrinsic::x86_avx512_padds_w_512:
2609  case Intrinsic::x86_avx512_psubs_b_512:
2610  case Intrinsic::x86_avx512_psubs_w_512:
2611  if (Value *V = simplifyX86AddsSubs(*II, Builder))
2612  return replaceInstUsesWith(*II, V);
2613  break;
2614 
2615  // Constant fold ashr( <A x Bi>, Ci ).
2616  // Constant fold lshr( <A x Bi>, Ci ).
2617  // Constant fold shl( <A x Bi>, Ci ).
2618  case Intrinsic::x86_sse2_psrai_d:
2619  case Intrinsic::x86_sse2_psrai_w:
2620  case Intrinsic::x86_avx2_psrai_d:
2621  case Intrinsic::x86_avx2_psrai_w:
2622  case Intrinsic::x86_avx512_psrai_q_128:
2623  case Intrinsic::x86_avx512_psrai_q_256:
2624  case Intrinsic::x86_avx512_psrai_d_512:
2625  case Intrinsic::x86_avx512_psrai_q_512:
2626  case Intrinsic::x86_avx512_psrai_w_512:
2627  case Intrinsic::x86_sse2_psrli_d:
2628  case Intrinsic::x86_sse2_psrli_q:
2629  case Intrinsic::x86_sse2_psrli_w:
2630  case Intrinsic::x86_avx2_psrli_d:
2631  case Intrinsic::x86_avx2_psrli_q:
2632  case Intrinsic::x86_avx2_psrli_w:
2633  case Intrinsic::x86_avx512_psrli_d_512:
2634  case Intrinsic::x86_avx512_psrli_q_512:
2635  case Intrinsic::x86_avx512_psrli_w_512:
2636  case Intrinsic::x86_sse2_pslli_d:
2637  case Intrinsic::x86_sse2_pslli_q:
2638  case Intrinsic::x86_sse2_pslli_w:
2639  case Intrinsic::x86_avx2_pslli_d:
2640  case Intrinsic::x86_avx2_pslli_q:
2641  case Intrinsic::x86_avx2_pslli_w:
2642  case Intrinsic::x86_avx512_pslli_d_512:
2643  case Intrinsic::x86_avx512_pslli_q_512:
2644  case Intrinsic::x86_avx512_pslli_w_512:
2645  if (Value *V = simplifyX86immShift(*II, Builder))
2646  return replaceInstUsesWith(*II, V);
2647  break;
2648 
2649  case Intrinsic::x86_sse2_psra_d:
2650  case Intrinsic::x86_sse2_psra_w:
2651  case Intrinsic::x86_avx2_psra_d:
2652  case Intrinsic::x86_avx2_psra_w:
2653  case Intrinsic::x86_avx512_psra_q_128:
2654  case Intrinsic::x86_avx512_psra_q_256:
2655  case Intrinsic::x86_avx512_psra_d_512:
2656  case Intrinsic::x86_avx512_psra_q_512:
2657  case Intrinsic::x86_avx512_psra_w_512:
2658  case Intrinsic::x86_sse2_psrl_d:
2659  case Intrinsic::x86_sse2_psrl_q:
2660  case Intrinsic::x86_sse2_psrl_w:
2661  case Intrinsic::x86_avx2_psrl_d:
2662  case Intrinsic::x86_avx2_psrl_q:
2663  case Intrinsic::x86_avx2_psrl_w:
2664  case Intrinsic::x86_avx512_psrl_d_512:
2665  case Intrinsic::x86_avx512_psrl_q_512:
2666  case Intrinsic::x86_avx512_psrl_w_512:
2667  case Intrinsic::x86_sse2_psll_d:
2668  case Intrinsic::x86_sse2_psll_q:
2669  case Intrinsic::x86_sse2_psll_w:
2670  case Intrinsic::x86_avx2_psll_d:
2671  case Intrinsic::x86_avx2_psll_q:
2672  case Intrinsic::x86_avx2_psll_w:
2673  case Intrinsic::x86_avx512_psll_d_512:
2674  case Intrinsic::x86_avx512_psll_q_512:
2675  case Intrinsic::x86_avx512_psll_w_512: {
2676  if (Value *V = simplifyX86immShift(*II, Builder))
2677  return replaceInstUsesWith(*II, V);
2678 
2679  // SSE2/AVX2 uses only the first 64-bits of the 128-bit vector
2680  // operand to compute the shift amount.
2681  Value *Arg1 = II->getArgOperand(1);
2682  assert(Arg1->getType()->getPrimitiveSizeInBits() == 128 &&
2683  "Unexpected packed shift size");
2684  unsigned VWidth = Arg1->getType()->getVectorNumElements();
2685 
2686  if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, VWidth / 2)) {
2687  II->setArgOperand(1, V);
2688  return II;
2689  }
2690  break;
2691  }
2692 
2693  case Intrinsic::x86_avx2_psllv_d:
2694  case Intrinsic::x86_avx2_psllv_d_256:
2695  case Intrinsic::x86_avx2_psllv_q:
2696  case Intrinsic::x86_avx2_psllv_q_256:
2697  case Intrinsic::x86_avx512_psllv_d_512:
2698  case Intrinsic::x86_avx512_psllv_q_512:
2699  case Intrinsic::x86_avx512_psllv_w_128:
2700  case Intrinsic::x86_avx512_psllv_w_256:
2701  case Intrinsic::x86_avx512_psllv_w_512:
2702  case Intrinsic::x86_avx2_psrav_d:
2703  case Intrinsic::x86_avx2_psrav_d_256:
2704  case Intrinsic::x86_avx512_psrav_q_128:
2705  case Intrinsic::x86_avx512_psrav_q_256:
2706  case Intrinsic::x86_avx512_psrav_d_512:
2707  case Intrinsic::x86_avx512_psrav_q_512:
2708  case Intrinsic::x86_avx512_psrav_w_128:
2709  case Intrinsic::x86_avx512_psrav_w_256:
2710  case Intrinsic::x86_avx512_psrav_w_512:
2711  case Intrinsic::x86_avx2_psrlv_d:
2712  case Intrinsic::x86_avx2_psrlv_d_256:
2713  case Intrinsic::x86_avx2_psrlv_q:
2714  case Intrinsic::x86_avx2_psrlv_q_256:
2715  case Intrinsic::x86_avx512_psrlv_d_512:
2716  case Intrinsic::x86_avx512_psrlv_q_512:
2717  case Intrinsic::x86_avx512_psrlv_w_128:
2718  case Intrinsic::x86_avx512_psrlv_w_256:
2719  case Intrinsic::x86_avx512_psrlv_w_512:
2720  if (Value *V = simplifyX86varShift(*II, Builder))
2721  return replaceInstUsesWith(*II, V);
2722  break;
2723 
2724  case Intrinsic::x86_sse2_packssdw_128:
2725  case Intrinsic::x86_sse2_packsswb_128:
2726  case Intrinsic::x86_avx2_packssdw:
2727  case Intrinsic::x86_avx2_packsswb:
2728  case Intrinsic::x86_avx512_packssdw_512:
2729  case Intrinsic::x86_avx512_packsswb_512:
2730  if (Value *V = simplifyX86pack(*II, true))
2731  return replaceInstUsesWith(*II, V);
2732  break;
2733 
2734  case Intrinsic::x86_sse2_packuswb_128:
2735  case Intrinsic::x86_sse41_packusdw:
2736  case Intrinsic::x86_avx2_packusdw:
2737  case Intrinsic::x86_avx2_packuswb:
2738  case Intrinsic::x86_avx512_packusdw_512:
2739  case Intrinsic::x86_avx512_packuswb_512:
2740  if (Value *V = simplifyX86pack(*II, false))
2741  return replaceInstUsesWith(*II, V);
2742  break;
2743 
2744  case Intrinsic::x86_pclmulqdq:
2745  case Intrinsic::x86_pclmulqdq_256:
2746  case Intrinsic::x86_pclmulqdq_512: {
2747  if (auto *C = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
2748  unsigned Imm = C->getZExtValue();
2749 
2750  bool MadeChange = false;
2751  Value *Arg0 = II->getArgOperand(0);
2752  Value *Arg1 = II->getArgOperand(1);
2753  unsigned VWidth = Arg0->getType()->getVectorNumElements();
2754 
2755  APInt UndefElts1(VWidth, 0);
2756  APInt DemandedElts1 = APInt::getSplat(VWidth,
2757  APInt(2, (Imm & 0x01) ? 2 : 1));
2758  if (Value *V = SimplifyDemandedVectorElts(Arg0, DemandedElts1,
2759  UndefElts1)) {
2760  II->setArgOperand(0, V);
2761  MadeChange = true;
2762  }
2763 
2764  APInt UndefElts2(VWidth, 0);
2765  APInt DemandedElts2 = APInt::getSplat(VWidth,
2766  APInt(2, (Imm & 0x10) ? 2 : 1));
2767  if (Value *V = SimplifyDemandedVectorElts(Arg1, DemandedElts2,
2768  UndefElts2)) {
2769  II->setArgOperand(1, V);
2770  MadeChange = true;
2771  }
2772 
2773  // If either input elements are undef, the result is zero.
2774  if (DemandedElts1.isSubsetOf(UndefElts1) ||
2775  DemandedElts2.isSubsetOf(UndefElts2))
2776  return replaceInstUsesWith(*II,
2777  ConstantAggregateZero::get(II->getType()));
2778 
2779  if (MadeChange)
2780  return II;
2781  }
2782  break;
2783  }
2784 
2785  case Intrinsic::x86_sse41_insertps:
2786  if (Value *V = simplifyX86insertps(*II, Builder))
2787  return replaceInstUsesWith(*II, V);
2788  break;
2789 
2790  case Intrinsic::x86_sse4a_extrq: {
2791  Value *Op0 = II->getArgOperand(0);
2792  Value *Op1 = II->getArgOperand(1);
2793  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2794  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2795  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2796  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2797  VWidth1 == 16 && "Unexpected operand sizes");
2798 
2799  // See if we're dealing with constant values.
2800  Constant *C1 = dyn_cast<Constant>(Op1);
2801  ConstantInt *CILength =
2802  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
2803  : nullptr;
2804  ConstantInt *CIIndex =
2805  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2806  : nullptr;
2807 
2808  // Attempt to simplify to a constant, shuffle vector or EXTRQI call.
2809  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2810  return replaceInstUsesWith(*II, V);
2811 
2812  // EXTRQ only uses the lowest 64-bits of the first 128-bit vector
2813  // operands and the lowest 16-bits of the second.
2814  bool MadeChange = false;
2815  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2816  II->setArgOperand(0, V);
2817  MadeChange = true;
2818  }
2819  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 2)) {
2820  II->setArgOperand(1, V);
2821  MadeChange = true;
2822  }
2823  if (MadeChange)
2824  return II;
2825  break;
2826  }
2827 
2828  case Intrinsic::x86_sse4a_extrqi: {
2829  // EXTRQI: Extract Length bits starting from Index. Zero pad the remaining
2830  // bits of the lower 64-bits. The upper 64-bits are undefined.
2831  Value *Op0 = II->getArgOperand(0);
2832  unsigned VWidth = Op0->getType()->getVectorNumElements();
2833  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2834  "Unexpected operand size");
2835 
2836  // See if we're dealing with constant values.
2837  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(1));
2838  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(2));
2839 
2840  // Attempt to simplify to a constant or shuffle vector.
2841  if (Value *V = simplifyX86extrq(*II, Op0, CILength, CIIndex, Builder))
2842  return replaceInstUsesWith(*II, V);
2843 
2844  // EXTRQI only uses the lowest 64-bits of the first 128-bit vector
2845  // operand.
2846  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2847  II->setArgOperand(0, V);
2848  return II;
2849  }
2850  break;
2851  }
2852 
2853  case Intrinsic::x86_sse4a_insertq: {
2854  Value *Op0 = II->getArgOperand(0);
2855  Value *Op1 = II->getArgOperand(1);
2856  unsigned VWidth = Op0->getType()->getVectorNumElements();
2857  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2858  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth == 2 &&
2859  Op1->getType()->getVectorNumElements() == 2 &&
2860  "Unexpected operand size");
2861 
2862  // See if we're dealing with constant values.
2863  Constant *C1 = dyn_cast<Constant>(Op1);
2864  ConstantInt *CI11 =
2865  C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
2866  : nullptr;
2867 
2868  // Attempt to simplify to a constant, shuffle vector or INSERTQI call.
2869  if (CI11) {
2870  const APInt &V11 = CI11->getValue();
2871  APInt Len = V11.zextOrTrunc(6);
2872  APInt Idx = V11.lshr(8).zextOrTrunc(6);
2873  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2874  return replaceInstUsesWith(*II, V);
2875  }
2876 
2877  // INSERTQ only uses the lowest 64-bits of the first 128-bit vector
2878  // operand.
2879  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth, 1)) {
2880  II->setArgOperand(0, V);
2881  return II;
2882  }
2883  break;
2884  }
2885 
2886  case Intrinsic::x86_sse4a_insertqi: {
2887  // INSERTQI: Extract lowest Length bits from lower half of second source and
2888  // insert over first source starting at Index bit. The upper 64-bits are
2889  // undefined.
2890  Value *Op0 = II->getArgOperand(0);
2891  Value *Op1 = II->getArgOperand(1);
2892  unsigned VWidth0 = Op0->getType()->getVectorNumElements();
2893  unsigned VWidth1 = Op1->getType()->getVectorNumElements();
2894  assert(Op0->getType()->getPrimitiveSizeInBits() == 128 &&
2895  Op1->getType()->getPrimitiveSizeInBits() == 128 && VWidth0 == 2 &&
2896  VWidth1 == 2 && "Unexpected operand sizes");
2897 
2898  // See if we're dealing with constant values.
2899  ConstantInt *CILength = dyn_cast<ConstantInt>(II->getArgOperand(2));
2900  ConstantInt *CIIndex = dyn_cast<ConstantInt>(II->getArgOperand(3));
2901 
2902  // Attempt to simplify to a constant or shuffle vector.
2903  if (CILength && CIIndex) {
2904  APInt Len = CILength->getValue().zextOrTrunc(6);
2905  APInt Idx = CIIndex->getValue().zextOrTrunc(6);
2906  if (Value *V = simplifyX86insertq(*II, Op0, Op1, Len, Idx, Builder))
2907  return replaceInstUsesWith(*II, V);
2908  }
2909 
2910  // INSERTQI only uses the lowest 64-bits of the first two 128-bit vector
2911  // operands.
2912  bool MadeChange = false;
2913  if (Value *V = SimplifyDemandedVectorEltsLow(Op0, VWidth0, 1)) {
2914  II->setArgOperand(0, V);
2915  MadeChange = true;
2916  }
2917  if (Value *V = SimplifyDemandedVectorEltsLow(Op1, VWidth1, 1)) {
2918  II->setArgOperand(1, V);
2919  MadeChange = true;
2920  }
2921  if (MadeChange)
2922  return II;
2923  break;
2924  }
2925 
2926  case Intrinsic::x86_sse41_pblendvb:
2927  case Intrinsic::x86_sse41_blendvps:
2928  case Intrinsic::x86_sse41_blendvpd:
2929  case Intrinsic::x86_avx_blendv_ps_256:
2930  case Intrinsic::x86_avx_blendv_pd_256:
2931  case Intrinsic::x86_avx2_pblendvb: {
2932  // fold (blend A, A, Mask) -> A
2933  Value *Op0 = II->getArgOperand(0);
2934  Value *Op1 = II->getArgOperand(1);
2935  Value *Mask = II->getArgOperand(2);
2936  if (Op0 == Op1)
2937  return replaceInstUsesWith(CI, Op0);
2938 
2939  // Zero Mask - select 1st argument.
2940  if (isa<ConstantAggregateZero>(Mask))
2941  return replaceInstUsesWith(CI, Op0);
2942 
2943  // Constant Mask - select 1st/2nd argument lane based on top bit of mask.
2944  if (auto *ConstantMask = dyn_cast<ConstantDataVector>(Mask)) {
2945  Constant *NewSelector = getNegativeIsTrueBoolVec(ConstantMask);
2946  return SelectInst::Create(NewSelector, Op1, Op0, "blendv");
2947  }
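    // For example (illustrative IR): a blendvps whose constant mask is
    // <float -0.0, float 0.0, float -0.0, float 0.0> becomes
    //   select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, %op1, %op0
    // because only the sign bit of each mask element matters.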
2948 
2949  // Convert to a vector select if we can bypass casts and find a boolean
2950  // vector condition value.
2951  Value *BoolVec;
2952  Mask = peekThroughBitcast(Mask);
2953  if (match(Mask, m_SExt(m_Value(BoolVec))) &&
2954  BoolVec->getType()->isVectorTy() &&
2955  BoolVec->getType()->getScalarSizeInBits() == 1) {
2956  assert(Mask->getType()->getPrimitiveSizeInBits() ==
2957  II->getType()->getPrimitiveSizeInBits() &&
2958  "Not expecting mask and operands with different sizes");
2959 
2960  unsigned NumMaskElts = Mask->getType()->getVectorNumElements();
2961  unsigned NumOperandElts = II->getType()->getVectorNumElements();
2962  if (NumMaskElts == NumOperandElts)
2963  return SelectInst::Create(BoolVec, Op1, Op0);
2964 
2965  // If the mask has fewer elements than the operands, each mask bit maps to
2966  // multiple elements of the operands. Bitcast back and forth.
2967  if (NumMaskElts < NumOperandElts) {
2968  Value *CastOp0 = Builder.CreateBitCast(Op0, Mask->getType());
2969  Value *CastOp1 = Builder.CreateBitCast(Op1, Mask->getType());
2970  Value *Sel = Builder.CreateSelect(BoolVec, CastOp1, CastOp0);
2971  return new BitCastInst(Sel, II->getType());
2972  }
2973  }
2974 
2975  break;
2976  }
2977 
2978  case Intrinsic::x86_ssse3_pshuf_b_128:
2979  case Intrinsic::x86_avx2_pshuf_b:
2980  case Intrinsic::x86_avx512_pshuf_b_512:
2981  if (Value *V = simplifyX86pshufb(*II, Builder))
2982  return replaceInstUsesWith(*II, V);
2983  break;
2984 
2985  case Intrinsic::x86_avx_vpermilvar_ps:
2986  case Intrinsic::x86_avx_vpermilvar_ps_256:
2987  case Intrinsic::x86_avx512_vpermilvar_ps_512:
2988  case Intrinsic::x86_avx_vpermilvar_pd:
2989  case Intrinsic::x86_avx_vpermilvar_pd_256:
2990  case Intrinsic::x86_avx512_vpermilvar_pd_512:
2991  if (Value *V = simplifyX86vpermilvar(*II, Builder))
2992  return replaceInstUsesWith(*II, V);
2993  break;
2994 
2995  case Intrinsic::x86_avx2_permd:
2996  case Intrinsic::x86_avx2_permps:
2997  case Intrinsic::x86_avx512_permvar_df_256:
2998  case Intrinsic::x86_avx512_permvar_df_512:
2999  case Intrinsic::x86_avx512_permvar_di_256:
3000  case Intrinsic::x86_avx512_permvar_di_512:
3001  case Intrinsic::x86_avx512_permvar_hi_128:
3002  case Intrinsic::x86_avx512_permvar_hi_256:
3003  case Intrinsic::x86_avx512_permvar_hi_512:
3004  case Intrinsic::x86_avx512_permvar_qi_128:
3005  case Intrinsic::x86_avx512_permvar_qi_256:
3006  case Intrinsic::x86_avx512_permvar_qi_512:
3007  case Intrinsic::x86_avx512_permvar_sf_512:
3008  case Intrinsic::x86_avx512_permvar_si_512:
3009  if (Value *V = simplifyX86vpermv(*II, Builder))
3010  return replaceInstUsesWith(*II, V);
3011  break;
3012 
3013  case Intrinsic::x86_avx_maskload_ps:
3014  case Intrinsic::x86_avx_maskload_pd:
3015  case Intrinsic::x86_avx_maskload_ps_256:
3016  case Intrinsic::x86_avx_maskload_pd_256:
3017  case Intrinsic::x86_avx2_maskload_d:
3018  case Intrinsic::x86_avx2_maskload_q:
3019  case Intrinsic::x86_avx2_maskload_d_256:
3020  case Intrinsic::x86_avx2_maskload_q_256:
3021  if (Instruction *I = simplifyX86MaskedLoad(*II, *this))
3022  return I;
3023  break;
3024 
3025  case Intrinsic::x86_sse2_maskmov_dqu:
3026  case Intrinsic::x86_avx_maskstore_ps:
3027  case Intrinsic::x86_avx_maskstore_pd:
3028  case Intrinsic::x86_avx_maskstore_ps_256:
3029  case Intrinsic::x86_avx_maskstore_pd_256:
3030  case Intrinsic::x86_avx2_maskstore_d:
3031  case Intrinsic::x86_avx2_maskstore_q:
3032  case Intrinsic::x86_avx2_maskstore_d_256:
3033  case Intrinsic::x86_avx2_maskstore_q_256:
3034  if (simplifyX86MaskedStore(*II, *this))
3035  return nullptr;
3036  break;
3037 
3038  case Intrinsic::x86_xop_vpcomb:
3039  case Intrinsic::x86_xop_vpcomd:
3040  case Intrinsic::x86_xop_vpcomq:
3041  case Intrinsic::x86_xop_vpcomw:
3042  if (Value *V = simplifyX86vpcom(*II, Builder, true))
3043  return replaceInstUsesWith(*II, V);
3044  break;
3045 
3046  case Intrinsic::x86_xop_vpcomub:
3047  case Intrinsic::x86_xop_vpcomud:
3048  case Intrinsic::x86_xop_vpcomuq:
3049  case Intrinsic::x86_xop_vpcomuw:
3050  if (Value *V = simplifyX86vpcom(*II, Builder, false))
3051  return replaceInstUsesWith(*II, V);
3052  break;
3053 
3054  case Intrinsic::ppc_altivec_vperm:
3055  // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
3056  // Note that ppc_altivec_vperm has a big-endian bias, so when creating
3057  // a vector shuffle for little endian, we must undo the transformation
3058  // performed on vec_perm in altivec.h. That is, we must complement
3059  // the permutation mask with respect to 31 and reverse the order of
3060  // V1 and V2.
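  // Illustrative sketch (operand names are hypothetical): with a constant
  // mask, a call such as
  //   %r = call <4 x i32> @llvm.ppc.altivec.vperm(<4 x i32> %v1,
  //                                               <4 x i32> %v2,
  //                                               <16 x i8> <constant mask>)
  // is rewritten below into per-byte extractelement/insertelement chains on
  // the bitcast inputs, which later folds can turn into a shufflevector.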
3061  if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) {
3062  assert(Mask->getType()->getVectorNumElements() == 16 &&
3063  "Bad type for intrinsic!");
3064 
3065  // Check that all of the elements are integer constants or undefs.
3066  bool AllEltsOk = true;
3067  for (unsigned i = 0; i != 16; ++i) {
3068  Constant *Elt = Mask->getAggregateElement(i);
3069  if (!Elt || !(isa<ConstantInt>(Elt) || isa<UndefValue>(Elt))) {
3070  AllEltsOk = false;
3071  break;
3072  }
3073  }
3074 
3075  if (AllEltsOk) {
3076  // Cast the input vectors to byte vectors.
3077  Value *Op0 = Builder.CreateBitCast(II->getArgOperand(0),
3078  Mask->getType());
3079  Value *Op1 = Builder.CreateBitCast(II->getArgOperand(1),
3080  Mask->getType());
3081  Value *Result = UndefValue::get(Op0->getType());
3082 
3083  // Only extract each element once.
3084  Value *ExtractedElts[32];
3085  memset(ExtractedElts, 0, sizeof(ExtractedElts));
3086 
3087  for (unsigned i = 0; i != 16; ++i) {
3088  if (isa<UndefValue>(Mask->getAggregateElement(i)))
3089  continue;
3090  unsigned Idx =
3091  cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue();
3092  Idx &= 31; // Match the hardware behavior.
3093  if (DL.isLittleEndian())
3094  Idx = 31 - Idx;
3095 
3096  if (!ExtractedElts[Idx]) {
3097  Value *Op0ToUse = (DL.isLittleEndian()) ? Op1 : Op0;
3098  Value *Op1ToUse = (DL.isLittleEndian()) ? Op0 : Op1;
3099  ExtractedElts[Idx] =
3100  Builder.CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse,
3101  Builder.getInt32(Idx&15));
3102  }
3103 
3104  // Insert this value into the result vector.
3105  Result = Builder.CreateInsertElement(Result, ExtractedElts[Idx],
3106  Builder.getInt32(i));
3107  }
3108  return CastInst::Create(Instruction::BitCast, Result, CI.getType());
3109  }
3110  }
3111  break;
3112 
3113  case Intrinsic::arm_neon_vld1: {
3114  unsigned MemAlign = getKnownAlignment(II->getArgOperand(0),
3115  DL, II, &AC, &DT);
3116  if (Value *V = simplifyNeonVld1(*II, MemAlign, Builder))
3117  return replaceInstUsesWith(*II, V);
3118  break;
3119  }
3120 
3121  case Intrinsic::arm_neon_vld2:
3122  case Intrinsic::arm_neon_vld3:
3123  case Intrinsic::arm_neon_vld4:
3124  case Intrinsic::arm_neon_vld2lane:
3125  case Intrinsic::arm_neon_vld3lane:
3126  case Intrinsic::arm_neon_vld4lane:
3127  case Intrinsic::arm_neon_vst1:
3128  case Intrinsic::arm_neon_vst2:
3129  case Intrinsic::arm_neon_vst3:
3130  case Intrinsic::arm_neon_vst4:
3131  case Intrinsic::arm_neon_vst2lane:
3132  case Intrinsic::arm_neon_vst3lane:
3133  case Intrinsic::arm_neon_vst4lane: {
3134  unsigned MemAlign =
3135  getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
3136  unsigned AlignArg = II->getNumArgOperands() - 1;
3137  ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
3138  if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
3139  II->setArgOperand(AlignArg,
3140  ConstantInt::get(Type::getInt32Ty(II->getContext()),
3141  MemAlign, false));
3142  return II;
3143  }
3144  break;
3145  }
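  // Illustrative sketch (pointer, intrinsic name mangling, and alignments are
  // hypothetical): if %p is known to be 16-byte aligned, the trailing
  // alignment argument is raised accordingly, e.g.
  //   call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %v, i32 4)
  // -->
  //   call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* %p, <4 x i32> %v, i32 16)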
3146 
3147  case Intrinsic::arm_neon_vtbl1:
3148  case Intrinsic::aarch64_neon_tbl1:
3149  if (Value *V = simplifyNeonTbl1(*II, Builder))
3150  return replaceInstUsesWith(*II, V);
3151  break;
3152 
3153  case Intrinsic::arm_neon_vmulls:
3154  case Intrinsic::arm_neon_vmullu:
3155  case Intrinsic::aarch64_neon_smull:
3156  case Intrinsic::aarch64_neon_umull: {
3157  Value *Arg0 = II->getArgOperand(0);
3158  Value *Arg1 = II->getArgOperand(1);
3159 
3160  // Handle mul by zero first:
3161  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
3162  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3163  }
3164 
3165  // Check for constant LHS & RHS - in this case we just simplify.
3166  bool Zext = (II->getIntrinsicID() == Intrinsic::arm_neon_vmullu ||
3167  II->getIntrinsicID() == Intrinsic::aarch64_neon_umull);
3168  VectorType *NewVT = cast<VectorType>(II->getType());
3169  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3170  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3171  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
3172  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
3173 
3174  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
3175  }
3176 
3177  // Couldn't simplify - canonicalize constant to the RHS.
3178  std::swap(Arg0, Arg1);
3179  }
3180 
3181  // Handle mul by one:
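  // Illustrative sketch (operand names are hypothetical): a signed widening
  // multiply by a splat of 1 becomes a plain sign extension, e.g.
  //   %r = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x,
  //                 <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
  // -->
  //   %r = sext <4 x i16> %x to <4 x i32>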
3182  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3183  if (ConstantInt *Splat =
3184  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3185  if (Splat->isOne())
3186  return CastInst::CreateIntegerCast(Arg0, II->getType(),
3187  /*isSigned=*/!Zext);
3188 
3189  break;
3190  }
3191  case Intrinsic::arm_neon_aesd:
3192  case Intrinsic::arm_neon_aese:
3193  case Intrinsic::aarch64_crypto_aesd:
3194  case Intrinsic::aarch64_crypto_aese: {
3195  Value *DataArg = II->getArgOperand(0);
3196  Value *KeyArg = II->getArgOperand(1);
3197 
3198  // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
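  // Illustrative sketch (value names are hypothetical):
  //   %d = xor <16 x i8> %x, %k
  //   %r = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %d,
  //                                                 <16 x i8> zeroinitializer)
  // -->
  //   %r = call <16 x i8> @llvm.aarch64.crypto.aese(<16 x i8> %x, <16 x i8> %k)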
3199  Value *Data, *Key;
3200  if (match(KeyArg, m_ZeroInt()) &&
3201  match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3202  II->setArgOperand(0, Data);
3203  II->setArgOperand(1, Key);
3204  return II;
3205  }
3206  break;
3207  }
3208  case Intrinsic::amdgcn_rcp: {
3209  Value *Src = II->getArgOperand(0);
3210 
3211  // TODO: Move to ConstantFolding/InstSimplify?
3212  if (isa<UndefValue>(Src))
3213  return replaceInstUsesWith(CI, Src);
3214 
3215  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3216  const APFloat &ArgVal = C->getValueAPF();
3217  APFloat Val(ArgVal.getSemantics(), 1.0);
3218  APFloat::opStatus Status = Val.divide(ArgVal,
3219  APFloat::rmNearestTiesToEven);
3220  // Only do this if it was exact and therefore not dependent on the
3221  // rounding mode.
3222  if (Status == APFloat::opOK)
3223  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val));
3224  }
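  // For example (hypothetical constant input):
  //   %r = call float @llvm.amdgcn.rcp.f32(float 2.0)
  // folds to 0.5 because 1.0/2.0 is exact; an input like 0.1 is left alone
  // since 1.0/0.1 is inexact and would depend on the rounding mode.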
3225 
3226  break;
3227  }
3228  case Intrinsic::amdgcn_rsq: {
3229  Value *Src = II->getArgOperand(0);
3230 
3231  // TODO: Move to ConstantFolding/InstSimplify?
3232  if (isa<UndefValue>(Src))
3233  return replaceInstUsesWith(CI, Src);
3234  break;
3235  }
3236  case Intrinsic::amdgcn_frexp_mant:
3237  case Intrinsic::amdgcn_frexp_exp: {
3238  Value *Src = II->getArgOperand(0);
3239  if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3240  int Exp;
3241  APFloat Significand = frexp(C->getValueAPF(), Exp,
3242  APFloat::rmNearestTiesToEven);
3243 
3244  if (II->getIntrinsicID() == Intrinsic::amdgcn_frexp_mant) {
3245  return replaceInstUsesWith(CI, ConstantFP::get(II->getContext(),
3246  Significand));
3247  }
3248 
3249  // Match instruction special case behavior.
3250  if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
3251  Exp = 0;
3252 
3253  return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), Exp));
3254  }
3255 
3256  if (isa<UndefValue>(Src))
3257  return replaceInstUsesWith(CI, UndefValue::get(II->getType()));
3258 
3259  break;
3260  }
3261  case Intrinsic::amdgcn_class: {
3262  enum {
3263  S_NAN = 1 << 0, // Signaling NaN
3264  Q_NAN = 1 << 1, // Quiet NaN
3265  N_INFINITY = 1 << 2, // Negative infinity
3266  N_NORMAL = 1 << 3, // Negative normal
3267  N_SUBNORMAL = 1 << 4, // Negative subnormal
3268  N_ZERO = 1 << 5, // Negative zero
3269  P_ZERO = 1 << 6, // Positive zero
3270  P_SUBNORMAL = 1 << 7, // Positive subnormal
3271  P_NORMAL = 1 << 8, // Positive normal
3272  P_INFINITY = 1 << 9 // Positive infinity
3273  };
3274 
3275  const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
3276  N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
3277 
3278  Value *Src0 = II->getArgOperand(0);
3279  Value *Src1 = II->getArgOperand(1);
3280  const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
3281  if (!CMask) {
3282  if (isa<UndefValue>(Src0))
3283  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3284 
3285  if (isa<UndefValue>(Src1))
3286  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3287  break;
3288  }
3289 
3290  uint32_t Mask = CMask->getZExtValue();
3291 
3292  // If every class is tested, the result is true regardless of the value.
3293  if ((Mask & FullMask) == FullMask)
3294  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
3295 
3296  if ((Mask & FullMask) == 0)
3297  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
3298 
3299  if (Mask == (S_NAN | Q_NAN)) {
3300  // Equivalent of isnan. Replace with standard fcmp.
3301  Value *FCmp = Builder.CreateFCmpUNO(Src0, Src0);
3302  FCmp->takeName(II);
3303  return replaceInstUsesWith(*II, FCmp);
3304  }
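  // Illustrative sketch (with Mask == 3, i.e. S_NAN | Q_NAN):
  //   %r = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
  // -->
  //   %r = fcmp uno float %x, %x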
3305 
3306  if (Mask == (N_ZERO | P_ZERO)) {
3307  // Equivalent of == 0.
3308  Value *FCmp = Builder.CreateFCmpOEQ(
3309  Src0, ConstantFP::get(Src0->getType(), 0.0));
3310 
3311  FCmp->takeName(II);
3312  return replaceInstUsesWith(*II, FCmp);
3313  }
3314 
3315  // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
3316  if (((Mask & S_NAN) || (Mask & Q_NAN)) && isKnownNeverNaN(Src0, &TLI)) {
3317  II->setArgOperand(1, ConstantInt::get(Src1->getType(),
3318  Mask & ~(S_NAN | Q_NAN)));
3319  return II;
3320  }
3321 
3322  const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
3323  if (!CVal) {
3324  if (isa<UndefValue>(Src0))
3325  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3326 
3327  // Clamp mask to used bits
3328  if ((Mask & FullMask) != Mask) {
3329  CallInst *NewCall = Builder.CreateCall(II->getCalledFunction(),
3330  { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
3331  );
3332 
3333  NewCall->takeName(II);
3334  return replaceInstUsesWith(*II, NewCall);
3335  }
3336 
3337  break;
3338  }
3339 
3340  const APFloat &Val = CVal->getValueAPF();
3341 
3342  bool Result =
3343  ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
3344  ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
3345  ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
3346  ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
3347  ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
3348  ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
3349  ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
3350  ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
3351  ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
3352  ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
3353 
3354  return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
3355  }
3356  case Intrinsic::amdgcn_cvt_pkrtz: {
3357  Value *Src0 = II->getArgOperand(0);
3358  Value *Src1 = II->getArgOperand(1);
3359  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3360  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3361  const fltSemantics &HalfSem
3362  = II->getType()->getScalarType()->getFltSemantics();
3363  bool LosesInfo;
3364  APFloat Val0 = C0->getValueAPF();
3365  APFloat Val1 = C1->getValueAPF();
3366  Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3367  Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
3368 
3369  Constant *Folded = ConstantVector::get({
3370  ConstantFP::get(II->getContext(), Val0),
3371  ConstantFP::get(II->getContext(), Val1) });
3372  return replaceInstUsesWith(*II, Folded);
3373  }
3374  }
3375 
3376  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3377  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3378 
3379  break;
3380  }
3381  case Intrinsic::amdgcn_cvt_pknorm_i16:
3382  case Intrinsic::amdgcn_cvt_pknorm_u16:
3383  case Intrinsic::amdgcn_cvt_pk_i16:
3384  case Intrinsic::amdgcn_cvt_pk_u16: {
3385  Value *Src0 = II->getArgOperand(0);
3386  Value *Src1 = II->getArgOperand(1);
3387 
3388  if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1))
3389  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3390 
3391  break;
3392  }
3393  case Intrinsic::amdgcn_ubfe:
3394  case Intrinsic::amdgcn_sbfe: {
3395  // Decompose simple cases into standard shifts.
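  // Illustrative sketch (hypothetical constants, extracting bits [8, 16) of a
  // 32-bit value):
  //   %r = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 8)
  // -->
  //   %t = shl i32 %x, 16
  //   %r = lshr i32 %t, 24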
3396  Value *Src = II->getArgOperand(0);
3397  if (isa<UndefValue>(Src))
3398  return replaceInstUsesWith(*II, Src);
3399 
3400  unsigned Width;
3401  Type *Ty = II->getType();
3402  unsigned IntSize = Ty->getIntegerBitWidth();
3403 
3404  ConstantInt *CWidth = dyn_cast<ConstantInt>(II->getArgOperand(2));
3405  if (CWidth) {
3406  Width = CWidth->getZExtValue();
3407  if ((Width & (IntSize - 1)) == 0)
3408  return replaceInstUsesWith(*II, ConstantInt::getNullValue(Ty));
3409 
3410  if (Width >= IntSize) {
3411  // Hardware ignores high bits, so remove those.
3412  II->setArgOperand(2, ConstantInt::get(CWidth->getType(),
3413  Width & (IntSize - 1)));
3414  return II;
3415  }
3416  }
3417 
3418  unsigned Offset;
3419  ConstantInt *COffset = dyn_cast<ConstantInt>(II->getArgOperand(1));
3420  if (COffset) {
3421  Offset = COffset->getZExtValue();
3422  if (Offset >= IntSize) {
3423  II->setArgOperand(1, ConstantInt::get(COffset->getType(),
3424  Offset & (IntSize - 1)));
3425  return II;
3426  }
3427  }
3428 
3429  bool Signed = II->getIntrinsicID() == Intrinsic::amdgcn_sbfe;
3430 
3431  // TODO: Also emit sub if only width is constant.
3432  if (!CWidth && COffset && Offset == 0) {
3433  Constant *KSize = ConstantInt::get(COffset->getType(), IntSize);
3434  Value *ShiftVal = Builder.CreateSub(KSize, II->getArgOperand(2));
3435  ShiftVal = Builder.CreateZExt(ShiftVal, II->getType());
3436 
3437  Value *Shl = Builder.CreateShl(Src, ShiftVal);
3438  Value *RightShift = Signed ? Builder.CreateAShr(Shl, ShiftVal)
3439  : Builder.CreateLShr(Shl, ShiftVal);
3440  RightShift->takeName(II);
3441  return replaceInstUsesWith(*II, RightShift);
3442  }
3443 
3444  if (!CWidth || !COffset)
3445  break;
3446 
3447  // TODO: This allows folding to undef when the hardware has specific
3448  // behavior?
3449  if (Offset + Width < IntSize) {
3450  Value *Shl = Builder.CreateShl(Src, IntSize - Offset - Width);
3451  Value *RightShift = Signed ? Builder.CreateAShr(Shl, IntSize - Width)
3452  : Builder.CreateLShr(Shl, IntSize - Width);
3453  RightShift->takeName(II);
3454  return replaceInstUsesWith(*II, RightShift);
3455  }
3456 
3457  Value *RightShift = Signed ? Builder.CreateAShr(Src, Offset)
3458  : Builder.CreateLShr(Src, Offset);
3459 
3460  RightShift->takeName(II);
3461  return replaceInstUsesWith(*II, RightShift);
3462  }
3463  case Intrinsic::amdgcn_exp:
3464  case Intrinsic::amdgcn_exp_compr: {
3465  ConstantInt *En = dyn_cast<ConstantInt>(II->getArgOperand(1));
3466  if (!En) // Illegal.
3467  break;
3468 
3469  unsigned EnBits = En->getZExtValue();
3470  if (EnBits == 0xf)
3471  break; // All inputs enabled.
3472 
3473  bool IsCompr = II->getIntrinsicID() == Intrinsic::amdgcn_exp_compr;
3474  bool Changed = false;
3475  for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
3476  if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
3477  (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
3478  Value *Src = II->getArgOperand(I + 2);
3479  if (!isa<UndefValue>(Src)) {
3480  II->setArgOperand(I + 2, UndefValue::get(Src->getType()));
3481  Changed = true;
3482  }
3483  }
3484  }
3485 
3486  if (Changed)
3487  return II;
3488 
3489  break;
3490  }
3491  case Intrinsic::amdgcn_fmed3: {
3492  // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
3493  // for the shader.
3494 
3495  Value *Src0 = II->getArgOperand(0);
3496  Value *Src1 = II->getArgOperand(1);
3497  Value *Src2 = II->getArgOperand(2);
3498 
3499  // Checking for NaN before canonicalization provides better fidelity when
3500  // mapping other operations onto fmed3 since the order of operands is
3501  // unchanged.
3502  CallInst *NewCall = nullptr;
3503  if (match(Src0, m_NaN()) || isa<UndefValue>(Src0)) {
3504  NewCall = Builder.CreateMinNum(Src1, Src2);
3505  } else if (match(Src1, m_NaN()) || isa<UndefValue>(Src1)) {
3506  NewCall = Builder.CreateMinNum(Src0, Src2);
3507  } else if (match(Src2, m_NaN()) || isa<UndefValue>(Src2)) {
3508  NewCall = Builder.CreateMaxNum(Src0, Src1);
3509  }
3510 
3511  if (NewCall) {
3512  NewCall->copyFastMathFlags(II);
3513  NewCall->takeName(II);
3514  return replaceInstUsesWith(*II, NewCall);
3515  }
3516 
3517  bool Swap = false;
3518  // Canonicalize constants to RHS operands.
3519  //
3520  // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
3521  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3522  std::swap(Src0, Src1);
3523  Swap = true;
3524  }
3525 
3526  if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
3527  std::swap(Src1, Src2);
3528  Swap = true;
3529  }
3530 
3531  if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
3532  std::swap(Src0, Src1);
3533  Swap = true;
3534  }
3535 
3536  if (Swap) {
3537  II->setArgOperand(0, Src0);
3538  II->setArgOperand(1, Src1);
3539  II->setArgOperand(2, Src2);
3540  return II;
3541  }
3542 
3543  if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
3544  if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
3545  if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
3546  APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
3547  C2->getValueAPF());
3548  return replaceInstUsesWith(*II,
3549  ConstantFP::get(Builder.getContext(), Result));
3550  }
3551  }
3552  }
3553 
3554  break;
3555  }
3556  case Intrinsic::amdgcn_icmp:
3557  case Intrinsic::amdgcn_fcmp: {
3558  const ConstantInt *CC = dyn_cast<ConstantInt>(II->getArgOperand(2));
3559  if (!CC)
3560  break;
3561 
3562  // Guard against invalid arguments.
3563  int64_t CCVal = CC->getZExtValue();
3564  bool IsInteger = II->getIntrinsicID() == Intrinsic::amdgcn_icmp;
3565  if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
3566  CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
3567  (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
3568  CCVal > CmpInst::LAST_FCMP_PREDICATE)))
3569  break;
3570 
3571  Value *Src0 = II->getArgOperand(0);
3572  Value *Src1 = II->getArgOperand(1);
3573 
3574  if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
3575  if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
3576  Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
3577  if (CCmp->isNullValue()) {
3578  return replaceInstUsesWith(
3579  *II, ConstantExpr::getSExt(CCmp, II->getType()));
3580  }
3581 
3582  // The result of V_ICMP/V_FCMP assembly instructions (which this
3583  // intrinsic exposes) is one bit per thread, masked with the EXEC
3584  // register (which contains the bitmask of live threads). So a
3585  // comparison that always returns true is the same as a read of the
3586  // EXEC register.
3587  Value *NewF = Intrinsic::getDeclaration(
3588  II->getModule(), Intrinsic::read_register, II->getType());
3589  Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
3590  MDNode *MD = MDNode::get(II->getContext(), MDArgs);
3591  Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
3592  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3593  NewCall->addAttribute(AttributeList::FunctionIndex,
3594  Attribute::Convergent);
3595  NewCall->takeName(II);
3596  return replaceInstUsesWith(*II, NewCall);
3597  }
3598 
3599  // Canonicalize constants to RHS.
3600  CmpInst::Predicate SwapPred
3601  = CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
3602  II->setArgOperand(0, Src1);
3603  II->setArgOperand(1, Src0);
3604  II->setArgOperand(2, ConstantInt::get(CC->getType(),
3605  static_cast<int>(SwapPred)));
3606  return II;
3607  }
3608 
3609  if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
3610  break;
3611 
3612  // Canonicalize compare eq with true value to compare != 0
3613  // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
3614  // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
3615  // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
3616  // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
3617  Value *ExtSrc;
3618  if (CCVal == CmpInst::ICMP_EQ &&
3619  ((match(Src1, m_One()) && match(Src0, m_ZExt(m_Value(ExtSrc)))) ||
3620  (match(Src1, m_AllOnes()) && match(Src0, m_SExt(m_Value(ExtSrc))))) &&
3621  ExtSrc->getType()->isIntegerTy(1)) {
3622  II->setArgOperand(1, ConstantInt::getNullValue(Src1->getType()));
3623  II->setArgOperand(2, ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
3624  return II;
3625  }
3626 
3627  CmpInst::Predicate SrcPred;
3628  Value *SrcLHS;
3629  Value *SrcRHS;
3630 
3631  // Fold compare eq/ne with 0 from a compare result as the predicate to the
3632  // intrinsic. The typical use is a wave vote function in the library, which
3633  // will be fed from a user code condition compared with 0. Fold in the
3634  // redundant compare.
3635 
3636  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
3637  // -> llvm.amdgcn.[if]cmp(a, b, pred)
3638  //
3639  // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
3640  // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
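  // Illustrative sketch (hypothetical values; 32 and 33 are the integer
  // encodings of ICMP_EQ and ICMP_NE):
  //   %c = icmp eq i32 %a, %b
  //   %z = zext i1 %c to i32
  //   %r = call i64 @llvm.amdgcn.icmp.i32(i32 %z, i32 0, i32 33)
  // -->
  //   %r = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)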
3641  if (match(Src1, m_Zero()) &&
3642  match(Src0,
3643  m_ZExtOrSExt(m_Cmp(SrcPred, m_Value(SrcLHS), m_Value(SrcRHS))))) {
3644  if (CCVal == CmpInst::ICMP_EQ)
3645  SrcPred = CmpInst::getInversePredicate(SrcPred);
3646 
3647  Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred) ?
3648  Intrinsic::amdgcn_fcmp : Intrinsic::amdgcn_icmp;
3649 
3650  Type *Ty = SrcLHS->getType();
3651  if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
3652  // Promote to next legal integer type.
3653  unsigned Width = CmpType->getBitWidth();
3654  unsigned NewWidth = Width;
3655  if (Width <= 16)
3656  NewWidth = 16;
3657  else if (Width <= 32)
3658  NewWidth = 32;
3659  else if (Width <= 64)
3660  NewWidth = 64;
3661  else if (Width > 64)
3662  break; // Can't handle this.
3663 
3664  if (Width != NewWidth) {
3665  IntegerType *CmpTy = Builder.getIntNTy(NewWidth);
3666  if (CmpInst::isSigned(SrcPred)) {
3667  SrcLHS = Builder.CreateSExt(SrcLHS, CmpTy);
3668  SrcRHS = Builder.CreateSExt(SrcRHS, CmpTy);
3669  } else {
3670  SrcLHS = Builder.CreateZExt(SrcLHS, CmpTy);
3671  SrcRHS = Builder.CreateZExt(SrcRHS, CmpTy);
3672  }
3673  }
3674  } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
3675  break;
3676 
3677  Value *NewF = Intrinsic::getDeclaration(II->getModule(), NewIID,
3678  SrcLHS->getType());
3679  Value *Args[] = { SrcLHS, SrcRHS,
3680  ConstantInt::get(CC->getType(), SrcPred) };
3681  CallInst *NewCall = Builder.CreateCall(NewF, Args);
3682  NewCall->takeName(II);
3683  return replaceInstUsesWith(*II, NewCall);
3684  }
3685 
3686  break;
3687  }
3688  case Intrinsic::amdgcn_wqm_vote: {
3689  // wqm_vote is identity when the argument is constant.
3690  if (!isa<Constant>(II->getArgOperand(0)))
3691  break;
3692 
3693  return replaceInstUsesWith(*II, II->getArgOperand(0));
3694  }
3695  case Intrinsic::amdgcn_kill: {
3696  const ConstantInt *C = dyn_cast<ConstantInt>(II->getArgOperand(0));
3697  if (!C || !C->getZExtValue())
3698  break;
3699 
3700  // amdgcn.kill(i1 1) is a no-op
3701  return eraseInstFromFunction(CI);
3702  }
3703  case Intrinsic::amdgcn_update_dpp: {
3704  Value *Old = II->getArgOperand(0);
3705 
3706  auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5));
3707  auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3));
3708  auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4));
3709  if (!BC || !RM || !BM ||
3710  BC->isZeroValue() ||
3711  RM->getZExtValue() != 0xF ||
3712  BM->getZExtValue() != 0xF ||
3713  isa<UndefValue>(Old))
3714  break;
3715 
3716  // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
3717  II->setOperand(0, UndefValue::get(Old->getType()));
3718  return II;
3719  }
3720  case Intrinsic::stackrestore: {
3721  // If the save is right next to the restore, remove the restore. This can
3722  // happen when variable allocas are DCE'd.
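  // Illustrative sketch (value names are hypothetical):
  //   %sp = call i8* @llvm.stacksave()
  //   call void @llvm.stackrestore(i8* %sp)   ; immediately follows the save
  // the restore is simply removed.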
3723  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3724  if (SS->getIntrinsicID() == Intrinsic::stacksave) {
3725  // Skip over debug info.
3726  if (SS->getNextNonDebugInstruction() == II) {
3727  return eraseInstFromFunction(CI);
3728  }
3729  }
3730  }
3731 
3732  // Scan down this block to see if there is another stack restore in the
3733  // same block without an intervening call/alloca.
3734  BasicBlock::iterator BI(II);
3735  TerminatorInst *TI = II->getParent()->getTerminator();
3736  bool CannotRemove = false;
3737  for (++BI; &*BI != TI; ++BI) {
3738  if (isa<AllocaInst>(BI)) {
3739  CannotRemove = true;
3740  break;
3741  }
3742  if (CallInst *BCI = dyn_cast<CallInst>(BI)) {
3743  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BCI)) {
3744  // If there is a stackrestore below this one, remove this one.
3745  if (II->getIntrinsicID() == Intrinsic::stackrestore)
3746  return eraseInstFromFunction(CI);
3747 
3748  // Bail if we cross over an intrinsic with side effects, such as
3749  // llvm.stacksave, llvm.read_register, or llvm.setjmp.
3750  if (II->mayHaveSideEffects()) {
3751  CannotRemove = true;
3752  break;
3753  }
3754  } else {
3755  // If we found a non-intrinsic call, we can't remove the stack
3756  // restore.
3757  CannotRemove = true;
3758  break;
3759  }
3760  }
3761  }
3762 
3763  // If the stack restore is in a return, resume, or unwind block and if there
3764  // are no allocas or calls between the restore and the return, nuke the
3765  // restore.
3766  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3767  return eraseInstFromFunction(CI);
3768  break;
3769  }
3770  case Intrinsic::lifetime_start:
3771  // Asan needs to poison memory to detect invalid access which is possible
3772  // even for an empty lifetime range.
3773  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3774  II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
3775  break;
3776 
3777  if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
3778  Intrinsic::lifetime_end, *this))
3779  return nullptr;
3780  break;
3781  case Intrinsic::assume: {
3782  Value *IIOperand = II->getArgOperand(0);
3783  // Remove an assume if it is followed by an identical assume.
3784  // TODO: Do we need this? Unless there are conflicting assumptions, the
3785  // computeKnownBits(IIOperand) below here eliminates redundant assumes.
3786  Instruction *Next = II->getNextNonDebugInstruction();
3787  if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
3788  return eraseInstFromFunction(CI);
3789 
3790  // Canonicalize assume(a && b) -> assume(a); assume(b);
3791  // Note: New assumption intrinsics created here are registered by
3792  // the InstCombineIRInserter object.
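  // Illustrative sketch (value names are hypothetical):
  //   %c = and i1 %a, %b
  //   call void @llvm.assume(i1 %c)
  // -->
  //   call void @llvm.assume(i1 %a)
  //   call void @llvm.assume(i1 %b)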
3793  Value *AssumeIntrinsic = II->getCalledValue(), *A, *B;
3794  if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) {
3795  Builder.CreateCall(AssumeIntrinsic, A, II->getName());
3796  Builder.CreateCall(AssumeIntrinsic, B, II->getName());
3797  return eraseInstFromFunction(*II);
3798  }
3799  // assume(!(a || b)) -> assume(!a); assume(!b);
3800  if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) {
3801  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(A), II->getName());
3802  Builder.CreateCall(AssumeIntrinsic, Builder.CreateNot(B), II->getName());
3803  return eraseInstFromFunction(*II);
3804  }
3805 
3806  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
3807  // (if assume is valid at the load)
3808  CmpInst::Predicate Pred;
3809  Instruction *LHS;
3810  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
3811  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
3812  LHS->getType()->isPointerTy() &&
3813  isValidAssumeForContext(II, LHS, &DT)) {
3814  MDNode *MD = MDNode::get(II->getContext(), None);
3815  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
3816  return eraseInstFromFunction(*II);
3817 
3818  // TODO: apply nonnull return attributes to calls and invokes
3819  // TODO: apply range metadata for range check patterns?
3820  }
3821 
3822  // If there is a dominating assume with the same condition as this one,
3823  // then this one is redundant, and should be removed.
3824  KnownBits Known(1);
3825  computeKnownBits(IIOperand, Known, 0, II);
3826  if (Known.isAllOnes())
3827  return eraseInstFromFunction(*II);
3828 
3829  // Update the cache of affected values for this assumption (we might be
3830  // here because we just simplified the condition).
3831  AC.updateAffectedValues(II);
3832  break;
3833  }
3834  case Intrinsic::experimental_gc_relocate: {
3835  // Translate facts known about a pointer before relocating into
3836  // facts about the relocate value, while being careful to
3837  // preserve relocation semantics.
3838  Value *DerivedPtr = cast<GCRelocateInst>(II)->getDerivedPtr();
3839 
3840  // Remove the relocation if unused; note that this check is required
3841  // to prevent the cases below from looping forever.
3842  if (II->use_empty())
3843  return eraseInstFromFunction(*II);
3844 
3845  // Undef is undef, even after relocation.
3846  // TODO: provide a hook for this in GCStrategy. This is clearly legal for
3847  // most practical collectors, but there was discussion in the review thread
3848  // about whether it was legal for all possible collectors.
3849  if (isa<UndefValue>(DerivedPtr))
3850  // Use undef of gc_relocate's type to replace it.
3851  return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
3852 
3853  if (auto *PT = dyn_cast<PointerType>(II->getType())) {
3854  // The relocation of null will be null for most any collector.
3855  // TODO: provide a hook for this in GCStrategy. There might be some
3856  // weird collector this property does not hold for.
3857  if (isa<ConstantPointerNull>(DerivedPtr))
3858  // Use null-pointer of gc_relocate's type to replace it.
3859  return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
3860 
3861  // isKnownNonNull -> nonnull attribute
3862  if (isKnownNonZero(DerivedPtr, DL, 0, &AC, II, &DT))
3863  II->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
3864  }
3865 
3866  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3867  // Canonicalize on the type from the uses to the defs
3868 
3869  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3870  break;
3871  }
3872 
3873  case Intrinsic::experimental_guard: {
3874  // Is this guard followed by another guard? We scan forward over a small
3875  // fixed window of instructions to handle common cases with conditions
3876  // computed between guards.
3877  Instruction *NextInst = II->getNextNode();
3878  for (unsigned i = 0; i < GuardWideningWindow; i++) {
3879  // Note: Using context-free form to avoid compile time blow up
3880  if (!isSafeToSpeculativelyExecute(NextInst))
3881  break;
3882  NextInst = NextInst->getNextNode();
3883  }
3884  Value *NextCond = nullptr;
3885  if (match(NextInst,
3886  m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
3887  Value *CurrCond = II->getArgOperand(0);
3888 
3889  // Remove a guard that is immediately preceded by an identical guard.
3890  if (CurrCond == NextCond)
3891  return eraseInstFromFunction(*NextInst);
3892 
3893  // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
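  // Illustrative sketch (conditions are hypothetical, deopt state elided):
  //   call void (i1, ...) @llvm.experimental.guard(i1 %a) [ "deopt"() ]
  //   call void (i1, ...) @llvm.experimental.guard(i1 %b) [ "deopt"() ]
  // -->
  //   %c = and i1 %a, %b
  //   call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]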
3894  Instruction* MoveI = II->getNextNode();
3895  while (MoveI != NextInst) {
3896  auto *Temp = MoveI;
3897  MoveI = MoveI->getNextNode();
3898  Temp->moveBefore(II);
3899  }
3900  II->setArgOperand(0, Builder.CreateAnd(CurrCond, NextCond));
3901  return eraseInstFromFunction(*NextInst);
3902  }
3903  break;
3904  }
3905  }
3906  return visitCallSite(II);
3907 }
3908 
3909 // Fence instruction simplification
3910 Instruction *InstCombiner::visitFenceInst(FenceInst &FI) {
3911  // Remove identical consecutive fences.
3912  Instruction *Next = FI.getNextNonDebugInstruction();
3913  if (auto *NFI = dyn_cast<FenceInst>(Next))
3914  if (FI.isIdenticalTo(NFI))
3915  return eraseInstFromFunction(FI);
3916  return nullptr;
3917 }
3918 
3919 // InvokeInst simplification
3920 Instruction *InstCombiner::visitInvokeInst(InvokeInst &II) {
3921  return visitCallSite(&II);
3922 }
3923 
3924 /// If this cast does not affect the value passed through the varargs area, we
3925 /// can eliminate the use of the cast.
3926 static bool isSafeToEliminateVarargsCast(const CallSite CS,
3927  const DataLayout &DL,
3928  const CastInst *const CI,
3929  const int ix) {
3930  if (!CI->isLosslessCast())
3931  return false;
3932 
3933  // If this is a GC intrinsic, avoid munging types. We need types for
3934  // statepoint reconstruction in SelectionDAG.
3935  // TODO: This is probably something which should be expanded to all
3936  // intrinsics since the entire point of intrinsics is that
3937  // they are understandable by the optimizer.
3938  if (isStatepoint(CS) || isGCRelocate(CS) || isGCResult(CS))
3939  return false;
3940 
3941  // The size of ByVal or InAlloca arguments is derived from the type, so we
3942  // can't change to a type with a different size. If the size were
3943  // passed explicitly we could avoid this check.
3944  if (!CS.isByValOrInAllocaArgument(ix))
3945  return true;
3946 
3947  Type* SrcTy =
3948  cast<PointerType>(CI->getOperand(0)->getType())->getElementType();
3949  Type* DstTy = cast<PointerType>(CI->getType())->getElementType();
3950  if (!SrcTy->isSized() || !DstTy->isSized())
3951  return false;
3952  if (DL.getTypeAllocSize(SrcTy) != DL.getTypeAllocSize(DstTy))
3953  return false;
3954  return true;
3955 }
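// Illustrative sketch (the variadic callee @vprint and its arguments are
// hypothetical): a lossless pointer bitcast in the varargs area that this
// predicate admits is dropped by visitCallSite below:
//   call void (i8*, ...) @vprint(i8* %fmt, i8* bitcast (i32* @counter to i8*))
// -->
//   call void (i8*, ...) @vprint(i8* %fmt, i32* @counter)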
3956 
3957 Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
3958  if (!CI->getCalledFunction()) return nullptr;
3959 
3960  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
3961  replaceInstUsesWith(*From, With);
3962  };
3963  auto InstCombineErase = [this](Instruction *I) {
3964  eraseInstFromFunction(*I);
3965  };
3966  LibCallSimplifier Simplifier(DL, &TLI, ORE, InstCombineRAUW,
3967  InstCombineErase);
3968  if (Value *With = Simplifier.optimizeCall(CI)) {
3969  ++NumSimplified;
3970  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
3971  }
3972 
3973  return nullptr;
3974 }
3975 
3976 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
3977  // Strip off at most one level of pointer casts, looking for an alloca. This
3978  // is good enough in practice and simpler than handling any number of casts.
3979  Value *Underlying = TrampMem->stripPointerCasts();
3980  if (Underlying != TrampMem &&
3981  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
3982  return nullptr;
3983  if (!isa<AllocaInst>(Underlying))
3984  return nullptr;
3985 
3986  IntrinsicInst *InitTrampoline = nullptr;
3987  for (User *U : TrampMem->users()) {
3988  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
3989  if (!II)
3990  return nullptr;
3991  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
3992  if (InitTrampoline)
3993  // More than one init_trampoline writes to this value. Give up.
3994  return nullptr;
3995  InitTrampoline = II;
3996  continue;
3997  }
3998  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
3999  // Allow any number of calls to adjust.trampoline.
4000  continue;
4001  return nullptr;
4002  }
4003 
4004  // No call to init.trampoline found.
4005  if (!InitTrampoline)
4006  return nullptr;
4007 
4008  // Check that the alloca is being used in the expected way.
4009  if (InitTrampoline->getOperand(0) != TrampMem)
4010  return nullptr;
4011 
4012  return InitTrampoline;
4013 }
4014 
4015 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
4016  Value *TrampMem) {
4017  // Visit all the previous instructions in the basic block, and try to find a
4018  // init.trampoline which has a direct path to the adjust.trampoline.
4019  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4020  E = AdjustTramp->getParent()->begin();
4021  I != E;) {
4022  Instruction *Inst = &*--I;
4023  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
4024  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4025  II->getOperand(0) == TrampMem)
4026  return II;
4027  if (Inst->mayWriteToMemory())
4028  return nullptr;
4029  }
4030  return nullptr;
4031 }
4032 
4033 // Given a call to llvm.adjust.trampoline, find and return the corresponding
4034 // call to llvm.init.trampoline if the call to the trampoline can be optimized
4035 // to a direct call to a function. Otherwise return NULL.
4036 static IntrinsicInst *findInitTrampoline(Value *Callee) {
4037  Callee = Callee->stripPointerCasts();
4038  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4039  if (!AdjustTramp ||
4040  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4041  return nullptr;
4042 
4043  Value *TrampMem = AdjustTramp->getOperand(0);
4044 
4045  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
4046  return IT;
4047  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4048  return IT;
4049  return nullptr;
4050 }
4051 
4052 /// Improvements for call and invoke instructions.
4053 Instruction *InstCombiner::visitCallSite(CallSite CS) {
4054  if (isAllocLikeFn(CS.getInstruction(), &TLI))
4055  return visitAllocSite(*CS.getInstruction());
4056 
4057  bool Changed = false;
4058 
4059  // Mark any parameters that are known to be non-null with the nonnull
4060  // attribute. This is helpful for inlining calls to functions with null
4061  // checks on their arguments.
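  // Illustrative sketch (the callee @use is hypothetical): if %p is provably
  // non-null at the call site, for example because it is an alloca,
  //   call void @use(i8* %p)
  // becomes
  //   call void @use(i8* nonnull %p)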
4062  SmallVector<unsigned, 4> ArgNos;
4063  unsigned ArgNo = 0;
4064 
4065  for (Value *V : CS.args()) {
4066  if (V->getType()->isPointerTy() &&
4067  !CS.paramHasAttr(ArgNo, Attribute::NonNull) &&
4068  isKnownNonZero(V, DL, 0, &AC, CS.getInstruction(), &DT))
4069  ArgNos.push_back(ArgNo);
4070  ArgNo++;
4071  }
4072 
4073  assert(ArgNo == CS.arg_size() && "sanity check");
4074 
4075  if (!ArgNos.empty()) {
4076  AttributeList AS = CS.getAttributes();
4077  LLVMContext &Ctx = CS.getInstruction()->getContext();
4078  AS = AS.addParamAttribute(Ctx, ArgNos,
4079  Attribute::get(Ctx, Attribute::NonNull));
4080  CS.setAttributes(AS);
4081  Changed = true;
4082  }
4083 
4084  // If the callee is a pointer to a function, attempt to move any casts to the
4085  // arguments of the call/invoke.
4086  Value *Callee = CS.getCalledValue();
4087  if (!isa<Function>(Callee) && transformConstExprCastCall(CS))
4088  return nullptr;
4089 
4090  if (Function *CalleeF = dyn_cast<Function>(Callee)) {
4091  // Remove the convergent attr on calls when the callee is not convergent.
4092  if (CS.isConvergent() && !CalleeF->isConvergent() &&
4093  !CalleeF->isIntrinsic()) {
4094  LLVM_DEBUG(dbgs() << "Removing convergent attr from instr "
4095  << CS.getInstruction() << "\n");
4096  CS.setNotConvergent();
4097  return CS.getInstruction();
4098  }
4099 
4100  // If the call and callee calling conventions don't match, this call must
4101  // be unreachable, as the call is undefined.
4102  if (CalleeF->getCallingConv() != CS.getCallingConv() &&
4103  // Only do this for calls to a function with a body. A prototype may
4104  // not actually end up matching the implementation's calling conv for a
4105  // variety of reasons (e.g. it may be written in assembly).
4106  !CalleeF->isDeclaration()) {
4107  Instruction *OldCall = CS.getInstruction();
4108  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
4109  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
4110  OldCall);
4111  // If OldCall does not return void then replaceAllUsesWith undef.
4112  // This allows ValueHandlers and custom metadata to adjust themselves.
4113  if (!OldCall->getType()->isVoidTy())
4114  replaceInstUsesWith(*OldCall, UndefValue::get(OldCall->getType()));
4115  if (isa<CallInst>(OldCall))
4116  return eraseInstFromFunction(*OldCall);
4117 
4118  // We cannot remove an invoke, because it would change the CFG, just
4119  // change the callee to a null pointer.
4120  cast<InvokeInst>(OldCall)->setCalledFunction(
4121  Constant::getNullValue(CalleeF->getType()));
4122  return nullptr;
4123  }
4124  }
4125 
4126  if ((isa<ConstantPointerNull>(Callee) &&
4127  !NullPointerIsDefined(CS.getInstruction()->getFunction())) ||
4128  isa<UndefValue>(Callee)) {
4129  // If CS does not return void then replaceAllUsesWith undef.
4130  // This allows ValueHandlers and custom metadata to adjust themselves.
4131  if (!CS.getInstruction()->getType()->isVoidTy())
4132  replaceInstUsesWith(*CS.getInstruction(),
4133  UndefValue::get(CS.getInstruction()->getType()));
4134 
4135  if (isa<InvokeInst>(CS.getInstruction())) {
4136  // Can't remove an invoke because we cannot change the CFG.
4137  return nullptr;
4138  }
4139 
4140  // This instruction is not reachable, just remove it. We insert a store to
4141  // undef so that we know that this code is not reachable, despite the fact
4142  // that we can't modify the CFG here.
4143  new StoreInst(ConstantInt::getTrue(Callee->getContext()),
4144  UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
4145  CS.getInstruction());
4146 
4147  return eraseInstFromFunction(*CS.getInstruction());
4148  }
4149 
4150  if (IntrinsicInst *II = findInitTrampoline(Callee))
4151  return transformCallThroughTrampoline(CS, II);
4152 
4153  PointerType *PTy = cast<PointerType>(Callee->getType());
4154  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4155  if (FTy->isVarArg()) {
4156  int ix = FTy->getNumParams();
4157  // See if we can optimize any arguments passed through the varargs area of
4158  // the call.
4159  for (CallSite::arg_iterator I = CS.arg_begin() + FTy->getNumParams(),
4160  E = CS.arg_end(); I != E; ++I, ++ix) {
4161  CastInst *CI = dyn_cast<CastInst>(*I);
4162  if (CI && isSafeToEliminateVarargsCast(CS, DL, CI, ix)) {
4163  *I = CI->getOperand(0);
4164  Changed = true;
4165  }
4166  }
4167  }
4168 
4169  if (isa<InlineAsm>(Callee) && !CS.doesNotThrow()) {
4170  // Inline asm calls cannot throw - mark them 'nounwind'.
4171  CS.setDoesNotThrow();
4172  Changed = true;
4173  }
4174 
4175  // Try to optimize the call if possible, we require DataLayout for most of
4176  // this. None of these calls are seen as possibly dead so go ahead and
4177  // delete the instruction now.
4178  if (CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) {
4179  Instruction *I = tryOptimizeCall(CI);
4180  // If we changed something, return the result. Otherwise let the
4181  // fallthrough checks below run.
4182  if (I) return eraseInstFromFunction(*I);
4183  }
4184 
4185  return Changed ? CS.getInstruction() : nullptr;
4186 }
4187 
4188 /// If the callee is a constexpr cast of a function, attempt to move the cast to
4189 /// the arguments of the call/invoke.
4190 bool InstCombiner::transformConstExprCastCall(CallSite CS) {
4191  Function *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
4192  if (!Callee)
4193  return false;
4194 
4195  // If this is a call to a thunk function, don't remove the cast. Thunks are
4196  // used to transparently forward all incoming parameters and outgoing return
4197  // values, so it's important to leave the cast in place.
4198  if (Callee->hasFnAttribute("thunk"))
4199  return false;
4200 
4201  // If this is a musttail call, the callee's prototype must match the caller's
4202  // prototype with the exception of pointee types. The code below doesn't
4203  // implement that, so we can't do this transform.
4204  // TODO: Do the transform if it only requires adding pointer casts.
4205  if (CS.isMustTailCall())
4206  return false;
4207 
4208  Instruction *Caller = CS.getInstruction();
4209  const AttributeList &CallerPAL = CS.getAttributes();
4210 
4211  // Okay, this is a cast from a function to a different type. Unless doing so
4212  // would cause a type conversion of one of our arguments, change this call to
4213  // be a direct call with arguments casted to the appropriate types.
4214  FunctionType *FT = Callee->getFunctionType();
4215  Type *OldRetTy = Caller->getType();
4216  Type *NewRetTy = FT->getReturnType();
4217 
4218  // Check to see if we are changing the return type...
4219  if (OldRetTy != NewRetTy) {
4220 
4221  if (NewRetTy->isStructTy())
4222  return false; // TODO: Handle multiple return values.
4223 
4224  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
4225  if (Callee->isDeclaration())
4226  return false; // Cannot transform this return value.
4227 
4228  if (!Caller->use_empty() &&
4229  // void -> non-void is handled specially
4230  !NewRetTy->isVoidTy())
4231  return false; // Cannot transform this return value.
4232  }
4233 
4234  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
4235  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4236  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
4237  return false; // Attribute not compatible with transformed value.
4238  }
4239 
4240  // If the callsite is an invoke instruction, and the return value is used by
4241  // a PHI node in a successor, we cannot change the return type of the call
4242  // because there is no place to put the cast instruction (without breaking
4243  // the critical edge). Bail out in this case.
4244  if (!Caller->use_empty())
4245  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller))
4246  for (User *U : II->users())
4247  if (PHINode *PN = dyn_cast<PHINode>(U))
4248  if (PN->getParent() == II->getNormalDest() ||
4249  PN->getParent() == II->getUnwindDest())
4250  return false;
4251  }
4252 
4253  unsigned NumActualArgs = CS.arg_size();
4254  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
4255 
4256  // Prevent us turning:
4257  // declare void @takes_i32_inalloca(i32* inalloca)
4258  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
4259  //
4260  // into:
4261  // call void @takes_i32_inalloca(i32* null)
4262  //
4263  // Similarly, avoid folding away bitcasts of byval calls.
4264  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
4265  Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
4266  return false;
4267 
4268  CallSite::arg_iterator AI = CS.arg_begin();
4269  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
4270  Type *ParamTy = FT->getParamType(i);
4271  Type *ActTy = (*AI)->getType();
4272 
4273  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
4274  return false; // Cannot transform this parameter value.
4275 
4276  if (AttrBuilder(CallerPAL.getParamAttributes(i))
4277  .overlaps(AttributeFuncs::typeIncompatible(ParamTy)))
4278  return false; // Attribute not compatible with transformed value.
4279 
4280  if (CS.isInAllocaArgument(i))
4281  return false; // Cannot transform to and from inalloca.
4282 
4283  // If the parameter is passed as a byval argument, then we have to have a
4284  // sized type and the sized type has to have the same size as the old type.
4285  if (ParamTy != ActTy && CallerPAL.hasParamAttribute(i, Attribute::ByVal)) {
4286  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
4287  if (!ParamPTy || !ParamPTy->getElementType()->isSized())
4288  return false;
4289 
4290  Type *CurElTy = ActTy->getPointerElementType();
4291  if (DL.getTypeAllocSize(CurElTy) !=
4292  DL.getTypeAllocSize(ParamPTy->getElementType()))
4293  return false;
4294  }
4295  }
4296 
4297  if (Callee->isDeclaration()) {
4298  // Do not delete arguments unless we have a function body.
4299  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
4300  return false;
4301 
4302  // If the callee is just a declaration, don't change the varargsness of the
4303  // call. We don't want to introduce a varargs call where one doesn't
4304  // already exist.
4305  PointerType *APTy = cast<PointerType>(CS.getCalledValue()->getType());
4306  if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
4307  return false;
4308 
4309  // If both the callee and the cast type are varargs, we still have to make
4310  // sure the number of fixed parameters is the same or we have the same
4311  // ABI issues as if we introduce a varargs call.
4312  if (FT->isVarArg() &&
4313  cast<FunctionType>(APTy->getElementType())->isVarArg() &&
4314  FT->getNumParams() !=
4315  cast<FunctionType>(APTy->getElementType())->getNumParams())
4316  return false;
4317  }
4318 
4319  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
4320  !CallerPAL.isEmpty()) {
4321  // In this case we have more arguments than the new function type, but we
4322  // won't be dropping them. Check that these extra arguments have attributes
4323  // that are compatible with being a vararg call argument.
4324  unsigned SRetIdx;
4325  if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
4326  SRetIdx > FT->getNumParams())
4327  return false;
4328  }
4329 
4330  // Okay, we decided that this is a safe thing to do: go ahead and start
4331  // inserting cast instructions as necessary.
4332  SmallVector<Value *, 8> Args;
4333  SmallVector<AttributeSet, 8> ArgAttrs;
4334  Args.reserve(NumActualArgs);
4335  ArgAttrs.reserve(NumActualArgs);
4336 
4337  // Get any return attributes.
4338  AttrBuilder RAttrs(CallerPAL, AttributeList::ReturnIndex);
4339 
4340  // If the return value is not being used, the type may not be compatible
4341  // with the existing attributes. Wipe out any problematic attributes.
4342  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
4343 
4344  AI = CS.arg_begin();
4345  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
4346  Type *ParamTy = FT->getParamType(i);
4347 
4348  Value *NewArg = *AI;
4349  if ((*AI)->getType() != ParamTy)
4350  NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
4351  Args.push_back(NewArg);
4352 
4353  // Add any parameter attributes.
4354  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4355  }
4356 
4357  // If the function takes more arguments than the call was taking, add them
4358  // now.
4359  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
4360  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
4361  ArgAttrs.push_back(AttributeSet());
4362  }
4363 
4364  // If we are removing arguments to the function, emit an obnoxious warning.
4365  if (FT->getNumParams() < NumActualArgs) {
4366  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
4367  if (FT->isVarArg()) {
4368  // Add all of the arguments in their promoted form to the arg list.
4369  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
4370  Type *PTy = getPromotedType((*AI)->getType());
4371  Value *NewArg = *AI;
4372  if (PTy != (*AI)->getType()) {
4373  // Must promote to pass through va_arg area!
4374  Instruction::CastOps opcode =
4375  CastInst::getCastOpcode(*AI, false, PTy, false);
4376  NewArg = Builder.CreateCast(opcode, *AI, PTy);
4377  }
4378  Args.push_back(NewArg);
4379 
4380  // Add any parameter attributes.
4381  ArgAttrs.push_back(CallerPAL.getParamAttributes(i));
4382  }
4383  }
4384  }
4385 
4386  AttributeSet FnAttrs = CallerPAL.getFnAttributes();
4387 
4388  if (NewRetTy->isVoidTy())
4389  Caller->setName(""); // Void type should not have a name.
4390 
4391  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
4392  "missing argument attributes");
4393  LLVMContext &Ctx = Callee->getContext();
4394  AttributeList NewCallerPAL = AttributeList::get(
4395  Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
4396 
4397  SmallVector<OperandBundleDef, 1> OpBundles;
4398  CS.getOperandBundlesAsDefs(OpBundles);
4399 
4400  CallSite NewCS;
4401  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4402  NewCS = Builder.CreateInvoke(Callee, II->getNormalDest(),
4403  II->getUnwindDest(), Args, OpBundles);
4404  } else {
4405  NewCS = Builder.CreateCall(Callee, Args, OpBundles);
4406  cast<CallInst>(NewCS.getInstruction())
4407  ->setTailCallKind(cast<CallInst>(Caller)->getTailCallKind());
4408  }
4409  NewCS->takeName(Caller);
4410  NewCS.setCallingConv(CS.getCallingConv());
4411  NewCS.setAttributes(NewCallerPAL);
4412 
4413  // Preserve the weight metadata for the new call instruction. The metadata
4414  // is used by SamplePGO to check callsite's hotness.
4415  uint64_t W;
4416  if (Caller->extractProfTotalWeight(W))
4417  NewCS->setProfWeight(W);
4418 
4419  // Insert a cast of the return type as necessary.
4420  Instruction *NC = NewCS.getInstruction();
4421  Value *NV = NC;
4422  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
4423  if (!NV->getType()->isVoidTy()) {
4424  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
4425  NC->setDebugLoc(Caller->getDebugLoc());
4426 
4427  // If this is an invoke instruction, we should insert it after the first
4428  // non-phi instruction in the normal successor block.
4429  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4430  BasicBlock::iterator I = II->getNormalDest()->getFirstInsertionPt();
4431  InsertNewInstBefore(NC, *I);
4432  } else {
4433  // Otherwise, it's a call, just insert cast right after the call.
4434  InsertNewInstBefore(NC, *Caller);
4435  }
4436  Worklist.AddUsersToWorkList(*Caller);
4437  } else {
4438  NV = UndefValue::get(Caller->getType());
4439  }
4440  }
4441 
4442  if (!Caller->use_empty())
4443  replaceInstUsesWith(*Caller, NV);
4444  else if (Caller->hasValueHandle()) {
4445  if (OldRetTy == NV->getType())
4446  ValueHandleBase::ValueIsRAUWd(Caller, NV);
4447  else
4448  // We cannot call ValueIsRAUWd with a different type, and the
4449  // actual tracked value will disappear.
4450  ValueHandleBase::ValueIsDeleted(Caller);
4451  }
4452 
4453  eraseInstFromFunction(*Caller);
4454  return true;
4455 }
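// Illustrative sketch (function and value names are hypothetical; the pointer
// types make every cast a no-op bitcast):
//   %r = call i32* bitcast (i8* (i8*)* @f to i32* (i32*)*)(i32* %p)
// -->
//   %a = bitcast i32* %p to i8*
//   %c = call i8* @f(i8* %a)
//   %r = bitcast i8* %c to i32*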
4456 
4457 /// Turn a call to a function created by init_trampoline / adjust_trampoline
4458 /// intrinsic pair into a direct call to the underlying function.
4459 Instruction *
4460 InstCombiner::transformCallThroughTrampoline(CallSite CS,
4461  IntrinsicInst *Tramp) {
4462  Value *Callee = CS.getCalledValue();
4463  PointerType *PTy = cast<PointerType>(Callee->getType());
4464  FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
4465  AttributeList Attrs = CS.getAttributes();
4466 
4467  // If the call already has the 'nest' attribute somewhere then give up -
4468  // otherwise 'nest' would occur twice after splicing in the chain.
4469  if (Attrs.hasAttrSomewhere(Attribute::Nest))
4470  return nullptr;
4471 
4472  assert(Tramp &&
4473  "transformCallThroughTrampoline called with incorrect CallSite.");
4474 
4475  Function *NestF =cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
4476  FunctionType *NestFTy = cast<FunctionType>(NestF->getValueType());
4477 
4478  AttributeList NestAttrs = NestF->getAttributes();
4479  if (!NestAttrs.isEmpty()) {
4480  unsigned NestArgNo = 0;
4481  Type *NestTy = nullptr;
4482  AttributeSet NestAttr;
4483 
4484  // Look for a parameter marked with the 'nest' attribute.
4485  for (FunctionType::param_iterator I = NestFTy->param_begin(),
4486  E = NestFTy->param_end();
4487  I != E; ++NestArgNo, ++I) {
4488  AttributeSet AS = NestAttrs.getParamAttributes(NestArgNo);
4489  if (AS.hasAttribute(Attribute::Nest)) {
4490  // Record the parameter type and any other attributes.
4491  NestTy = *I;
4492  NestAttr = AS;
4493  break;
4494  }
4495  }
4496 
4497  if (NestTy) {
4498  Instruction *Caller = CS.getInstruction();
4499  std::vector<Value*> NewArgs;
4500  std::vector<AttributeSet> NewArgAttrs;
4501  NewArgs.reserve(CS.arg_size() + 1);
4502  NewArgAttrs.reserve(CS.arg_size());
4503 
4504  // Insert the nest argument into the call argument list, which may
4505  // mean appending it. Likewise for attributes.
4506 
4507  {
4508  unsigned ArgNo = 0;
4509  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
4510  do {
4511  if (ArgNo == NestArgNo) {
4512  // Add the chain argument and attributes.
4513  Value *NestVal = Tramp->getArgOperand(2);
4514  if (NestVal->getType() != NestTy)
4515  NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
4516  NewArgs.push_back(NestVal);
4517  NewArgAttrs.push_back(NestAttr);
4518  }
4519 
4520  if (I == E)
4521  break;
4522 
4523  // Add the original argument and attributes.
4524  NewArgs.push_back(*I);
4525  NewArgAttrs.push_back(Attrs.getParamAttributes(ArgNo));
4526 
4527  ++ArgNo;
4528  ++I;
4529  } while (true);
4530  }
4531 
4532  // The trampoline may have been bitcast to a bogus type (FTy).
4533  // Handle this by synthesizing a new function type, equal to FTy
4534  // with the chain parameter inserted.
4535 
4536  std::vector<Type*> NewTypes;
4537  NewTypes.reserve(FTy->getNumParams()+1);
4538 
4539  // Insert the chain's type into the list of parameter types, which may
4540  // mean appending it.
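// As with the argument-splicing loop above, the do/while runs one extra
// iteration so the chain's type can be appended when it belongs at the end.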
4541  {
4542  unsigned ArgNo = 0;
4543  FunctionType::param_iterator I = FTy->param_begin(),
4544  E = FTy->param_end();
4545 
4546  do {
4547  if (ArgNo == NestArgNo)
4548  // Add the chain's type.
4549  NewTypes.push_back(NestTy);
4550 
4551  if (I == E)
4552  break;
4553 
4554  // Add the original type.
4555  NewTypes.push_back(*I);
4556 
4557  ++ArgNo;
4558  ++I;
4559  } while (true);
4560  }
4561 
4562  // Replace the trampoline call with a direct call. Let the generic
4563  // code sort out any function type mismatches.
4564  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
4565  FTy->isVarArg());
4566  Constant *NewCallee =
4567  NestF->getType() == PointerType::getUnqual(NewFTy) ?
4568  NestF : ConstantExpr::getBitCast(NestF,
4569  PointerType::getUnqual(NewFTy));
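// Rebuild the attribute list: function and return attributes are carried
// over unchanged, while the per-argument attributes come from NewArgAttrs,
// which now includes the slot for the 'nest' chain.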
4570  AttributeList NewPAL =
4571  AttributeList::get(FTy->getContext(), Attrs.getFnAttributes(),
4572  Attrs.getRetAttributes(), NewArgAttrs);
4573 
4574  SmallVector<OperandBundleDef, 1> OpBundles;
4575  CS.getOperandBundlesAsDefs(OpBundles);
4576 
4577  Instruction *NewCaller;
4578  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
4579  NewCaller = InvokeInst::Create(NewCallee,
4580  II->getNormalDest(), II->getUnwindDest(),
4581  NewArgs, OpBundles);
4582  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
4583  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
4584  } else {
4585  NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
4586  cast<CallInst>(NewCaller)->setTailCallKind(
4587  cast<CallInst>(Caller)->getTailCallKind());
4588  cast<CallInst>(NewCaller)->setCallingConv(
4589  cast<CallInst>(Caller)->getCallingConv());
4590  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
4591  }
4592  NewCaller->setDebugLoc(Caller->getDebugLoc());
4593 
4594  return NewCaller;
4595  }
4596  }
4597 
4598  // Replace the trampoline call with a direct call. Since there is no 'nest'
4599  // parameter, there is no need to adjust the argument list. Let the generic
4600  // code sort out any function type mismatches.
4601  Constant *NewCallee =
4602  NestF->getType() == PTy ? NestF :
4603  ConstantExpr::getBitCast(NestF, PTy);
4604  CS.setCalledFunction(NewCallee);
4605  return CS.getInstruction();
4606 }