1 //===- InstCombineCalls.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the visitCall, visitInvoke, and visitCallBr functions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "InstCombineInternal.h"
14 #include "llvm/ADT/APFloat.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/APSInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/None.h"
19 #include "llvm/ADT/Optional.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/Statistic.h"
28 #include "llvm/Analysis/Loads.h"
32 #include "llvm/IR/Attributes.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Function.h"
39 #include "llvm/IR/GlobalVariable.h"
40 #include "llvm/IR/InlineAsm.h"
41 #include "llvm/IR/InstrTypes.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/IntrinsicsAArch64.h"
47 #include "llvm/IR/IntrinsicsAMDGPU.h"
48 #include "llvm/IR/IntrinsicsARM.h"
49 #include "llvm/IR/IntrinsicsHexagon.h"
50 #include "llvm/IR/LLVMContext.h"
51 #include "llvm/IR/Metadata.h"
52 #include "llvm/IR/PatternMatch.h"
53 #include "llvm/IR/Statepoint.h"
54 #include "llvm/IR/Type.h"
55 #include "llvm/IR/User.h"
56 #include "llvm/IR/Value.h"
57 #include "llvm/IR/ValueHandle.h"
59 #include "llvm/Support/Casting.h"
61 #include "llvm/Support/Compiler.h"
62 #include "llvm/Support/Debug.h"
64 #include "llvm/Support/KnownBits.h"
71 #include <algorithm>
72 #include <cassert>
73 #include <cstdint>
74 #include <utility>
75 #include <vector>
76 
77 #define DEBUG_TYPE "instcombine"
79 
80 using namespace llvm;
81 using namespace PatternMatch;
82 
83 STATISTIC(NumSimplified, "Number of library calls simplified");
84 
85 static cl::opt<unsigned> GuardWideningWindow(
86  "instcombine-guard-widening-window",
87  cl::init(3),
88  cl::desc("How wide an instruction window to bypass looking for "
89  "another guard"));
90 
91 namespace llvm {
92 /// enable preservation of attributes in assume like:
93 /// call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
94 extern cl::opt<bool> EnableKnowledgeRetention;
95 } // namespace llvm
96 
97 /// Return the specified type promoted as it would be to pass through a va_arg
98 /// area.
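/// For example, an i8 or i16 argument is widened to i32 here; wider integer and
/// non-integer types are returned unchanged.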
99 static Type *getPromotedType(Type *Ty) {
100  if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
101  if (ITy->getBitWidth() < 32)
102  return Type::getInt32Ty(Ty->getContext());
103  }
104  return Ty;
105 }
106 
107 /// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
108 /// TODO: This should probably be integrated with visitAllocSites, but that
109 /// requires a deeper change to allow either unread or unwritten objects.
110 static bool hasUndefSource(AnyMemTransferInst *MI) {
111  auto *Src = MI->getRawSource();
112  while (isa<GetElementPtrInst>(Src) || isa<BitCastInst>(Src)) {
113  if (!Src->hasOneUse())
114  return false;
115  Src = cast<Instruction>(Src)->getOperand(0);
116  }
117  return isa<AllocaInst>(Src) && Src->hasOneUse();
118 }
119 
120 Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
121  Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
122  MaybeAlign CopyDstAlign = MI->getDestAlign();
123  if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
124  MI->setDestAlignment(DstAlign);
125  return MI;
126  }
127 
128  Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
129  MaybeAlign CopySrcAlign = MI->getSourceAlign();
130  if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
131  MI->setSourceAlignment(SrcAlign);
132  return MI;
133  }
134 
135  // If we have a store to a location which is known constant, we can conclude
136  // that the store must be storing the constant value (else the memory
137  // wouldn't be constant), and this must be a noop.
138  if (AA->pointsToConstantMemory(MI->getDest())) {
139  // Set the size of the copy to 0, it will be deleted on the next iteration.
140  MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
141  return MI;
142  }
143 
144  // If the source is provably undef, the memcpy/memmove doesn't do anything
145  // (unless the transfer is volatile).
146  if (hasUndefSource(MI) && !MI->isVolatile()) {
147  // Set the size of the copy to 0, it will be deleted on the next iteration.
148  MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
149  return MI;
150  }
151 
152  // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
153  // load/store.
154  ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
155  if (!MemOpLength) return nullptr;
156 
157  // Source and destination pointer types are always "i8*" for the intrinsic. See
158  // if the size is something we can handle with a single primitive load/store.
159  // A single load+store correctly handles overlapping memory in the memmove
160  // case.
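 // For example, an 8-byte memcpy with sufficient alignment becomes a single
 // i64 load from the source followed by an i64 store to the destination.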
161  uint64_t Size = MemOpLength->getLimitedValue();
162  assert(Size && "0-sized memory transferring should be removed already.");
163 
164  if (Size > 8 || (Size&(Size-1)))
165  return nullptr; // If not 1/2/4/8 bytes, exit.
166 
167  // If it is atomic and the alignment is less than the size, we would
168  // introduce an unaligned memory access that CodeGen would later turn
169  // into a libcall. This is not an evident performance gain, so disable
170  // it for now.
171  if (isa<AtomicMemTransferInst>(MI))
172  if (*CopyDstAlign < Size || *CopySrcAlign < Size)
173  return nullptr;
174 
175  // Use an integer load+store unless we can find something better.
176  unsigned SrcAddrSp =
177  cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
178  unsigned DstAddrSp =
179  cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
180 
181  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
182  Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
183  Type *NewDstPtrTy = PointerType::get(IntType, DstAddrSp);
184 
185  // If the memcpy has metadata describing the members, see if we can get the
186  // TBAA tag describing our copy.
187  MDNode *CopyMD = nullptr;
188  if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa)) {
189  CopyMD = M;
190  } else if (MDNode *M = MI->getMetadata(LLVMContext::MD_tbaa_struct)) {
191  if (M->getNumOperands() == 3 && M->getOperand(0) &&
192  mdconst::hasa<ConstantInt>(M->getOperand(0)) &&
193  mdconst::extract<ConstantInt>(M->getOperand(0))->isZero() &&
194  M->getOperand(1) &&
195  mdconst::hasa<ConstantInt>(M->getOperand(1)) &&
196  mdconst::extract<ConstantInt>(M->getOperand(1))->getValue() ==
197  Size &&
198  M->getOperand(2) && isa<MDNode>(M->getOperand(2)))
199  CopyMD = cast<MDNode>(M->getOperand(2));
200  }
201 
202  Value *Src = Builder.CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
203  Value *Dest = Builder.CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
204  LoadInst *L = Builder.CreateLoad(IntType, Src);
205  // Alignment from the mem intrinsic will be better, so use it.
206  L->setAlignment(*CopySrcAlign);
207  if (CopyMD)
208  L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
209  MDNode *LoopMemParallelMD =
210  MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
211  if (LoopMemParallelMD)
212  L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
213  MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
214  if (AccessGroupMD)
215  L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
216 
217  StoreInst *S = Builder.CreateStore(L, Dest);
218  // Alignment from the mem intrinsic will be better, so use it.
219  S->setAlignment(*CopyDstAlign);
220  if (CopyMD)
221  S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
222  if (LoopMemParallelMD)
223  S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
224  if (AccessGroupMD)
225  S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
226 
227  if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
228  // non-atomics can be volatile
229  L->setVolatile(MT->isVolatile());
230  S->setVolatile(MT->isVolatile());
231  }
232  if (isa<AtomicMemTransferInst>(MI)) {
233  // atomics have to be unordered
234  L->setOrdering(AtomicOrdering::Unordered);
235  S->setOrdering(AtomicOrdering::Unordered);
236  }
237 
238  // Set the size of the copy to 0, it will be deleted on the next iteration.
239  MI->setLength(Constant::getNullValue(MemOpLength->getType()));
240  return MI;
241 }
242 
243 Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
244  const Align KnownAlignment =
245  getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
246  MaybeAlign MemSetAlign = MI->getDestAlign();
247  if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
248  MI->setDestAlignment(KnownAlignment);
249  return MI;
250  }
251 
252  // If we have a store to a location which is known constant, we can conclude
253  // that the store must be storing the constant value (else the memory
254  // wouldn't be constant), and this must be a noop.
255  if (AA->pointsToConstantMemory(MI->getDest())) {
256  // Set the size of the copy to 0, it will be deleted on the next iteration.
257  MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
258  return MI;
259  }
260 
261  // Remove memset with an undef value.
262  // FIXME: This is technically incorrect because it might overwrite a poison
263  // value. Change to PoisonValue once #52930 is resolved.
264  if (isa<UndefValue>(MI->getValue())) {
265  // Set the size of the copy to 0, it will be deleted on the next iteration.
266  MI->setLength(Constant::getNullValue(MI->getLength()->getType()));
267  return MI;
268  }
269 
270  // Extract the length and alignment and fill if they are constant.
271  ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
272  ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
273  if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
274  return nullptr;
275  const uint64_t Len = LenC->getLimitedValue();
276  assert(Len && "0-sized memory setting should be removed already.");
277  const Align Alignment = MI->getDestAlign().valueOrOne();
278 
279  // If it is atomic and the alignment is less than the size, we would
280  // introduce an unaligned memory access that CodeGen would later turn
281  // into a libcall. This is not an evident performance gain, so disable
282  // it for now.
283  if (isa<AtomicMemSetInst>(MI))
284  if (Alignment < Len)
285  return nullptr;
286 
287  // memset(s,c,n) -> store s, c (for n=1,2,4,8)
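 // For example, memset(p, 0xAB, 4) becomes a store of the i32 constant
 // 0xABABABAB to p; the fill byte is splatted below by multiplying with
 // 0x0101010101010101.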
288  if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
289  Type *ITy = IntegerType::get(MI->getContext(), Len*8); // n=1 -> i8.
290 
291  Value *Dest = MI->getDest();
292  unsigned DstAddrSp = cast<PointerType>(Dest->getType())->getAddressSpace();
293  Type *NewDstPtrTy = PointerType::get(ITy, DstAddrSp);
294  Dest = Builder.CreateBitCast(Dest, NewDstPtrTy);
295 
296  // Extract the fill value and store.
297  uint64_t Fill = FillC->getZExtValue()*0x0101010101010101ULL;
298  StoreInst *S = Builder.CreateStore(ConstantInt::get(ITy, Fill), Dest,
299  MI->isVolatile());
300  S->setAlignment(Alignment);
301  if (isa<AtomicMemSetInst>(MI))
302  S->setOrdering(AtomicOrdering::Unordered);
303 
304  // Set the size of the copy to 0, it will be deleted on the next iteration.
305  MI->setLength(Constant::getNullValue(LenC->getType()));
306  return MI;
307  }
308 
309  return nullptr;
310 }
311 
312 // TODO, Obvious Missing Transforms:
313 // * Narrow width by halfs excluding zero/undef lanes
314 Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
315  Value *LoadPtr = II.getArgOperand(0);
316  const Align Alignment =
317  cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
318 
319  // If the mask is all ones or undefs, this is a plain vector load of the 1st
320  // argument.
321  if (maskIsAllOneOrUndef(II.getArgOperand(2))) {
322  LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
323  "unmaskedload");
324  L->copyMetadata(II);
325  return L;
326  }
327 
328  // If we can unconditionally load from this address, replace with a
329  // load/select idiom. TODO: use DT for context sensitive query
330  if (isDereferenceablePointer(LoadPtr, II.getType(),
331  II.getModule()->getDataLayout(), &II, &AC)) {
332  LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
333  "unmaskedload");
334  LI->copyMetadata(II);
335  return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
336  }
337 
338  return nullptr;
339 }
340 
341 // TODO, Obvious Missing Transforms:
342 // * Single constant active lane -> store
343 // * Narrow width by halfs excluding zero/undef lanes
344 Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
345  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
346  if (!ConstMask)
347  return nullptr;
348 
349  // If the mask is all zeros, this instruction does nothing.
350  if (ConstMask->isNullValue())
351  return eraseInstFromFunction(II);
352 
353  // If the mask is all ones, this is a plain vector store of the 1st argument.
354  if (ConstMask->isAllOnesValue()) {
355  Value *StorePtr = II.getArgOperand(1);
356  Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
357  StoreInst *S =
358  new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
359  S->copyMetadata(II);
360  return S;
361  }
362 
363  if (isa<ScalableVectorType>(ConstMask->getType()))
364  return nullptr;
365 
366  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
367  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
368  APInt UndefElts(DemandedElts.getBitWidth(), 0);
369  if (Value *V =
370  SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
371  return replaceOperand(II, 0, V);
372 
373  return nullptr;
374 }
375 
376 // TODO, Obvious Missing Transforms:
377 // * Single constant active lane load -> load
378 // * Dereferenceable address & few lanes -> scalarize speculative load/selects
379 // * Adjacent vector addresses -> masked.load
380 // * Narrow width by halfs excluding zero/undef lanes
381 // * Vector incrementing address -> vector masked load
382 Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
383  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
384  if (!ConstMask)
385  return nullptr;
386 
387  // Vector splat address w/known mask -> scalar load
388  // Fold the gather to load the source vector first lane
389  // because it is reloading the same value each time
390  if (ConstMask->isAllOnesValue())
391  if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
392  auto *VecTy = cast<VectorType>(II.getType());
393  const Align Alignment =
394  cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
395  LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
396  Alignment, "load.scalar");
397  Value *Shuf =
398  Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
399  return replaceInstUsesWith(II, cast<Instruction>(Shuf));
400  }
401 
402  return nullptr;
403 }
404 
405 // TODO, Obvious Missing Transforms:
406 // * Single constant active lane -> store
407 // * Adjacent vector addresses -> masked.store
408 // * Narrow store width by halfs excluding zero/undef lanes
409 // * Vector incrementing address -> vector masked store
410 Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
411  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
412  if (!ConstMask)
413  return nullptr;
414 
415  // If the mask is all zeros, a scatter does nothing.
416  if (ConstMask->isNullValue())
417  return eraseInstFromFunction(II);
418 
419  // Vector splat address -> scalar store
420  if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
421  // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
422  if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
423  Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
424  StoreInst *S =
425  new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false, Alignment);
426  S->copyMetadata(II);
427  return S;
428  }
429  // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
430  // lastlane), ptr
431  if (ConstMask->isAllOnesValue()) {
432  Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
433  VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
434  ElementCount VF = WideLoadTy->getElementCount();
435  Constant *EC =
436  ConstantInt::get(Builder.getInt32Ty(), VF.getKnownMinValue());
437  Value *RunTimeVF = VF.isScalable() ? Builder.CreateVScale(EC) : EC;
438  Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
439  Value *Extract =
440  Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
441  StoreInst *S =
442  new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
443  S->copyMetadata(II);
444  return S;
445  }
446  }
447  if (isa<ScalableVectorType>(ConstMask->getType()))
448  return nullptr;
449 
450  // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
451  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
452  APInt UndefElts(DemandedElts.getBitWidth(), 0);
453  if (Value *V =
454  SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts, UndefElts))
455  return replaceOperand(II, 0, V);
456  if (Value *V =
457  SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts, UndefElts))
458  return replaceOperand(II, 1, V);
459 
460  return nullptr;
461 }
462 
463 /// This function transforms launder.invariant.group and strip.invariant.group
464 /// like:
465 /// launder(launder(%x)) -> launder(%x) (the result is not the argument)
466 /// launder(strip(%x)) -> launder(%x)
467 /// strip(strip(%x)) -> strip(%x) (the result is not the argument)
468 /// strip(launder(%x)) -> strip(%x)
469 /// This is legal because it preserves the most recent information about
470 /// the presence or absence of invariant.group.
471 static Instruction *simplifyInvariantGroupIntrinsic(IntrinsicInst &II,
472  InstCombinerImpl &IC) {
473  auto *Arg = II.getArgOperand(0);
474  auto *StrippedArg = Arg->stripPointerCasts();
475  auto *StrippedInvariantGroupsArg = StrippedArg;
476  while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
477  if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
478  Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
479  break;
480  StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
481  }
482  if (StrippedArg == StrippedInvariantGroupsArg)
483  return nullptr; // No launders/strips to remove.
484 
485  Value *Result = nullptr;
486 
487  if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
488  Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
489  else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
490  Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
491  else
492  llvm_unreachable(
493  "simplifyInvariantGroupIntrinsic only handles launder and strip");
494  if (Result->getType()->getPointerAddressSpace() !=
495  II.getType()->getPointerAddressSpace())
496  Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
497  if (Result->getType() != II.getType())
498  Result = IC.Builder.CreateBitCast(Result, II.getType());
499 
500  return cast<Instruction>(Result);
501 }
502 
503 static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
504  assert((II.getIntrinsicID() == Intrinsic::cttz ||
505  II.getIntrinsicID() == Intrinsic::ctlz) &&
506  "Expected cttz or ctlz intrinsic");
507  bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
508  Value *Op0 = II.getArgOperand(0);
509  Value *Op1 = II.getArgOperand(1);
510  Value *X;
511  // ctlz(bitreverse(x)) -> cttz(x)
512  // cttz(bitreverse(x)) -> ctlz(x)
513  if (match(Op0, m_BitReverse(m_Value(X)))) {
514  Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
515  Function *F = Intrinsic::getDeclaration(II.getModule(), ID, II.getType());
516  return CallInst::Create(F, {X, II.getArgOperand(1)});
517  }
518 
519  if (II.getType()->isIntOrIntVectorTy(1)) {
520  // ctlz/cttz i1 Op0 --> not Op0
521  if (match(Op1, m_Zero()))
522  return BinaryOperator::CreateNot(Op0);
523  // If zero is poison, then the input can be assumed to be "true", so the
524  // instruction simplifies to "false".
525  assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
526  return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
527  }
528 
529  // If the operand is a select with constant arm(s), try to hoist ctlz/cttz.
530  if (auto *Sel = dyn_cast<SelectInst>(Op0))
531  if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
532  return R;
533 
534  if (IsTZ) {
535  // cttz(-x) -> cttz(x)
536  if (match(Op0, m_Neg(m_Value(X))))
537  return IC.replaceOperand(II, 0, X);
538 
539  // cttz(sext(x)) -> cttz(zext(x))
540  if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
541  auto *Zext = IC.Builder.CreateZExt(X, II.getType());
542  auto *CttzZext =
543  IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
544  return IC.replaceInstUsesWith(II, CttzZext);
545  }
546 
547  // Zext doesn't change the number of trailing zeros, so narrow:
548  // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
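 // (When x == 0 the narrow cttz would yield the narrow bit width rather than
 // the wide one, so this is only valid when a zero input is poison.)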
549  if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
550  auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
551  IC.Builder.getTrue());
552  auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
553  return IC.replaceInstUsesWith(II, ZextCttz);
554  }
555 
556  // cttz(abs(x)) -> cttz(x)
557  // cttz(nabs(x)) -> cttz(x)
558  Value *Y;
559  SelectPatternFlavor SPF = matchSelectPattern(Op0, X, Y).Flavor;
560  if (SPF == SPF_ABS || SPF == SPF_NABS)
561  return IC.replaceOperand(II, 0, X);
562 
563  if (match(Op0, m_Intrinsic<Intrinsic::abs>(m_Value(X))))
564  return IC.replaceOperand(II, 0, X);
565  }
566 
567  KnownBits Known = IC.computeKnownBits(Op0, 0, &II);
568 
569  // Create a mask for bits above (ctlz) or below (cttz) the first known one.
570  unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
571  : Known.countMaxLeadingZeros();
572  unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
573  : Known.countMinLeadingZeros();
574 
575  // If all bits above (ctlz) or below (cttz) the first known one are known
576  // zero, this value is constant.
577  // FIXME: This should be in InstSimplify because we're replacing an
578  // instruction with a constant.
579  if (PossibleZeros == DefiniteZeros) {
580  auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
581  return IC.replaceInstUsesWith(II, C);
582  }
583 
584  // If the input to cttz/ctlz is known to be non-zero,
585  // then change the 'ZeroIsPoison' parameter to 'true'
586  // because we know the zero behavior can't affect the result.
587  if (!Known.One.isZero() ||
588  isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
589  &IC.getDominatorTree())) {
590  if (!match(II.getArgOperand(1), m_One()))
591  return IC.replaceOperand(II, 1, IC.Builder.getTrue());
592  }
593 
594  // Add range metadata since known bits can't completely reflect what we know.
595  // TODO: Handle splat vectors.
596  auto *IT = dyn_cast<IntegerType>(Op0->getType());
597  if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
598  Metadata *LowAndHigh[] = {
599  ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
600  ConstantAsMetadata::get(ConstantInt::get(IT, PossibleZeros + 1))};
601  II.setMetadata(LLVMContext::MD_range,
602  MDNode::get(II.getContext(), LowAndHigh));
603  return &II;
604  }
605 
606  return nullptr;
607 }
608 
609 static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
610  assert(II.getIntrinsicID() == Intrinsic::ctpop &&
611  "Expected ctpop intrinsic");
612  Type *Ty = II.getType();
613  unsigned BitWidth = Ty->getScalarSizeInBits();
614  Value *Op0 = II.getArgOperand(0);
615  Value *X, *Y;
616 
617  // ctpop(bitreverse(x)) -> ctpop(x)
618  // ctpop(bswap(x)) -> ctpop(x)
619  if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
620  return IC.replaceOperand(II, 0, X);
621 
622  // ctpop(rot(x)) -> ctpop(x)
623  if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
624  match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
625  X == Y)
626  return IC.replaceOperand(II, 0, X);
627 
628  // ctpop(x | -x) -> bitwidth - cttz(x, false)
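 // ((x | -x) keeps the lowest set bit of x and sets every bit above it, so
 // its population count is bitwidth - cttz(x).)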
629  if (Op0->hasOneUse() &&
630  match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
631  Function *F =
632  Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
633  auto *Cttz = IC.Builder.CreateCall(F, {X, IC.Builder.getFalse()});
634  auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
635  return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
636  }
637 
638  // ctpop(~x & (x - 1)) -> cttz(x, false)
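 // ((~x & (x - 1)) is a mask of exactly the bits below the lowest set bit of
 // x, so its population count equals cttz(x).)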
639  if (match(Op0,
640  m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) {
641  Function *F =
642  Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty);
643  return CallInst::Create(F, {X, IC.Builder.getFalse()});
644  }
645 
646  // Zext doesn't change the number of set bits, so narrow:
647  // ctpop (zext X) --> zext (ctpop X)
648  if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
649  Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
650  return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
651  }
652 
653  // If the operand is a select with constant arm(s), try to hoist ctpop.
654  if (auto *Sel = dyn_cast<SelectInst>(Op0))
655  if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
656  return R;
657 
658  KnownBits Known(BitWidth);
659  IC.computeKnownBits(Op0, Known, 0, &II);
660 
661  // If all bits are zero except for exactly one fixed bit, then the result
662  // must be 0 or 1, and we can get that answer by shifting to LSB:
663  // ctpop (X & 32) --> (X & 32) >> 5
664  if ((~Known.Zero).isPowerOf2())
665  return BinaryOperator::CreateLShr(
666  Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
667 
668  // FIXME: Try to simplify vectors of integers.
669  auto *IT = dyn_cast<IntegerType>(Ty);
670  if (!IT)
671  return nullptr;
672 
673  // Add range metadata since known bits can't completely reflect what we know.
674  unsigned MinCount = Known.countMinPopulation();
675  unsigned MaxCount = Known.countMaxPopulation();
676  if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
677  Metadata *LowAndHigh[] = {
678  ConstantAsMetadata::get(ConstantInt::get(IT, MinCount)),
679  ConstantAsMetadata::get(ConstantInt::get(IT, MaxCount + 1))};
680  II.setMetadata(LLVMContext::MD_range,
681  MDNode::get(II.getContext(), LowAndHigh));
682  return &II;
683  }
684 
685  return nullptr;
686 }
687 
688 /// Convert a table lookup to shufflevector if the mask is constant.
689 /// This could benefit tbl1 if the mask is { 7,6,5,4,3,2,1,0 }, in
690 /// which case we could lower the shufflevector with rev64 instructions
691 /// as it's actually a byte reverse.
692 static Value *simplifyNeonTbl1(const IntrinsicInst &II,
693  InstCombiner::BuilderTy &Builder) {
694  // Bail out if the mask is not a constant.
695  auto *C = dyn_cast<Constant>(II.getArgOperand(1));
696  if (!C)
697  return nullptr;
698 
699  auto *VecTy = cast<FixedVectorType>(II.getType());
700  unsigned NumElts = VecTy->getNumElements();
701 
702  // Only perform this transformation for <8 x i8> vector types.
703  if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8)
704  return nullptr;
705 
706  int Indexes[8];
707 
708  for (unsigned I = 0; I < NumElts; ++I) {
709  Constant *COp = C->getAggregateElement(I);
710 
711  if (!COp || !isa<ConstantInt>(COp))
712  return nullptr;
713 
714  Indexes[I] = cast<ConstantInt>(COp)->getLimitedValue();
715 
716  // Make sure the mask indices are in range.
717  if ((unsigned)Indexes[I] >= NumElts)
718  return nullptr;
719  }
720 
721  auto *V1 = II.getArgOperand(0);
722  auto *V2 = Constant::getNullValue(V1->getType());
723  return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes));
724 }
725 
726 // Returns true iff the 2 intrinsics have the same operands, limiting the
727 // comparison to the first NumOperands.
728 static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
729  unsigned NumOperands) {
730  assert(I.arg_size() >= NumOperands && "Not enough operands");
731  assert(E.arg_size() >= NumOperands && "Not enough operands");
732  for (unsigned i = 0; i < NumOperands; i++)
733  if (I.getArgOperand(i) != E.getArgOperand(i))
734  return false;
735  return true;
736 }
737 
738 // Remove trivially empty start/end intrinsic ranges, i.e. a start
739 // immediately followed by an end (ignoring debuginfo or other
740 // start/end intrinsics in between). As this handles only the most trivial
741 // cases, tracking the nesting level is not needed:
742 //
743 // call @llvm.foo.start(i1 0)
744 // call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
745 // call @llvm.foo.end(i1 0)
746 // call @llvm.foo.end(i1 0) ; &I
747 static bool
748 removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC,
749  std::function<bool(const IntrinsicInst &)> IsStart) {
750  // We start from the end intrinsic and scan backwards, so that InstCombine
751  // has already processed (and potentially removed) all the instructions
752  // before the end intrinsic.
753  BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
754  for (; BI != BE; ++BI) {
755  if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
756  if (I->isDebugOrPseudoInst() ||
757  I->getIntrinsicID() == EndI.getIntrinsicID())
758  continue;
759  if (IsStart(*I)) {
760  if (haveSameOperands(EndI, *I, EndI.arg_size())) {
761  IC.eraseInstFromFunction(*I);
762  IC.eraseInstFromFunction(EndI);
763  return true;
764  }
765  // Skip start intrinsics that don't pair with this end intrinsic.
766  continue;
767  }
768  }
769  break;
770  }
771 
772  return false;
773 }
774 
775 Instruction *InstCombinerImpl::visitVAEndInst(VAEndInst &I) {
776  removeTriviallyEmptyRange(I, *this, [](const IntrinsicInst &I) {
777  return I.getIntrinsicID() == Intrinsic::vastart ||
778  I.getIntrinsicID() == Intrinsic::vacopy;
779  });
780  return nullptr;
781 }
782 
783 static CallInst *canonicalizeConstantArg0ToArg1(CallInst &Call) {
784  assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
785  Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
786  if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
787  Call.setArgOperand(0, Arg1);
788  Call.setArgOperand(1, Arg0);
789  return &Call;
790  }
791  return nullptr;
792 }
793 
794 /// Creates a result tuple for an overflow intrinsic \p II with a given
795 /// \p Result and a constant \p Overflow value.
796 static Instruction *createOverflowTuple(IntrinsicInst *II, Value *Result,
797  Constant *Overflow) {
798  Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
799  StructType *ST = cast<StructType>(II->getType());
800  Constant *Struct = ConstantStruct::get(ST, V);
801  return InsertValueInst::Create(Struct, Result, 0);
802 }
803 
804 Instruction *
805 InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
806  WithOverflowInst *WO = cast<WithOverflowInst>(II);
807  Value *OperationResult = nullptr;
808  Constant *OverflowResult = nullptr;
809  if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
810  WO->getRHS(), *WO, OperationResult, OverflowResult))
811  return createOverflowTuple(WO, OperationResult, OverflowResult);
812  return nullptr;
813 }
814 
815 static Optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
816  const DataLayout &DL, AssumptionCache *AC,
817  DominatorTree *DT) {
818  KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
819  if (Known.isNonNegative())
820  return false;
821  if (Known.isNegative())
822  return true;
823 
824  Value *X, *Y;
825  if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
826  return isImpliedByDomCondition(ICmpInst::ICMP_SLT, X, Y, CxtI, DL);
827 
828  return isImpliedByDomCondition(
829  ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
830 }
831 
832 /// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
833 /// can trigger other combines.
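/// For example, smax(X nsw+ 3, 10) becomes smax(X, 7) nsw+ 3, exposing the add
/// and the simpler constant to further folds.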
834 static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
835  InstCombiner::BuilderTy &Builder) {
836  Intrinsic::ID MinMaxID = II->getIntrinsicID();
837  assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
838  MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
839  "Expected a min or max intrinsic");
840 
841  // TODO: Match vectors with undef elements, but undef may not propagate.
842  Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
843  Value *X;
844  const APInt *C0, *C1;
845  if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
846  !match(Op1, m_APInt(C1)))
847  return nullptr;
848 
849  // Check for necessary no-wrap and overflow constraints.
850  bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
851  auto *Add = cast<BinaryOperator>(Op0);
852  if ((IsSigned && !Add->hasNoSignedWrap()) ||
853  (!IsSigned && !Add->hasNoUnsignedWrap()))
854  return nullptr;
855 
856  // If the constant difference overflows, then instsimplify should reduce the
857  // min/max to the add or C1.
858  bool Overflow;
859  APInt CDiff =
860  IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
861  assert(!Overflow && "Expected simplify of min/max");
862 
863  // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
864  // Note: the "mismatched" no-overflow setting does not propagate.
865  Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
866  Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
867  return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
868  : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
869 }
870 /// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
871 Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
872  Type *Ty = MinMax1.getType();
873 
874  // We are looking for a tree of:
875  // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
876  // Where the min and max could be reversed
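 // For example, with i8 operands sign-extended to i32, clamping the i32 sum to
 // [-128, 127] and truncating back is equivalent to @llvm.sadd.sat.i8(A, B).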
877  Instruction *MinMax2;
878  BinaryOperator *AddSub;
879  const APInt *MinValue, *MaxValue;
880  if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
881  if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
882  return nullptr;
883  } else if (match(&MinMax1,
884  m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
885  if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
886  return nullptr;
887  } else
888  return nullptr;
889 
890  // Check that the constants clamp a saturate, and that the new type would be
891  // sensible to convert to.
892  if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
893  return nullptr;
894  // In what bitwidth can this be treated as saturating arithmetics?
895  unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
896  // FIXME: This isn't quite right for vectors, but using the scalar type is a
897  // good first approximation for what should be done there.
898  if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
899  return nullptr;
900 
901  // Also make sure that the inner min/max and the add/sub have one use.
902  if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
903  return nullptr;
904 
905  // Create the new type (which can be a vector type)
906  Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
907 
908  Intrinsic::ID IntrinsicID;
909  if (AddSub->getOpcode() == Instruction::Add)
910  IntrinsicID = Intrinsic::sadd_sat;
911  else if (AddSub->getOpcode() == Instruction::Sub)
912  IntrinsicID = Intrinsic::ssub_sat;
913  else
914  return nullptr;
915 
916  // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
917  // is usually achieved via a sext from a smaller type.
918  if (ComputeMaxSignificantBits(AddSub->getOperand(0), 0, AddSub) >
919  NewBitWidth ||
920  ComputeMaxSignificantBits(AddSub->getOperand(1), 0, AddSub) > NewBitWidth)
921  return nullptr;
922 
923  // Finally create and return the sat intrinsic, truncated to the new type
924  Function *F = Intrinsic::getDeclaration(MinMax1.getModule(), IntrinsicID, NewTy);
925  Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
926  Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
927  Value *Sat = Builder.CreateCall(F, {AT, BT});
928  return CastInst::Create(Instruction::SExt, Sat, Ty);
929 }
930 
931 
932 /// If we have a clamp pattern like max (min X, 42), 41 -- where the output
933 /// can only be one of two possible constant values -- turn that into a select
934 /// of constants.
935 static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
936  InstCombiner::BuilderTy &Builder) {
937  Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
938  Value *X;
939  const APInt *C0, *C1;
940  if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
941  return nullptr;
942 
943  CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
944  switch (II->getIntrinsicID()) {
945  case Intrinsic::smax:
946  if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
947  Pred = ICmpInst::ICMP_SGT;
948  break;
949  case Intrinsic::smin:
950  if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
951  Pred = ICmpInst::ICMP_SLT;
952  break;
953  case Intrinsic::umax:
954  if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
955  Pred = ICmpInst::ICMP_UGT;
956  break;
957  case Intrinsic::umin:
958  if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
959  Pred = ICmpInst::ICMP_ULT;
960  break;
961  default:
962  llvm_unreachable("Expected min/max intrinsic");
963  }
964  if (Pred == CmpInst::BAD_ICMP_PREDICATE)
965  return nullptr;
966 
967  // max (min X, 42), 41 --> X > 41 ? 42 : 41
968  // min (max X, 42), 43 --> X < 43 ? 42 : 43
969  Value *Cmp = Builder.CreateICmp(Pred, X, I1);
970  return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
971 }
972 
973 /// If this min/max has a constant operand and an operand that is a matching
974 /// min/max with a constant operand, constant-fold the 2 constant operands.
975 static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
976  Intrinsic::ID MinMaxID = II->getIntrinsicID();
977  auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
978  if (!LHS || LHS->getIntrinsicID() != MinMaxID)
979  return nullptr;
980 
981  Constant *C0, *C1;
982  if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
983  !match(II->getArgOperand(1), m_ImmConstant(C1)))
984  return nullptr;
985 
986  // max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
987  ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
988  Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1);
989  Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1);
990 
991  Module *Mod = II->getModule();
992  Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
993  return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC});
994 }
995 
996 /// If this min/max has a matching min/max operand with a constant, try to push
997 /// the constant operand into this instruction. This can enable more folds.
998 static Instruction *
999 reassociateMinMaxWithConstantInOperand(IntrinsicInst *II,
1000  InstCombiner::BuilderTy &Builder) {
1001  // Match and capture a min/max operand candidate.
1002  Value *X, *Y;
1003  Constant *C;
1004  Instruction *Inner;
1005  if (!match(II, m_c_MaxOrMin(m_OneUse(m_CombineAnd(
1006  m_Instruction(Inner),
1007  m_MaxOrMin(m_Value(X), m_ImmConstant(C)))),
1008  m_Value(Y))))
1009  return nullptr;
1010 
1011  // The inner op must match. Check for constants to avoid infinite loops.
1012  Intrinsic::ID MinMaxID = II->getIntrinsicID();
1013  auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1014  if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1015  match(InnerMM->getArgOperand(1), m_ImmConstant()))
1016  return nullptr;
1017 
1018  // max (max X, C), Y --> max (max X, Y), C
1019  Function *MinMax =
1020  Intrinsic::getDeclaration(II->getModule(), MinMaxID, II->getType());
1021  Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
1022  NewInner->takeName(Inner);
1023  return CallInst::Create(MinMax, {NewInner, C});
1024 }
1025 
1026 /// Reduce a sequence of min/max intrinsics with a common operand.
1027 static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
1028  // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1029  auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1030  auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1031  Intrinsic::ID MinMaxID = II->getIntrinsicID();
1032  if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1033  RHS->getIntrinsicID() != MinMaxID ||
1034  (!LHS->hasOneUse() && !RHS->hasOneUse()))
1035  return nullptr;
1036 
1037  Value *A = LHS->getArgOperand(0);
1038  Value *B = LHS->getArgOperand(1);
1039  Value *C = RHS->getArgOperand(0);
1040  Value *D = RHS->getArgOperand(1);
1041 
1042  // Look for a common operand.
1043  Value *MinMaxOp = nullptr;
1044  Value *ThirdOp = nullptr;
1045  if (LHS->hasOneUse()) {
1046  // If the LHS is only used in this chain and the RHS is used outside of it,
1047  // reuse the RHS min/max because that will eliminate the LHS.
1048  if (D == A || C == A) {
1049  // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1050  // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1051  MinMaxOp = RHS;
1052  ThirdOp = B;
1053  } else if (D == B || C == B) {
1054  // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1055  // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1056  MinMaxOp = RHS;
1057  ThirdOp = A;
1058  }
1059  } else {
1060  assert(RHS->hasOneUse() && "Expected one-use operand");
1061  // Reuse the LHS. This will eliminate the RHS.
1062  if (D == A || D == B) {
1063  // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1064  // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1065  MinMaxOp = LHS;
1066  ThirdOp = C;
1067  } else if (C == A || C == B) {
1068  // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1069  // min(min(a, b), min(c, b)) --> min(min(a, b), d)
1070  MinMaxOp = LHS;
1071  ThirdOp = D;
1072  }
1073  }
1074 
1075  if (!MinMaxOp || !ThirdOp)
1076  return nullptr;
1077 
1078  Module *Mod = II->getModule();
1079  Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
1080  return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1081 }
1082 
1083 /// If all arguments of the intrinsic are unary shuffles with the same mask,
1084 /// try to shuffle after the intrinsic.
1085 static Instruction *
1086 foldShuffledIntrinsicOperands(IntrinsicInst *II,
1087  InstCombiner::BuilderTy &Builder) {
1088  // TODO: This should be extended to handle other intrinsics like fshl, ctpop,
1089  // etc. Use llvm::isTriviallyVectorizable() and related to determine
1090  // which intrinsics are safe to shuffle?
1091  switch (II->getIntrinsicID()) {
1092  case Intrinsic::smax:
1093  case Intrinsic::smin:
1094  case Intrinsic::umax:
1095  case Intrinsic::umin:
1096  case Intrinsic::fma:
1097  case Intrinsic::fshl:
1098  case Intrinsic::fshr:
1099  break;
1100  default:
1101  return nullptr;
1102  }
1103 
1104  Value *X;
1105  ArrayRef<int> Mask;
1106  if (!match(II->getArgOperand(0),
1107  m_Shuffle(m_Value(X), m_Undef(), m_Mask(Mask))))
1108  return nullptr;
1109 
1110  // At least 1 operand must have 1 use because we are creating 2 instructions.
1111  if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
1112  return nullptr;
1113 
1114  // See if all arguments are shuffled with the same mask.
1115  SmallVector<Value *, 4> NewArgs(II->arg_size());
1116  NewArgs[0] = X;
1117  Type *SrcTy = X->getType();
1118  for (unsigned i = 1, e = II->arg_size(); i != e; ++i) {
1119  if (!match(II->getArgOperand(i),
1120  m_Shuffle(m_Value(X), m_Undef(), m_SpecificMask(Mask))) ||
1121  X->getType() != SrcTy)
1122  return nullptr;
1123  NewArgs[i] = X;
1124  }
1125 
1126  // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
1127  Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1128  Value *NewIntrinsic =
1129  Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
1130  return new ShuffleVectorInst(NewIntrinsic, Mask);
1131 }
1132 
1133 /// CallInst simplification. This mostly only handles folding of intrinsic
1134 /// instructions. For normal calls, it allows visitCallBase to do the heavy
1135 /// lifting.
1136 Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
1137  // Don't try to simplify calls without uses. It will not do anything useful,
1138  // but will result in the following folds being skipped.
1139  if (!CI.use_empty())
1140  if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI)))
1141  return replaceInstUsesWith(CI, V);
1142 
1143  if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1144  return visitFree(CI, FreedOp);
1145 
1146  // If the caller function (i.e. us, the function that contains this CallInst)
1147  // is nounwind, mark the call as nounwind, even if the callee isn't.
1148  if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1149  CI.setDoesNotThrow();
1150  return &CI;
1151  }
1152 
1153  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
1154  if (!II) return visitCallBase(CI);
1155 
1156  // For atomic unordered mem intrinsics, if the length is not positive or
1157  // not a multiple of the element size, then the behavior is undefined.
1158  if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(II))
1159  if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(AMI->getLength()))
1160  if (NumBytes->getSExtValue() < 0 ||
1161  (NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) {
1162  CreateNonTerminatorUnreachable(AMI);
1163  assert(AMI->getType()->isVoidTy() &&
1164  "non void atomic unordered mem intrinsic");
1165  return eraseInstFromFunction(*AMI);
1166  }
1167 
1168  // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1169  // instead of in visitCallBase.
1170  if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1171  bool Changed = false;
1172 
1173  // memmove/cpy/set of zero bytes is a noop.
1174  if (Constant *NumBytes = dyn_cast<Constant>(MI->getLength())) {
1175  if (NumBytes->isNullValue())
1176  return eraseInstFromFunction(CI);
1177  }
1178 
1179  // No other transformations apply to volatile transfers.
1180  if (auto *M = dyn_cast<MemIntrinsic>(MI))
1181  if (M->isVolatile())
1182  return nullptr;
1183 
1184  // If we have a memmove and the source operation is a constant global,
1185  // then the source and dest pointers can't alias, so we can change this
1186  // into a call to memcpy.
1187  if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1188  if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1189  if (GVSrc->isConstant()) {
1190  Module *M = CI.getModule();
1191  Intrinsic::ID MemCpyID =
1192  isa<AtomicMemMoveInst>(MMI)
1193  ? Intrinsic::memcpy_element_unordered_atomic
1194  : Intrinsic::memcpy;
1195  Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1196  CI.getArgOperand(1)->getType(),
1197  CI.getArgOperand(2)->getType() };
1198  CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
1199  Changed = true;
1200  }
1201  }
1202 
1203  if (AnyMemTransferInst *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1204  // memmove(x,x,size) -> noop.
1205  if (MTI->getSource() == MTI->getDest())
1206  return eraseInstFromFunction(CI);
1207  }
1208 
1209  // If we can determine a pointer alignment that is bigger than currently
1210  // set, update the alignment.
1211  if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1212  if (Instruction *I = SimplifyAnyMemTransfer(MTI))
1213  return I;
1214  } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1215  if (Instruction *I = SimplifyAnyMemSet(MSI))
1216  return I;
1217  }
1218 
1219  if (Changed) return II;
1220  }
1221 
1222  // For fixed width vector result intrinsics, use the generic demanded vector
1223  // support.
1224  if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
1225  auto VWidth = IIFVTy->getNumElements();
1226  APInt UndefElts(VWidth, 0);
1227  APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
1228  if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
1229  if (V != II)
1230  return replaceInstUsesWith(*II, V);
1231  return II;
1232  }
1233  }
1234 
1235  if (II->isCommutative()) {
1236  if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
1237  return NewCall;
1238  }
1239 
1240  // Unused constrained FP intrinsic calls may have a declared side effect, which
1241  // prevents them from being removed. In some cases, however, the side effect is
1242  // actually absent. To detect this case, call simplifyConstrainedFPCall. If it
1243  // returns a replacement, the call may be removed.
1244  if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
1245  if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
1246  return eraseInstFromFunction(CI);
1247  }
1248 
1249  Intrinsic::ID IID = II->getIntrinsicID();
1250  switch (IID) {
1251  case Intrinsic::objectsize:
1252  if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false))
1253  return replaceInstUsesWith(CI, V);
1254  return nullptr;
1255  case Intrinsic::abs: {
1256  Value *IIOperand = II->getArgOperand(0);
1257  bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
1258 
1259  // abs(-x) -> abs(x)
1260  // TODO: Copy nsw if it was present on the neg?
1261  Value *X;
1262  if (match(IIOperand, m_Neg(m_Value(X))))
1263  return replaceOperand(*II, 0, X);
1264  if (match(IIOperand, m_Select(m_Value(), m_Value(X), m_Neg(m_Deferred(X)))))
1265  return replaceOperand(*II, 0, X);
1266  if (match(IIOperand, m_Select(m_Value(), m_Neg(m_Value(X)), m_Deferred(X))))
1267  return replaceOperand(*II, 0, X);
1268 
1269  if (Optional<bool> Sign = getKnownSign(IIOperand, II, DL, &AC, &DT)) {
1270  // abs(x) -> x if x >= 0
1271  if (!*Sign)
1272  return replaceInstUsesWith(*II, IIOperand);
1273 
1274  // abs(x) -> -x if x < 0
1275  if (IntMinIsPoison)
1276  return BinaryOperator::CreateNSWNeg(IIOperand);
1277  return BinaryOperator::CreateNeg(IIOperand);
1278  }
1279 
1280  // abs (sext X) --> zext (abs X*)
1281  // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
1282  if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
1283  Value *NarrowAbs =
1284  Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
1285  return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
1286  }
1287 
1288  // Match a complicated way to check if a number is odd/even:
1289  // abs (srem X, 2) --> and X, 1
1290  const APInt *C;
1291  if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
1292  return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
1293 
1294  break;
1295  }
1296  case Intrinsic::umin: {
1297  Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1298  // umin(x, 1) == zext(x != 0)
1299  if (match(I1, m_One())) {
1300  assert(II->getType()->getScalarSizeInBits() != 1 &&
1301  "Expected simplify of umin with max constant");
1302  Value *Zero = Constant::getNullValue(I0->getType());
1303  Value *Cmp = Builder.CreateICmpNE(I0, Zero);
1304  return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
1305  }
1306  [[fallthrough]];
1307  }
1308  case Intrinsic::umax: {
1309  Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1310  Value *X, *Y;
1311  if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
1312  (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
1313  Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
1314  return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
1315  }
1316  Constant *C;
1317  if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
1318  I0->hasOneUse()) {
1319  Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
1320  if (ConstantExpr::getZExt(NarrowC, II->getType()) == C) {
1321  Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
1322  return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
1323  }
1324  }
1325  // If both operands of unsigned min/max are sign-extended, it is still ok
1326  // to narrow the operation.
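 // (Sign-extension also preserves unsigned order: operands with the sign bit
 // set map to larger unsigned values in the wider type.)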
1327  [[fallthrough]];
1328  }
1329  case Intrinsic::smax:
1330  case Intrinsic::smin: {
1331  Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1332  Value *X, *Y;
1333  if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
1334  (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
1335  Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
1336  return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
1337  }
1338 
1339  Constant *C;
1340  if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
1341  I0->hasOneUse()) {
1342  Constant *NarrowC = ConstantExpr::getTrunc(C, X->getType());
1343  if (ConstantExpr::getSExt(NarrowC, II->getType()) == C) {
1344  Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
1345  return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
1346  }
1347  }
1348 
1349  if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
1350  // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
1351  // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
1352  // TODO: Canonicalize neg after min/max if I1 is constant.
1353  if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
1354  (I0->hasOneUse() || I1->hasOneUse())) {
1355  Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
1356  Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
1357  return BinaryOperator::CreateNSWNeg(InvMaxMin);
1358  }
1359  }
1360 
1361  // If we can eliminate ~A and Y is free to invert:
1362  // max ~A, Y --> ~(min A, ~Y)
1363  //
1364  // Examples:
1365  // max ~A, ~Y --> ~(min A, Y)
1366  // max ~A, C --> ~(min A, ~C)
1367  // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
1368  auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
1369  Value *A;
1370  if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
1371  !isFreeToInvert(A, A->hasOneUse()) &&
1372  isFreeToInvert(Y, Y->hasOneUse())) {
1373  Value *NotY = Builder.CreateNot(Y);
1374  Intrinsic::ID InvID = getInverseMinMaxIntrinsic(IID);
1375  Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
1376  return BinaryOperator::CreateNot(InvMaxMin);
1377  }
1378  return nullptr;
1379  };
1380 
1381  if (Instruction *I = moveNotAfterMinMax(I0, I1))
1382  return I;
1383  if (Instruction *I = moveNotAfterMinMax(I1, I0))
1384  return I;
1385 
1386  if (Instruction *I = moveAddAfterMinMax(II, Builder))
1387  return I;
1388 
1389  // smax(X, -X) --> abs(X)
1390  // smin(X, -X) --> -abs(X)
1391  // umax(X, -X) --> -abs(X)
1392  // umin(X, -X) --> abs(X)
1393  if (isKnownNegation(I0, I1)) {
1394  // We can choose either operand as the input to abs(), but if we can
1395  // eliminate the only use of a value, that's better for subsequent
1396  // transforms/analysis.
1397  if (I0->hasOneUse() && !I1->hasOneUse())
1398  std::swap(I0, I1);
1399 
1400  // This is some variant of abs(). See if we can propagate 'nsw' to the abs
1401  // operation and potentially its negation.
1402  bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
1403  Value *Abs = Builder.CreateBinaryIntrinsic(
1404  Intrinsic::abs, I0,
1405  ConstantInt::getBool(II->getContext(), IntMinIsPoison));
1406 
1407  // We don't have a "nabs" intrinsic, so negate if needed based on the
1408  // max/min operation.
1409  if (IID == Intrinsic::smin || IID == Intrinsic::umax)
1410  Abs = Builder.CreateNeg(Abs, "nabs", /* NUW */ false, IntMinIsPoison);
1411  return replaceInstUsesWith(CI, Abs);
1412  }
1413 
1414  if (Instruction *Sel = foldClampRangeOfTwo(II, Builder))
1415  return Sel;
1416 
1417  if (Instruction *SAdd = matchSAddSubSat(*II))
1418  return SAdd;
1419 
1420  if (match(I1, m_ImmConstant()))
1421  if (auto *Sel = dyn_cast<SelectInst>(I0))
1422  if (Instruction *R = FoldOpIntoSelect(*II, Sel))
1423  return R;
1424 
1425  if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II))
1426  return NewMinMax;
1427 
1428  if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
1429  return R;
1430 
1431  if (Instruction *NewMinMax = factorizeMinMaxTree(II))
1432  return NewMinMax;
1433 
1434  break;
1435  }
1436  case Intrinsic::bswap: {
1437  Value *IIOperand = II->getArgOperand(0);
1438 
1439  // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
1440  // inverse-shift-of-bswap:
1441  // bswap (shl X, Y) --> lshr (bswap X), Y
1442  // bswap (lshr X, Y) --> shl (bswap X), Y
1443  Value *X, *Y;
1444  if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
1445  // The transform allows undef vector elements, so try a constant match
1446  // first. If knownbits can handle that case, that clause could be removed.
1447  unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
1448  const APInt *C;
1449  if ((match(Y, m_APIntAllowUndef(C)) && (*C & 7) == 0) ||
1450  MaskedValueIsZero(Y, APInt::getLowBitsSet(BitWidth, 3))) {
1451  Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
1452  BinaryOperator::BinaryOps InverseShift =
1453  cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
1454  ? Instruction::LShr
1455  : Instruction::Shl;
1456  return BinaryOperator::Create(InverseShift, NewSwap, Y);
1457  }
1458  }
1459 
1460  KnownBits Known = computeKnownBits(IIOperand, 0, II);
1461  uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
1462  uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
1463  unsigned BW = Known.getBitWidth();
1464 
1465  // bswap(x) -> shift(x) if x has exactly one "active byte"
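 // For example, if only the low byte of an i32 value can be nonzero, bswap
 // just moves it to the top byte, i.e. a shl by 24.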
1466  if (BW - LZ - TZ == 8) {
1467  assert(LZ != TZ && "active byte cannot be in the middle");
1468  if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
1469  return BinaryOperator::CreateNUWShl(
1470  IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
1471  // -> lshr(x) if the "active byte" is in the high part of x
1472  return BinaryOperator::CreateExactLShr(
1473  IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
1474  }
1475 
1476  // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
1477  if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
1478  unsigned C = X->getType()->getScalarSizeInBits() - BW;
1479  Value *CV = ConstantInt::get(X->getType(), C);
1480  Value *V = Builder.CreateLShr(X, CV);
1481  return new TruncInst(V, IIOperand->getType());
1482  }
1483  break;
1484  }
1485  case Intrinsic::masked_load:
1486  if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
1487  return replaceInstUsesWith(CI, SimplifiedMaskedOp);
1488  break;
1489  case Intrinsic::masked_store:
1490  return simplifyMaskedStore(*II);
1491  case Intrinsic::masked_gather:
1492  return simplifyMaskedGather(*II);
1493  case Intrinsic::masked_scatter:
1494  return simplifyMaskedScatter(*II);
1495  case Intrinsic::launder_invariant_group:
1496  case Intrinsic::strip_invariant_group:
1497  if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
1498  return replaceInstUsesWith(*II, SkippedBarrier);
1499  break;
1500  case Intrinsic::powi:
1501  if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
1502  // 0 and 1 are handled in instsimplify
1503  // powi(x, -1) -> 1/x
1504  if (Power->isMinusOne())
1505  return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
1506  II->getArgOperand(0), II);
1507  // powi(x, 2) -> x*x
1508  if (Power->equalsInt(2))
1509  return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
1510  II->getArgOperand(0), II);
1511 
1512  if (!Power->getValue()[0]) {
1513  Value *X;
1514  // If power is even:
1515  // powi(-x, p) -> powi(x, p)
1516  // powi(fabs(x), p) -> powi(x, p)
1517  // powi(copysign(x, y), p) -> powi(x, p)
1518  if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
1519  match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
1520  match(II->getArgOperand(0),
1521  m_Intrinsic<Intrinsic::copysign>(m_Value(X), m_Value())))
1522  return replaceOperand(*II, 0, X);
1523  }
1524  }
1525  break;
1526 
1527  case Intrinsic::cttz:
1528  case Intrinsic::ctlz:
1529  if (auto *I = foldCttzCtlz(*II, *this))
1530  return I;
1531  break;
1532 
1533  case Intrinsic::ctpop:
1534  if (auto *I = foldCtpop(*II, *this))
1535  return I;
1536  break;
1537 
1538  case Intrinsic::fshl:
1539  case Intrinsic::fshr: {
1540  Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1541  Type *Ty = II->getType();
1542  unsigned BitWidth = Ty->getScalarSizeInBits();
1543  Constant *ShAmtC;
1544  if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
1545  // Canonicalize a shift amount constant operand to modulo the bit-width.
1546  Constant *WidthC = ConstantInt::get(Ty, BitWidth);
1547  Constant *ModuloC =
1548  ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
1549  if (!ModuloC)
1550  return nullptr;
1551  if (ModuloC != ShAmtC)
1552  return replaceOperand(*II, 2, ModuloC);
1553 
1554  assert(ConstantExpr::getICmp(ICmpInst::ICMP_UGT, WidthC, ShAmtC) ==
1555  ConstantInt::getTrue(CmpInst::makeCmpResultType(Ty)) &&
1556  "Shift amount expected to be modulo bitwidth");
1557 
1558  // Canonicalize funnel shift right by constant to funnel shift left. This
1559  // is not entirely arbitrary. For historical reasons, the backend may
1560  // recognize rotate left patterns but miss rotate right patterns.
1561  if (IID == Intrinsic::fshr) {
1562  // fshr X, Y, C --> fshl X, Y, (BitWidth - C)
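        // Illustrative sketch (added commentary, hypothetical constants):
        //   fshr i32 %x, %y, 3 --> fshl i32 %x, %y, 29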
1563  Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
1564  Module *Mod = II->getModule();
1565  Function *Fshl = Intrinsic::getDeclaration(Mod, Intrinsic::fshl, Ty);
1566  return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
1567  }
1568  assert(IID == Intrinsic::fshl &&
1569  "All funnel shifts by simple constants should go left");
1570 
1571  // fshl(X, 0, C) --> shl X, C
1572  // fshl(X, undef, C) --> shl X, C
1573  if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
1574  return BinaryOperator::CreateShl(Op0, ShAmtC);
1575 
1576  // fshl(0, X, C) --> lshr X, (BW-C)
1577  // fshl(undef, X, C) --> lshr X, (BW-C)
1578  if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
1579  return BinaryOperator::CreateLShr(Op1,
1580  ConstantExpr::getSub(WidthC, ShAmtC));
1581 
1582  // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
1583  if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
1584  Module *Mod = II->getModule();
1585  Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
1586  return CallInst::Create(Bswap, { Op0 });
1587  }
1588  }
1589 
1590  // Left or right might be masked.
1591  if (SimplifyDemandedInstructionBits(*II))
1592  return &CI;
1593 
1594  // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
1595  // so only the low bits of the shift amount are demanded if the bitwidth is
1596  // a power-of-2.
1597  if (!isPowerOf2_32(BitWidth))
1598  break;
1599  APInt Op2Demanded = APInt::getLowBitsSet(BitWidth, Log2_32_Ceil(BitWidth));
1600  KnownBits Op2Known(BitWidth);
1601  if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
1602  return &CI;
1603  break;
1604  }
1605  case Intrinsic::uadd_with_overflow:
1606  case Intrinsic::sadd_with_overflow: {
1607  if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
1608  return I;
1609 
1610  // Given 2 constant operands whose sum does not overflow:
1611  // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
1612  // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
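    // Illustrative sketch (added commentary, hypothetical constants):
    //   uaddo (X +nuw 3), 5 --> uaddo X, 8   (3 + 5 does not overflow)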
1613  Value *X;
1614  const APInt *C0, *C1;
1615  Value *Arg0 = II->getArgOperand(0);
1616  Value *Arg1 = II->getArgOperand(1);
1617  bool IsSigned = IID == Intrinsic::sadd_with_overflow;
1618  bool HasNWAdd = IsSigned ? match(Arg0, m_NSWAdd(m_Value(X), m_APInt(C0)))
1619  : match(Arg0, m_NUWAdd(m_Value(X), m_APInt(C0)));
1620  if (HasNWAdd && match(Arg1, m_APInt(C1))) {
1621  bool Overflow;
1622  APInt NewC =
1623  IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
1624  if (!Overflow)
1625  return replaceInstUsesWith(
1626  *II, Builder.CreateBinaryIntrinsic(
1627  IID, X, ConstantInt::get(Arg1->getType(), NewC)));
1628  }
1629  break;
1630  }
1631 
1632  case Intrinsic::umul_with_overflow:
1633  case Intrinsic::smul_with_overflow:
1634  case Intrinsic::usub_with_overflow:
1635  if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
1636  return I;
1637  break;
1638 
1639  case Intrinsic::ssub_with_overflow: {
1640  if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
1641  return I;
1642 
1643  Constant *C;
1644  Value *Arg0 = II->getArgOperand(0);
1645  Value *Arg1 = II->getArgOperand(1);
1646  // Given a constant C that is not the minimum signed value
1647  // for an integer of a given bit width:
1648  //
1649  // ssubo X, C -> saddo X, -C
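    // Illustrative sketch (added commentary): ssubo i32 %x, 7 --> saddo i32 %x, -7.
    // C == INT_MIN is excluded because -INT_MIN is not representable.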
1650  if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
1651  Value *NegVal = ConstantExpr::getNeg(C);
1652  // Build a saddo call that is equivalent to the discovered
1653  // ssubo call.
1654  return replaceInstUsesWith(
1655  *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
1656  Arg0, NegVal));
1657  }
1658 
1659  break;
1660  }
1661 
1662  case Intrinsic::uadd_sat:
1663  case Intrinsic::sadd_sat:
1664  case Intrinsic::usub_sat:
1665  case Intrinsic::ssub_sat: {
1666  SaturatingInst *SI = cast<SaturatingInst>(II);
1667  Type *Ty = SI->getType();
1668  Value *Arg0 = SI->getLHS();
1669  Value *Arg1 = SI->getRHS();
1670 
1671  // Make use of known overflow information.
1672  OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
1673  Arg0, Arg1, SI);
1674  switch (OR) {
1675  case OverflowResult::MayOverflow:
1676  break;
1677  case OverflowResult::NeverOverflows:
1678  if (SI->isSigned())
1679  return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
1680  else
1681  return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
1682  case OverflowResult::AlwaysOverflowsLow: {
1683  unsigned BitWidth = Ty->getScalarSizeInBits();
1684  APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
1685  return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
1686  }
1687  case OverflowResult::AlwaysOverflowsHigh: {
1688  unsigned BitWidth = Ty->getScalarSizeInBits();
1689  APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
1690  return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
1691  }
1692  }
1693 
1694  // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
1695  Constant *C;
1696  if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
1697  C->isNotMinSignedValue()) {
1698  Value *NegVal = ConstantExpr::getNeg(C);
1699  return replaceInstUsesWith(
1700  *II, Builder.CreateBinaryIntrinsic(
1701  Intrinsic::sadd_sat, Arg0, NegVal));
1702  }
1703 
1704  // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
1705  // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
1706  // if Val and Val2 have the same sign
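    // Illustrative sketch (added commentary, hypothetical constants):
    //   uadd.sat(uadd.sat(%x, 10), 20) --> uadd.sat(%x, 30)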
1707  if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
1708  Value *X;
1709  const APInt *Val, *Val2;
1710  APInt NewVal;
1711  bool IsUnsigned =
1712  IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
1713  if (Other->getIntrinsicID() == IID &&
1714  match(Arg1, m_APInt(Val)) &&
1715  match(Other->getArgOperand(0), m_Value(X)) &&
1716  match(Other->getArgOperand(1), m_APInt(Val2))) {
1717  if (IsUnsigned)
1718  NewVal = Val->uadd_sat(*Val2);
1719  else if (Val->isNonNegative() == Val2->isNonNegative()) {
1720  bool Overflow;
1721  NewVal = Val->sadd_ov(*Val2, Overflow);
1722  if (Overflow) {
1723  // Both adds together may add more than SignedMaxValue
1724  // without saturating the final result.
1725  break;
1726  }
1727  } else {
1728  // Cannot fold saturated addition with different signs.
1729  break;
1730  }
1731 
1732  return replaceInstUsesWith(
1733  *II, Builder.CreateBinaryIntrinsic(
1734  IID, X, ConstantInt::get(II->getType(), NewVal)));
1735  }
1736  }
1737  break;
1738  }
1739 
1740  case Intrinsic::minnum:
1741  case Intrinsic::maxnum:
1742  case Intrinsic::minimum:
1743  case Intrinsic::maximum: {
1744  Value *Arg0 = II->getArgOperand(0);
1745  Value *Arg1 = II->getArgOperand(1);
1746  Value *X, *Y;
1747  if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
1748  (Arg0->hasOneUse() || Arg1->hasOneUse())) {
1749  // If both operands are negated, invert the call and negate the result:
1750  // min(-X, -Y) --> -(max(X, Y))
1751  // max(-X, -Y) --> -(min(X, Y))
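      // Illustrative sketch (added commentary):
      //   maxnum(fneg %x, fneg %y) --> fneg(minnum(%x, %y))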
1752  Intrinsic::ID NewIID;
1753  switch (IID) {
1754  case Intrinsic::maxnum:
1755  NewIID = Intrinsic::minnum;
1756  break;
1757  case Intrinsic::minnum:
1758  NewIID = Intrinsic::maxnum;
1759  break;
1760  case Intrinsic::maximum:
1761  NewIID = Intrinsic::minimum;
1762  break;
1763  case Intrinsic::minimum:
1764  NewIID = Intrinsic::maximum;
1765  break;
1766  default:
1767  llvm_unreachable("unexpected intrinsic ID");
1768  }
1769  Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
1770  Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
1771  FNeg->copyIRFlags(II);
1772  return FNeg;
1773  }
1774 
1775  // m(m(X, C2), C1) -> m(X, C)
1776  const APFloat *C1, *C2;
1777  if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
1778  if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
1779  ((match(M->getArgOperand(0), m_Value(X)) &&
1780  match(M->getArgOperand(1), m_APFloat(C2))) ||
1781  (match(M->getArgOperand(1), m_Value(X)) &&
1782  match(M->getArgOperand(0), m_APFloat(C2))))) {
1783  APFloat Res(0.0);
1784  switch (IID) {
1785  case Intrinsic::maxnum:
1786  Res = maxnum(*C1, *C2);
1787  break;
1788  case Intrinsic::minnum:
1789  Res = minnum(*C1, *C2);
1790  break;
1791  case Intrinsic::maximum:
1792  Res = maximum(*C1, *C2);
1793  break;
1794  case Intrinsic::minimum:
1795  Res = minimum(*C1, *C2);
1796  break;
1797  default:
1798  llvm_unreachable("unexpected intrinsic ID");
1799  }
1800  Instruction *NewCall = Builder.CreateBinaryIntrinsic(
1801  IID, X, ConstantFP::get(Arg0->getType(), Res), II);
1802  // TODO: Conservatively intersecting FMF. If Res == C2, the transform
1803  // was a simplification (so Arg0 and its original flags could
1804  // propagate?)
1805  NewCall->andIRFlags(M);
1806  return replaceInstUsesWith(*II, NewCall);
1807  }
1808  }
1809 
1810  // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
1811  if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
1812  match(Arg1, m_OneUse(m_FPExt(m_Value(Y)))) &&
1813  X->getType() == Y->getType()) {
1814  Value *NewCall =
1815  Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
1816  return new FPExtInst(NewCall, II->getType());
1817  }
1818 
1819  // max X, -X --> fabs X
1820  // min X, -X --> -(fabs X)
1821  // TODO: Remove one-use limitation? That is obviously better for max.
1822  // It would be an extra instruction for min (fnabs), but that is
1823  // still likely better for analysis and codegen.
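    // Illustrative sketch (added commentary):
    //   maxnum(%x, fneg %x) --> fabs(%x)
    //   minnum(%x, fneg %x) --> fneg(fabs(%x))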
1824  if ((match(Arg0, m_OneUse(m_FNeg(m_Value(X)))) && Arg1 == X) ||
1825  (match(Arg1, m_OneUse(m_FNeg(m_Value(X)))) && Arg0 == X)) {
1826  Value *R = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, X, II);
1827  if (IID == Intrinsic::minimum || IID == Intrinsic::minnum)
1828  R = Builder.CreateFNegFMF(R, II);
1829  return replaceInstUsesWith(*II, R);
1830  }
1831 
1832  break;
1833  }
1834  case Intrinsic::matrix_multiply: {
1835  // Optimize negation in matrix multiplication.
1836 
1837  // -A * -B -> A * B
1838  Value *A, *B;
1839  if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
1840  match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
1841  replaceOperand(*II, 0, A);
1842  replaceOperand(*II, 1, B);
1843  return II;
1844  }
1845 
1846  Value *Op0 = II->getOperand(0);
1847  Value *Op1 = II->getOperand(1);
1848  Value *OpNotNeg, *NegatedOp;
1849  unsigned NegatedOpArg, OtherOpArg;
1850  if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
1851  NegatedOp = Op0;
1852  NegatedOpArg = 0;
1853  OtherOpArg = 1;
1854  } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
1855  NegatedOp = Op1;
1856  NegatedOpArg = 1;
1857  OtherOpArg = 0;
1858  } else
1859  // Multiplication doesn't have a negated operand.
1860  break;
1861 
1862  // Only optimize if the negated operand has only one use.
1863  if (!NegatedOp->hasOneUse())
1864  break;
1865 
1866  Value *OtherOp = II->getOperand(OtherOpArg);
1867  VectorType *RetTy = cast<VectorType>(II->getType());
1868  VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
1869  VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
1870  ElementCount NegatedCount = NegatedOpTy->getElementCount();
1871  ElementCount OtherCount = OtherOpTy->getElementCount();
1872  ElementCount RetCount = RetTy->getElementCount();
1873  // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
1874  if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
1875  ElementCount::isKnownLT(OtherCount, RetCount)) {
1876  Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
1877  replaceOperand(*II, NegatedOpArg, OpNotNeg);
1878  replaceOperand(*II, OtherOpArg, InverseOtherOp);
1879  return II;
1880  }
1881  // (-A) * B -> -(A * B), if it is cheaper to negate the result
1882  if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
1883  SmallVector<Value *, 5> NewArgs(II->args());
1884  NewArgs[NegatedOpArg] = OpNotNeg;
1885  Instruction *NewMul =
1886  Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
1887  return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
1888  }
1889  break;
1890  }
1891  case Intrinsic::fmuladd: {
1892  // Canonicalize fast fmuladd to the separate fmul + fadd.
1893  if (II->isFast()) {
1895  Builder.setFastMathFlags(II->getFastMathFlags());
1896  Value *Mul = Builder.CreateFMul(II->getArgOperand(0),
1897  II->getArgOperand(1));
1898  Value *Add = Builder.CreateFAdd(Mul, II->getArgOperand(2));
1899  Add->takeName(II);
1900  return replaceInstUsesWith(*II, Add);
1901  }
1902 
1903  // Try to simplify the underlying FMul.
1904  if (Value *V = simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
1905  II->getFastMathFlags(),
1906  SQ.getWithInstruction(II))) {
1907  auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
1908  FAdd->copyFastMathFlags(II);
1909  return FAdd;
1910  }
1911 
1912  [[fallthrough]];
1913  }
1914  case Intrinsic::fma: {
1915  // fma fneg(x), fneg(y), z -> fma x, y, z
1916  Value *Src0 = II->getArgOperand(0);
1917  Value *Src1 = II->getArgOperand(1);
1918  Value *X, *Y;
1919  if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
1920  replaceOperand(*II, 0, X);
1921  replaceOperand(*II, 1, Y);
1922  return II;
1923  }
1924 
1925  // fma fabs(x), fabs(x), z -> fma x, x, z
1926  if (match(Src0, m_FAbs(m_Value(X))) &&
1927  match(Src1, m_FAbs(m_Specific(X)))) {
1928  replaceOperand(*II, 0, X);
1929  replaceOperand(*II, 1, X);
1930  return II;
1931  }
1932 
1933  // Try to simplify the underlying FMul. We can only apply simplifications
1934  // that do not require rounding.
1935  if (Value *V = simplifyFMAFMul(II->getArgOperand(0), II->getArgOperand(1),
1936  II->getFastMathFlags(),
1937  SQ.getWithInstruction(II))) {
1938  auto *FAdd = BinaryOperator::CreateFAdd(V, II->getArgOperand(2));
1939  FAdd->copyFastMathFlags(II);
1940  return FAdd;
1941  }
1942 
1943  // fma x, y, 0 -> fmul x, y
1944  // This is always valid for -0.0, but requires nsz for +0.0 as
1945  // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
1946  if (match(II->getArgOperand(2), m_NegZeroFP()) ||
1947  (match(II->getArgOperand(2), m_PosZeroFP()) &&
1948  II->getFastMathFlags().noSignedZeros()))
1949  return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
1950 
1951  break;
1952  }
1953  case Intrinsic::copysign: {
1954  Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
1955  if (SignBitMustBeZero(Sign, &TLI)) {
1956  // If we know that the sign argument is positive, reduce to FABS:
1957  // copysign Mag, +Sign --> fabs Mag
1958  Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
1959  return replaceInstUsesWith(*II, Fabs);
1960  }
1961  // TODO: There should be a ValueTracking sibling like SignBitMustBeOne.
1962  const APFloat *C;
1963  if (match(Sign, m_APFloat(C)) && C->isNegative()) {
1964  // If we know that the sign argument is negative, reduce to FNABS:
1965  // copysign Mag, -Sign --> fneg (fabs Mag)
1966  Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
1967  return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
1968  }
1969 
1970  // Propagate sign argument through nested calls:
1971  // copysign Mag, (copysign ?, X) --> copysign Mag, X
1972  Value *X;
1973  if (match(Sign, m_Intrinsic<Intrinsic::copysign>(m_Value(), m_Value(X))))
1974  return replaceOperand(*II, 1, X);
1975 
1976  // Peek through changes of magnitude's sign-bit. This call rewrites those:
1977  // copysign (fabs X), Sign --> copysign X, Sign
1978  // copysign (fneg X), Sign --> copysign X, Sign
1979  if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
1980  return replaceOperand(*II, 0, X);
1981 
1982  break;
1983  }
1984  case Intrinsic::fabs: {
1985  Value *Cond, *TVal, *FVal;
1986  if (match(II->getArgOperand(0),
1987  m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
1988  // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
1989  if (isa<Constant>(TVal) && isa<Constant>(FVal)) {
1990  CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
1991  CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
1992  return SelectInst::Create(Cond, AbsT, AbsF);
1993  }
1994  // fabs (select Cond, -FVal, FVal) --> fabs FVal
1995  if (match(TVal, m_FNeg(m_Specific(FVal))))
1996  return replaceOperand(*II, 0, FVal);
1997  // fabs (select Cond, TVal, -TVal) --> fabs TVal
1998  if (match(FVal, m_FNeg(m_Specific(TVal))))
1999  return replaceOperand(*II, 0, TVal);
2000  }
2001 
2002  [[fallthrough]];
2003  }
2004  case Intrinsic::ceil:
2005  case Intrinsic::floor:
2006  case Intrinsic::round:
2007  case Intrinsic::roundeven:
2008  case Intrinsic::nearbyint:
2009  case Intrinsic::rint:
2010  case Intrinsic::trunc: {
2011  Value *ExtSrc;
2012  if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
2013  // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
2014  Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
2015  return new FPExtInst(NarrowII, II->getType());
2016  }
2017  break;
2018  }
2019  case Intrinsic::cos:
2020  case Intrinsic::amdgcn_cos: {
2021  Value *X;
2022  Value *Src = II->getArgOperand(0);
2023  if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X)))) {
2024  // cos(-x) -> cos(x)
2025  // cos(fabs(x)) -> cos(x)
2026  return replaceOperand(*II, 0, X);
2027  }
2028  break;
2029  }
2030  case Intrinsic::sin: {
2031  Value *X;
2032  if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
2033  // sin(-x) --> -sin(x)
2034  Value *NewSin = Builder.CreateUnaryIntrinsic(Intrinsic::sin, X, II);
2035  Instruction *FNeg = UnaryOperator::CreateFNeg(NewSin);
2036  FNeg->copyFastMathFlags(II);
2037  return FNeg;
2038  }
2039  break;
2040  }
2041 
2042  case Intrinsic::arm_neon_vtbl1:
2043  case Intrinsic::aarch64_neon_tbl1:
2044  if (Value *V = simplifyNeonTbl1(*II, Builder))
2045  return replaceInstUsesWith(*II, V);
2046  break;
2047 
2048  case Intrinsic::arm_neon_vmulls:
2049  case Intrinsic::arm_neon_vmullu:
2050  case Intrinsic::aarch64_neon_smull:
2051  case Intrinsic::aarch64_neon_umull: {
2052  Value *Arg0 = II->getArgOperand(0);
2053  Value *Arg1 = II->getArgOperand(1);
2054 
2055  // Handle mul by zero first:
2056  if (isa<ConstantAggregateZero>(Arg0) || isa<ConstantAggregateZero>(Arg1)) {
2057  return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
2058  }
2059 
2060  // Check for constant LHS & RHS - in this case we just simplify.
2061  bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
2062  IID == Intrinsic::aarch64_neon_umull);
2063  VectorType *NewVT = cast<VectorType>(II->getType());
2064  if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
2065  if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
2066  CV0 = ConstantExpr::getIntegerCast(CV0, NewVT, /*isSigned=*/!Zext);
2067  CV1 = ConstantExpr::getIntegerCast(CV1, NewVT, /*isSigned=*/!Zext);
2068 
2069  return replaceInstUsesWith(CI, ConstantExpr::getMul(CV0, CV1));
2070  }
2071 
2072  // Couldn't simplify - canonicalize constant to the RHS.
2073  std::swap(Arg0, Arg1);
2074  }
2075 
2076  // Handle mul by one:
2077  if (Constant *CV1 = dyn_cast<Constant>(Arg1))
2078  if (ConstantInt *Splat =
2079  dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
2080  if (Splat->isOne())
2081  return CastInst::CreateIntegerCast(Arg0, II->getType(),
2082  /*isSigned=*/!Zext);
2083 
2084  break;
2085  }
2086  case Intrinsic::arm_neon_aesd:
2087  case Intrinsic::arm_neon_aese:
2088  case Intrinsic::aarch64_crypto_aesd:
2089  case Intrinsic::aarch64_crypto_aese: {
2090  Value *DataArg = II->getArgOperand(0);
2091  Value *KeyArg = II->getArgOperand(1);
2092 
2093  // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
2094  Value *Data, *Key;
2095  if (match(KeyArg, m_ZeroInt()) &&
2096  match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
2097  replaceOperand(*II, 0, Data);
2098  replaceOperand(*II, 1, Key);
2099  return II;
2100  }
2101  break;
2102  }
2103  case Intrinsic::hexagon_V6_vandvrt:
2104  case Intrinsic::hexagon_V6_vandvrt_128B: {
2105  // Simplify Q -> V -> Q conversion.
2106  if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
2107  Intrinsic::ID ID0 = Op0->getIntrinsicID();
2108  if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
2109  ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
2110  break;
2111  Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
2112  uint64_t Bytes1 = computeKnownBits(Bytes, 0, Op0).One.getZExtValue();
2113  uint64_t Mask1 = computeKnownBits(Mask, 0, II).One.getZExtValue();
2114  // Check if every byte has common bits in Bytes and Mask.
2115  uint64_t C = Bytes1 & Mask1;
2116  if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
2117  return replaceInstUsesWith(*II, Op0->getArgOperand(0));
2118  }
2119  break;
2120  }
2121  case Intrinsic::stackrestore: {
2122  enum class ClassifyResult {
2123  None,
2124  Alloca,
2125  StackRestore,
2126  CallWithSideEffects,
2127  };
2128  auto Classify = [](const Instruction *I) {
2129  if (isa<AllocaInst>(I))
2130  return ClassifyResult::Alloca;
2131 
2132  if (auto *CI = dyn_cast<CallInst>(I)) {
2133  if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
2134  if (II->getIntrinsicID() == Intrinsic::stackrestore)
2135  return ClassifyResult::StackRestore;
2136 
2137  if (II->mayHaveSideEffects())
2138  return ClassifyResult::CallWithSideEffects;
2139  } else {
2140  // Consider all non-intrinsic calls to be side effects
2141  return ClassifyResult::CallWithSideEffects;
2142  }
2143  }
2144 
2145  return ClassifyResult::None;
2146  };
2147 
2148  // If the stacksave and the stackrestore are in the same BB, and there is
2149  // no intervening call, alloca, or stackrestore of a different stacksave,
2150  // remove the restore. This can happen when variable allocas are DCE'd.
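    // Illustrative sketch (added commentary, hypothetical IR) of a removable
    // restore:
    //   %sp = call ptr @llvm.stacksave()
    //   ; ... no alloca, call, or other stackrestore in between ...
    //   call void @llvm.stackrestore(ptr %sp)   ; dead, can be erased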
2151  if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
2152  if (SS->getIntrinsicID() == Intrinsic::stacksave &&
2153  SS->getParent() == II->getParent()) {
2154  BasicBlock::iterator BI(SS);
2155  bool CannotRemove = false;
2156  for (++BI; &*BI != II; ++BI) {
2157  switch (Classify(&*BI)) {
2158  case ClassifyResult::None:
2159  // So far so good, look at next instructions.
2160  break;
2161 
2162  case ClassifyResult::StackRestore:
2163  // If we found an intervening stackrestore for a different
2164  // stacksave, we can't remove the stackrestore. Otherwise, continue.
2165  if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
2166  CannotRemove = true;
2167  break;
2168 
2169  case ClassifyResult::Alloca:
2170  case ClassifyResult::CallWithSideEffects:
2171  // If we found an alloca, a non-intrinsic call, or an intrinsic
2172  // call with side effects, we can't remove the stackrestore.
2173  CannotRemove = true;
2174  break;
2175  }
2176  if (CannotRemove)
2177  break;
2178  }
2179 
2180  if (!CannotRemove)
2181  return eraseInstFromFunction(CI);
2182  }
2183  }
2184 
2185  // Scan down this block to see if there is another stack restore in the
2186  // same block without an intervening call/alloca.
2187  BasicBlock::iterator BI(II);
2188  Instruction *TI = II->getParent()->getTerminator();
2189  bool CannotRemove = false;
2190  for (++BI; &*BI != TI; ++BI) {
2191  switch (Classify(&*BI)) {
2192  case ClassifyResult::None:
2193  // So far so good, look at next instructions.
2194  break;
2195 
2196  case ClassifyResult::StackRestore:
2197  // If there is a stackrestore below this one, remove this one.
2198  return eraseInstFromFunction(CI);
2199 
2200  case ClassifyResult::Alloca:
2201  case ClassifyResult::CallWithSideEffects:
2202  // If we found an alloca, a non-intrinsic call, or an intrinsic call
2203  // with side effects (such as llvm.stacksave and llvm.read_register),
2204  // we can't remove the stack restore.
2205  CannotRemove = true;
2206  break;
2207  }
2208  if (CannotRemove)
2209  break;
2210  }
2211 
2212  // If the stack restore is in a return or resume block and there are no
2213  // allocas or calls between the restore and the terminator, nuke the
2214  // restore.
2215  if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
2216  return eraseInstFromFunction(CI);
2217  break;
2218  }
2219  case Intrinsic::lifetime_end:
2220  // ASan needs to poison memory to detect invalid accesses, which are
2221  // possible even for an empty lifetime range.
2222  if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
2223  II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
2224  II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress))
2225  break;
2226 
2227  if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
2228  return I.getIntrinsicID() == Intrinsic::lifetime_start;
2229  }))
2230  return nullptr;
2231  break;
2232  case Intrinsic::assume: {
2233  Value *IIOperand = II->getArgOperand(0);
2234  SmallVector<OperandBundleDef, 4> OpBundles;
2235  II->getOperandBundlesAsDefs(OpBundles);
2236 
2237  /// This removes the boolean condition from the assume given as the
2238  /// argument and removes the assume entirely if it becomes useless.
2239  /// It always returns nullptr so it can be used as a return value.
2240  auto RemoveConditionFromAssume = [&](Instruction *Assume) -> Instruction * {
2241  assert(isa<AssumeInst>(Assume));
2242  if (isAssumeWithEmptyBundle(*cast<AssumeInst>(II)))
2243  return eraseInstFromFunction(CI);
2244  replaceUse(II->getOperandUse(0), ConstantInt::getTrue(II->getContext()));
2245  return nullptr;
2246  };
2247  // Remove an assume if it is followed by an identical assume.
2248  // TODO: Do we need this? Unless there are conflicting assumptions, the
2249  // computeKnownBits(IIOperand) below here eliminates redundant assumes.
2250  Instruction *Next = II->getNextNonDebugInstruction();
2251  if (match(Next, m_Intrinsic<Intrinsic::assume>(m_Specific(IIOperand))))
2252  return RemoveConditionFromAssume(Next);
2253 
2254  // Canonicalize assume(a && b) -> assume(a); assume(b);
2255  // Note: New assumption intrinsics created here are registered by
2256  // the InstCombineIRInserter object.
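    // Illustrative sketch (added commentary, hypothetical IR):
    //   %c = and i1 %a, %b
    //   call void @llvm.assume(i1 %c)
    // becomes
    //   call void @llvm.assume(i1 %a)
    //   call void @llvm.assume(i1 %b)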
2257  FunctionType *AssumeIntrinsicTy = II->getFunctionType();
2258  Value *AssumeIntrinsic = II->getCalledOperand();
2259  Value *A, *B;
2260  if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
2261  Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles,
2262  II->getName());
2263  Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName());
2264  return eraseInstFromFunction(*II);
2265  }
2266  // assume(!(a || b)) -> assume(!a); assume(!b);
2267  if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
2268  Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
2269  Builder.CreateNot(A), OpBundles, II->getName());
2270  Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic,
2271  Builder.CreateNot(B), II->getName());
2272  return eraseInstFromFunction(*II);
2273  }
2274 
2275  // assume( (load addr) != null ) -> add 'nonnull' metadata to load
2276  // (if assume is valid at the load)
2277  CmpInst::Predicate Pred;
2278  Instruction *LHS;
2279  if (match(IIOperand, m_ICmp(Pred, m_Instruction(LHS), m_Zero())) &&
2280  Pred == ICmpInst::ICMP_NE && LHS->getOpcode() == Instruction::Load &&
2281  LHS->getType()->isPointerTy() &&
2282  isValidAssumeForContext(II, LHS, &DT)) {
2283  MDNode *MD = MDNode::get(II->getContext(), None);
2284  LHS->setMetadata(LLVMContext::MD_nonnull, MD);
2285  return RemoveConditionFromAssume(II);
2286 
2287  // TODO: apply nonnull return attributes to calls and invokes
2288  // TODO: apply range metadata for range check patterns?
2289  }
2290 
2291  // Convert nonnull assume like:
2292  // %A = icmp ne i32* %PTR, null
2293  // call void @llvm.assume(i1 %A)
2294  // into
2295  // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
2296  if (EnableKnowledgeRetention &&
2297  match(IIOperand, m_Cmp(Pred, m_Value(A), m_Zero())) &&
2298  Pred == CmpInst::ICMP_NE && A->getType()->isPointerTy()) {
2299  if (auto *Replacement = buildAssumeFromKnowledge(
2300  {RetainedKnowledge{Attribute::NonNull, 0, A}}, Next, &AC, &DT)) {
2301 
2302  Replacement->insertBefore(Next);
2303  AC.registerAssumption(Replacement);
2304  return RemoveConditionFromAssume(II);
2305  }
2306  }
2307 
2308  // Convert alignment assume like:
2309  // %B = ptrtoint i32* %A to i64
2310  // %C = and i64 %B, Constant
2311  // %D = icmp eq i64 %C, 0
2312  // call void @llvm.assume(i1 %D)
2313  // into
2314  // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
2315  uint64_t AlignMask;
2316  if (EnableKnowledgeRetention &&
2317  match(IIOperand,
2318  m_Cmp(Pred, m_And(m_Value(A), m_ConstantInt(AlignMask)),
2319  m_Zero())) &&
2320  Pred == CmpInst::ICMP_EQ) {
2321  if (isPowerOf2_64(AlignMask + 1)) {
2322  uint64_t Offset = 0;
2323  match(A, m_Add(m_Value(A), m_ConstantInt(Offset)));
2324  if (match(A, m_PtrToInt(m_Value(A)))) {
2325  /// Note: this doesn't preserve the offset information but merges
2326  /// offset and alignment.
2327  /// TODO: we can generate a GEP instead of merging the alignment with
2328  /// the offset.
2329  RetainedKnowledge RK{Attribute::Alignment,
2330  (unsigned)MinAlign(Offset, AlignMask + 1), A};
2331  if (auto *Replacement =
2332  buildAssumeFromKnowledge(RK, Next, &AC, &DT)) {
2333 
2334  Replacement->insertAfter(II);
2335  AC.registerAssumption(Replacement);
2336  }
2337  return RemoveConditionFromAssume(II);
2338  }
2339  }
2340  }
2341 
2342  /// Canonicalize Knowledge in operand bundles.
2343  if (EnableKnowledgeRetention && II->hasOperandBundles()) {
2344  for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
2345  auto &BOI = II->bundle_op_info_begin()[Idx];
2346  RetainedKnowledge RK =
2347  llvm::getKnowledgeFromBundle(cast<AssumeInst>(*II), BOI);
2348  if (BOI.End - BOI.Begin > 2)
2349  continue; // Prevent reducing knowledge in an align with offset since
2350  // extracting a RetainedKnowledge from them loses the offset
2351  // information.
2352  RetainedKnowledge CanonRK =
2353  llvm::simplifyRetainedKnowledge(cast<AssumeInst>(II), RK,
2354  &getAssumptionCache(),
2355  &getDominatorTree());
2356  if (CanonRK == RK)
2357  continue;
2358  if (!CanonRK) {
2359  if (BOI.End - BOI.Begin > 0) {
2360  Worklist.pushValue(II->op_begin()[BOI.Begin]);
2361  Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
2362  }
2363  continue;
2364  }
2365  assert(RK.AttrKind == CanonRK.AttrKind);
2366  if (BOI.End - BOI.Begin > 0)
2367  II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
2368  if (BOI.End - BOI.Begin > 1)
2369  II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
2370  Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
2371  if (RK.WasOn)
2372  Worklist.pushValue(RK.WasOn);
2373  return II;
2374  }
2375  }
2376 
2377  // If there is a dominating assume with the same condition as this one,
2378  // then this one is redundant, and should be removed.
2379  KnownBits Known(1);
2380  computeKnownBits(IIOperand, Known, 0, II);
2381  if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
2382  return eraseInstFromFunction(*II);
2383 
2384  // Update the cache of affected values for this assumption (we might be
2385  // here because we just simplified the condition).
2386  AC.updateAffectedValues(cast<AssumeInst>(II));
2387  break;
2388  }
2389  case Intrinsic::experimental_guard: {
2390  // Is this guard followed by another guard? We scan forward over a small
2391  // fixed window of instructions to handle common cases with conditions
2392  // computed between guards.
2393  Instruction *NextInst = II->getNextNonDebugInstruction();
2394  for (unsigned i = 0; i < GuardWideningWindow; i++) {
2395  // Note: Using context-free form to avoid compile time blow up
2396  if (!isSafeToSpeculativelyExecute(NextInst))
2397  break;
2398  NextInst = NextInst->getNextNonDebugInstruction();
2399  }
2400  Value *NextCond = nullptr;
2401  if (match(NextInst,
2402  m_Intrinsic<Intrinsic::experimental_guard>(m_Value(NextCond)))) {
2403  Value *CurrCond = II->getArgOperand(0);
2404 
2405  // Remove a guard that is immediately preceded by an identical guard.
2406  // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
2407  if (CurrCond != NextCond) {
2408  Instruction *MoveI = II->getNextNonDebugInstruction();
2409  while (MoveI != NextInst) {
2410  auto *Temp = MoveI;
2411  MoveI = MoveI->getNextNonDebugInstruction();
2412  Temp->moveBefore(II);
2413  }
2414  replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
2415  }
2416  eraseInstFromFunction(*NextInst);
2417  return II;
2418  }
2419  break;
2420  }
2421  case Intrinsic::vector_insert: {
2422  Value *Vec = II->getArgOperand(0);
2423  Value *SubVec = II->getArgOperand(1);
2424  Value *Idx = II->getArgOperand(2);
2425  auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
2426  auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
2427  auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
2428 
2429  // Only canonicalize if the destination vector, Vec, and SubVec are all
2430  // fixed vectors.
2431  if (DstTy && VecTy && SubVecTy) {
2432  unsigned DstNumElts = DstTy->getNumElements();
2433  unsigned VecNumElts = VecTy->getNumElements();
2434  unsigned SubVecNumElts = SubVecTy->getNumElements();
2435  unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
2436 
2437  // An insert that entirely overwrites Vec with SubVec is a nop.
2438  if (VecNumElts == SubVecNumElts)
2439  return replaceInstUsesWith(CI, SubVec);
2440 
2441  // Widen SubVec into a vector of the same width as Vec, since
2442  // shufflevector requires the two input vectors to be the same width.
2443  // Elements beyond the bounds of SubVec within the widened vector are
2444  // undefined.
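      // Illustrative sketch (added commentary, hypothetical types): inserting
      // <2 x i32> %sub into <4 x i32> %vec at index 2 uses
      // WidenMask = <0, 1, undef, undef> and then Mask = <0, 1, 4, 5>,
      // i.e. elements 0-1 from %vec followed by both subvector elements.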
2445  SmallVector<int, 8> WidenMask;
2446  unsigned i;
2447  for (i = 0; i != SubVecNumElts; ++i)
2448  WidenMask.push_back(i);
2449  for (; i != VecNumElts; ++i)
2450  WidenMask.push_back(UndefMaskElem);
2451 
2452  Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
2453 
2454  SmallVector<int, 8> Mask;
2455  for (unsigned i = 0; i != IdxN; ++i)
2456  Mask.push_back(i);
2457  for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
2458  Mask.push_back(i);
2459  for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
2460  Mask.push_back(i);
2461 
2462  Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
2463  return replaceInstUsesWith(CI, Shuffle);
2464  }
2465  break;
2466  }
2467  case Intrinsic::vector_extract: {
2468  Value *Vec = II->getArgOperand(0);
2469  Value *Idx = II->getArgOperand(1);
2470 
2471  Type *ReturnType = II->getType();
2472  // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
2473  // ExtractIdx)
2474  unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
2475  Value *InsertTuple, *InsertIdx, *InsertValue;
2476  if (match(Vec, m_Intrinsic<Intrinsic::vector_insert>(m_Value(InsertTuple),
2477  m_Value(InsertValue),
2478  m_Value(InsertIdx))) &&
2479  InsertValue->getType() == ReturnType) {
2480  unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
2481  // Case where we get the same index right after setting it.
2482  // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
2483  // InsertValue
2484  if (ExtractIdx == Index)
2485  return replaceInstUsesWith(CI, InsertValue);
2486  // If we are getting a different index than what was set in the
2487  // insert.vector intrinsic, we can just set the input tuple to the one up
2488  // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
2489  // InsertIndex), ExtractIndex)
2490  // --> extract.vector(InsertTuple, ExtractIndex)
2491  else
2492  return replaceOperand(CI, 0, InsertTuple);
2493  }
2494 
2495  auto *DstTy = dyn_cast<FixedVectorType>(ReturnType);
2496  auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
2497 
2498  // Only canonicalize if the destination vector and Vec are fixed
2499  // vectors.
2500  if (DstTy && VecTy) {
2501  unsigned DstNumElts = DstTy->getNumElements();
2502  unsigned VecNumElts = VecTy->getNumElements();
2503  unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
2504 
2505  // Extracting the entirety of Vec is a nop.
2506  if (VecNumElts == DstNumElts) {
2507  replaceInstUsesWith(CI, Vec);
2508  return eraseInstFromFunction(CI);
2509  }
2510 
2511  SmallVector<int, 8> Mask;
2512  for (unsigned i = 0; i != DstNumElts; ++i)
2513  Mask.push_back(IdxN + i);
2514 
2515  Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
2516  return replaceInstUsesWith(CI, Shuffle);
2517  }
2518  break;
2519  }
2520  case Intrinsic::experimental_vector_reverse: {
2521  Value *BO0, *BO1, *X, *Y;
2522  Value *Vec = II->getArgOperand(0);
2523  if (match(Vec, m_OneUse(m_BinOp(m_Value(BO0), m_Value(BO1))))) {
2524  auto *OldBinOp = cast<BinaryOperator>(Vec);
2525  if (match(BO0, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
2526  m_Value(X)))) {
2527  // rev(binop rev(X), rev(Y)) --> binop X, Y
2528  if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
2529  m_Value(Y))))
2530  return replaceInstUsesWith(CI,
2531  BinaryOperator::CreateWithCopiedFlags(
2532  OldBinOp->getOpcode(), X, Y, OldBinOp,
2533  OldBinOp->getName(), II));
2534  // rev(binop rev(X), BO1Splat) --> binop X, BO1Splat
2535  if (isSplatValue(BO1))
2536  return replaceInstUsesWith(CI,
2537  BinaryOperator::CreateWithCopiedFlags(
2538  OldBinOp->getOpcode(), X, BO1,
2539  OldBinOp, OldBinOp->getName(), II));
2540  }
2541  // rev(binop BO0Splat, rev(Y)) --> binop BO0Splat, Y
2542  if (match(BO1, m_Intrinsic<Intrinsic::experimental_vector_reverse>(
2543  m_Value(Y))) &&
2544  isSplatValue(BO0))
2545  return replaceInstUsesWith(CI, BinaryOperator::CreateWithCopiedFlags(
2546  OldBinOp->getOpcode(), BO0, Y,
2547  OldBinOp, OldBinOp->getName(), II));
2548  }
2549  // rev(unop rev(X)) --> unop X
2550  if (match(Vec, m_OneUse(m_UnOp(
2551  m_Intrinsic<Intrinsic::experimental_vector_reverse>(
2552  m_Value(X)))))) {
2553  auto *OldUnOp = cast<UnaryOperator>(Vec);
2554  auto *NewUnOp = UnaryOperator::CreateWithCopiedFlags(
2555  OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(), II);
2556  return replaceInstUsesWith(CI, NewUnOp);
2557  }
2558  break;
2559  }
2560  case Intrinsic::vector_reduce_or:
2561  case Intrinsic::vector_reduce_and: {
2562  // Canonicalize logical or/and reductions:
2563  // Or reduction for i1 is represented as:
2564  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2565  // %res = cmp ne iReduxWidth %val, 0
2566  // And reduction for i1 is represented as:
2567  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2568  // %res = cmp eq iReduxWidth %val, 11111
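    // Illustrative sketch (added commentary) for <4 x i1> %v:
    //   or-reduce:  %val = bitcast <4 x i1> %v to i4; %res = icmp ne i4 %val, 0
    //   and-reduce: %val = bitcast <4 x i1> %v to i4; %res = icmp eq i4 %val, -1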
2569  Value *Arg = II->getArgOperand(0);
2570  Value *Vect;
2571  if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2572  if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2573  if (FTy->getElementType() == Builder.getInt1Ty()) {
2574  Value *Res = Builder.CreateBitCast(
2575  Vect, Builder.getIntNTy(FTy->getNumElements()));
2576  if (IID == Intrinsic::vector_reduce_and) {
2577  Res = Builder.CreateICmpEQ(
2578  Res, ConstantInt::getAllOnesValue(Res->getType()));
2579  } else {
2580  assert(IID == Intrinsic::vector_reduce_or &&
2581  "Expected or reduction.");
2582  Res = Builder.CreateIsNotNull(Res);
2583  }
2584  if (Arg != Vect)
2585  Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
2586  II->getType());
2587  return replaceInstUsesWith(CI, Res);
2588  }
2589  }
2590  [[fallthrough]];
2591  }
2592  case Intrinsic::vector_reduce_add: {
2593  if (IID == Intrinsic::vector_reduce_add) {
2594  // Convert vector_reduce_add(ZExt(<n x i1>)) to
2595  // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
2596  // Convert vector_reduce_add(SExt(<n x i1>)) to
2597  // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
2598  // Convert vector_reduce_add(<n x i1>) to
2599  // Trunc(ctpop(bitcast <n x i1> to in)).
2600  Value *Arg = II->getArgOperand(0);
2601  Value *Vect;
2602  if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2603  if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2604  if (FTy->getElementType() == Builder.getInt1Ty()) {
2605  Value *V = Builder.CreateBitCast(
2606  Vect, Builder.getIntNTy(FTy->getNumElements()));
2607  Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
2608  if (Res->getType() != II->getType())
2609  Res = Builder.CreateZExtOrTrunc(Res, II->getType());
2610  if (Arg != Vect &&
2611  cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
2612  Res = Builder.CreateNeg(Res);
2613  return replaceInstUsesWith(CI, Res);
2614  }
2615  }
2616  }
2617  [[fallthrough]];
2618  }
2619  case Intrinsic::vector_reduce_xor: {
2620  if (IID == Intrinsic::vector_reduce_xor) {
2621  // Exclusive disjunction reduction over the vector with
2622  // (potentially-extended) i1 element type is actually a
2623  // (potentially-extended) arithmetic `add` reduction over the original
2624  // non-extended value:
2625  // vector_reduce_xor(?ext(<n x i1>))
2626  // -->
2627  // ?ext(vector_reduce_add(<n x i1>))
2628  Value *Arg = II->getArgOperand(0);
2629  Value *Vect;
2630  if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2631  if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2632  if (FTy->getElementType() == Builder.getInt1Ty()) {
2633  Value *Res = Builder.CreateAddReduce(Vect);
2634  if (Arg != Vect)
2635  Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
2636  II->getType());
2637  return replaceInstUsesWith(CI, Res);
2638  }
2639  }
2640  }
2641  [[fallthrough]];
2642  }
2643  case Intrinsic::vector_reduce_mul: {
2644  if (IID == Intrinsic::vector_reduce_mul) {
2645  // Multiplicative reduction over the vector with (potentially-extended)
2646  // i1 element type is actually a (potentially zero-extended)
2647  // logical `and` reduction over the original non-extended value:
2648  // vector_reduce_mul(?ext(<n x i1>))
2649  // -->
2650  // zext(vector_reduce_and(<n x i1>))
2651  Value *Arg = II->getArgOperand(0);
2652  Value *Vect;
2653  if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2654  if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2655  if (FTy->getElementType() == Builder.getInt1Ty()) {
2656  Value *Res = Builder.CreateAndReduce(Vect);
2657  if (Res->getType() != II->getType())
2658  Res = Builder.CreateZExt(Res, II->getType());
2659  return replaceInstUsesWith(CI, Res);
2660  }
2661  }
2662  }
2663  [[fallthrough]];
2664  }
2665  case Intrinsic::vector_reduce_umin:
2666  case Intrinsic::vector_reduce_umax: {
2667  if (IID == Intrinsic::vector_reduce_umin ||
2668  IID == Intrinsic::vector_reduce_umax) {
2669  // UMin/UMax reduction over the vector with (potentially-extended)
2670  // i1 element type is actually a (potentially-extended)
2671  // logical `and`/`or` reduction over the original non-extended value:
2672  // vector_reduce_u{min,max}(?ext(<n x i1>))
2673  // -->
2674  // ?ext(vector_reduce_{and,or}(<n x i1>))
2675  Value *Arg = II->getArgOperand(0);
2676  Value *Vect;
2677  if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2678  if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2679  if (FTy->getElementType() == Builder.getInt1Ty()) {
2680  Value *Res = IID == Intrinsic::vector_reduce_umin
2681  ? Builder.CreateAndReduce(Vect)
2682  : Builder.CreateOrReduce(Vect);
2683  if (Arg != Vect)
2684  Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
2685  II->getType());
2686  return replaceInstUsesWith(CI, Res);
2687  }
2688  }
2689  }
2690  [[fallthrough]];
2691  }
2692  case Intrinsic::vector_reduce_smin:
2693  case Intrinsic::vector_reduce_smax: {
2694  if (IID == Intrinsic::vector_reduce_smin ||
2695  IID == Intrinsic::vector_reduce_smax) {
2696  // SMin/SMax reduction over the vector with (potentially-extended)
2697  // i1 element type is actually a (potentially-extended)
2698  // logical `and`/`or` reduction over the original non-extended value:
2699  // vector_reduce_s{min,max}(<n x i1>)
2700  // -->
2701  // vector_reduce_{or,and}(<n x i1>)
2702  // and
2703  // vector_reduce_s{min,max}(sext(<n x i1>))
2704  // -->
2705  // sext(vector_reduce_{or,and}(<n x i1>))
2706  // and
2707  // vector_reduce_s{min,max}(zext(<n x i1>))
2708  // -->
2709  // zext(vector_reduce_{and,or}(<n x i1>))
2710  Value *Arg = II->getArgOperand(0);
2711  Value *Vect;
2712  if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
2713  if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
2714  if (FTy->getElementType() == Builder.getInt1Ty()) {
2715  Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
2716  if (Arg != Vect)
2717  ExtOpc = cast<CastInst>(Arg)->getOpcode();
2718  Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
2719  (ExtOpc == Instruction::CastOps::ZExt))
2720  ? Builder.CreateAndReduce(Vect)
2721  : Builder.CreateOrReduce(Vect);
2722  if (Arg != Vect)
2723  Res = Builder.CreateCast(ExtOpc, Res, II->getType());
2724  return replaceInstUsesWith(CI, Res);
2725  }
2726  }
2727  }
2728  [[fallthrough]];
2729  }
2730  case Intrinsic::vector_reduce_fmax:
2731  case Intrinsic::vector_reduce_fmin:
2732  case Intrinsic::vector_reduce_fadd:
2733  case Intrinsic::vector_reduce_fmul: {
2734  bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
2735  IID != Intrinsic::vector_reduce_fmul) ||
2736  II->hasAllowReassoc();
2737  const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
2738  IID == Intrinsic::vector_reduce_fmul)
2739  ? 1
2740  : 0;
2741  Value *Arg = II->getArgOperand(ArgIdx);
2742  Value *V;
2743  ArrayRef<int> Mask;
2744  if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
2745  !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
2746  !cast<ShuffleVectorInst>(Arg)->isSingleSource())
2747  break;
2748  int Sz = Mask.size();
2749  SmallBitVector UsedIndices(Sz);
2750  for (int Idx : Mask) {
2751  if (Idx == UndefMaskElem || UsedIndices.test(Idx))
2752  break;
2753  UsedIndices.set(Idx);
2754  }
2755  // The shuffle can be removed iff it merely permutes the elements, with no
2756  // repeats, undefs, or other changes.
2757  if (UsedIndices.all()) {
2758  replaceUse(II->getOperandUse(ArgIdx), V);
2759  return nullptr;
2760  }
2761  break;
2762  }
2763  default: {
2764  // Handle target specific intrinsics
2765  Optional<Instruction *> V = targetInstCombineIntrinsic(*II);
2766  if (V)
2767  return V.value();
2768  break;
2769  }
2770  }
2771 
2772  if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
2773  return Shuf;
2774 
2775  // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
2776  // context, so it is handled in visitCallBase and we should trigger it.
2777  return visitCallBase(*II);
2778 }
2779 
2780 // Fence instruction simplification
2781 Instruction *InstCombinerImpl::visitFenceInst(FenceInst &FI) {
2782  auto *NFI = dyn_cast<FenceInst>(FI.getNextNonDebugInstruction());
2783  // This check is solely here to handle arbitrary target-dependent syncscopes.
2784  // TODO: Can remove this check if it does not matter in practice.
2785  if (NFI && FI.isIdenticalTo(NFI))
2786  return eraseInstFromFunction(FI);
2787 
2788  // Returns true if FI1 is an identical or stronger fence than FI2.
2789  auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
2790  auto FI1SyncScope = FI1->getSyncScopeID();
2791  // Consider same scope, where scope is global or single-thread.
2792  if (FI1SyncScope != FI2->getSyncScopeID() ||
2793  (FI1SyncScope != SyncScope::System &&
2794  FI1SyncScope != SyncScope::SingleThread))
2795  return false;
2796 
2797  return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
2798  };
2799  if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
2800  return eraseInstFromFunction(FI);
2801 
2802  if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNonDebugInstruction()))
2803  if (isIdenticalOrStrongerFence(PFI, &FI))
2804  return eraseInstFromFunction(FI);
2805  return nullptr;
2806 }
2807 
2808 // InvokeInst simplification
2809 Instruction *InstCombinerImpl::visitInvokeInst(InvokeInst &II) {
2810  return visitCallBase(II);
2811 }
2812 
2813 // CallBrInst simplification
2814 Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
2815  return visitCallBase(CBI);
2816 }
2817 
2818 /// If this cast does not affect the value passed through the varargs area, we
2819 /// can eliminate the use of the cast.
2820 static bool isSafeToEliminateVarargsCast(const CallBase &Call,
2821  const DataLayout &DL,
2822  const CastInst *const CI,
2823  const int ix) {
2824  if (!CI->isLosslessCast())
2825  return false;
2826 
2827  // If this is a GC intrinsic, avoid munging types. We need types for
2828  // statepoint reconstruction in SelectionDAG.
2829  // TODO: This is probably something which should be expanded to all
2830  // intrinsics since the entire point of intrinsics is that
2831  // they are understandable by the optimizer.
2832  if (isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) ||
2833  isa<GCResultInst>(Call))
2834  return false;
2835 
2836  // Opaque pointers are compatible with any byval types.
2837  PointerType *SrcTy = cast<PointerType>(CI->getOperand(0)->getType());
2838  if (SrcTy->isOpaque())
2839  return true;
2840 
2841  // The size of ByVal or InAlloca arguments is derived from the type, so we
2842  // can't change to a type with a different size. If the size were
2843  // passed explicitly we could avoid this check.
2844  if (!Call.isPassPointeeByValueArgument(ix))
2845  return true;
2846 
2847  // The transform currently only handles type replacement for byval, not other
2848  // type-carrying attributes.
2849  if (!Call.isByValArgument(ix))
2850  return false;
2851 
2852  Type *SrcElemTy = SrcTy->getNonOpaquePointerElementType();
2853  Type *DstElemTy = Call.getParamByValType(ix);
2854  if (!SrcElemTy->isSized() || !DstElemTy->isSized())
2855  return false;
2856  if (DL.getTypeAllocSize(SrcElemTy) != DL.getTypeAllocSize(DstElemTy))
2857  return false;
2858  return true;
2859 }
2860 
2861 Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
2862  if (!CI->getCalledFunction()) return nullptr;
2863 
2864  // Skip optimizing notail and musttail calls so
2865  // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
2866  // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
2867  if (CI->isMustTailCall() || CI->isNoTailCall())
2868  return nullptr;
2869 
2870  auto InstCombineRAUW = [this](Instruction *From, Value *With) {
2871  replaceInstUsesWith(*From, With);
2872  };
2873  auto InstCombineErase = [this](Instruction *I) {
2874  eraseInstFromFunction(*I);
2875  };
2876  LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
2877  InstCombineErase);
2878  if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
2879  ++NumSimplified;
2880  return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
2881  }
2882 
2883  return nullptr;
2884 }
2885 
2886 static IntrinsicInst *findInitTrampolineFromAlloca(Value *TrampMem) {
2887  // Strip off at most one level of pointer casts, looking for an alloca. This
2888  // is good enough in practice and simpler than handling any number of casts.
2889  Value *Underlying = TrampMem->stripPointerCasts();
2890  if (Underlying != TrampMem &&
2891  (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
2892  return nullptr;
2893  if (!isa<AllocaInst>(Underlying))
2894  return nullptr;
2895 
2896  IntrinsicInst *InitTrampoline = nullptr;
2897  for (User *U : TrampMem->users()) {
2898  IntrinsicInst *II = dyn_cast<IntrinsicInst>(U);
2899  if (!II)
2900  return nullptr;
2901  if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
2902  if (InitTrampoline)
2903  // More than one init_trampoline writes to this value. Give up.
2904  return nullptr;
2905  InitTrampoline = II;
2906  continue;
2907  }
2908  if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
2909  // Allow any number of calls to adjust.trampoline.
2910  continue;
2911  return nullptr;
2912  }
2913 
2914  // No call to init.trampoline found.
2915  if (!InitTrampoline)
2916  return nullptr;
2917 
2918  // Check that the alloca is being used in the expected way.
2919  if (InitTrampoline->getOperand(0) != TrampMem)
2920  return nullptr;
2921 
2922  return InitTrampoline;
2923 }
2924 
2925 static IntrinsicInst *findInitTrampolineFromBB(IntrinsicInst *AdjustTramp,
2926  Value *TrampMem) {
2927  // Visit all the previous instructions in the basic block, and try to find a
2928  // init.trampoline which has a direct path to the adjust.trampoline.
2929  for (BasicBlock::iterator I = AdjustTramp->getIterator(),
2930  E = AdjustTramp->getParent()->begin();
2931  I != E;) {
2932  Instruction *Inst = &*--I;
2933  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
2934  if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
2935  II->getOperand(0) == TrampMem)
2936  return II;
2937  if (Inst->mayWriteToMemory())
2938  return nullptr;
2939  }
2940  return nullptr;
2941 }
2942 
2943 // Given a call to llvm.adjust.trampoline, find and return the corresponding
2944 // call to llvm.init.trampoline if the call to the trampoline can be optimized
2945 // to a direct call to a function. Otherwise return NULL.
2946 static IntrinsicInst *findInitTrampoline(Value *Callee) {
2947  Callee = Callee->stripPointerCasts();
2948  IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
2949  if (!AdjustTramp ||
2950  AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
2951  return nullptr;
2952 
2953  Value *TrampMem = AdjustTramp->getOperand(0);
2954 
2955  if (IntrinsicInst *IT = findInitTrampolineFromAlloca(TrampMem))
2956  return IT;
2957  if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
2958  return IT;
2959  return nullptr;
2960 }
2961 
2962 bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
2963  const TargetLibraryInfo *TLI) {
2964  // Note: We only handle cases which can't be driven from generic attributes
2965  // here. So, for example, nonnull and noalias (which are common properties
2966  // of some allocation functions) are expected to be handled via annotation
2967  // of the respective allocator declaration with generic attributes.
2968  bool Changed = false;
2969 
2970  if (!Call.getType()->isPointerTy())
2971  return Changed;
2972 
2973  Optional<APInt> Size = getAllocSize(&Call, TLI);
2974  if (Size && *Size != 0) {
2975  // TODO: We really should just emit deref_or_null here and then
2976  // let the generic inference code combine that with nonnull.
2977  if (Call.hasRetAttr(Attribute::NonNull)) {
2978  Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
2979  Call.addRetAttr(Attribute::getWithDereferenceableBytes(
2980  Call.getContext(), Size->getLimitedValue()));
2981  } else {
2982  Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
2983  Call.addRetAttr(Attribute::getWithDereferenceableOrNullBytes(
2984  Call.getContext(), Size->getLimitedValue()));
2985  }
2986  }
2987 
2988  // Add alignment attribute if alignment is a power of two constant.
2989  Value *Alignment = getAllocAlignment(&Call, TLI);
2990  if (!Alignment)
2991  return Changed;
2992 
2993  ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
2994  if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
2995  uint64_t AlignmentVal = AlignOpC->getZExtValue();
2996  if (llvm::isPowerOf2_64(AlignmentVal)) {
2997  Align ExistingAlign = Call.getRetAlign().valueOrOne();
2998  Align NewAlign = Align(AlignmentVal);
2999  if (NewAlign > ExistingAlign) {
3000  Call.addRetAttr(
3001  Attribute::getWithAlignment(Call.getContext(), NewAlign));
3002  Changed = true;
3003  }
3004  }
3005  }
3006  return Changed;
3007 }
3008 
3009 /// Improvements for call, callbr and invoke instructions.
3010 Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
3011  bool Changed = annotateAnyAllocSite(Call, &TLI);
3012 
3013  // Mark any parameters that are known to be non-null with the nonnull
3014  // attribute. This is helpful for inlining calls to functions with null
3015  // checks on their arguments.
3016  SmallVector<unsigned, 4> ArgNos;
3017  unsigned ArgNo = 0;
3018 
3019  for (Value *V : Call.args()) {
3020  if (V->getType()->isPointerTy() &&
3021  !Call.paramHasAttr(ArgNo, Attribute::NonNull) &&
3022  isKnownNonZero(V, DL, 0, &AC, &Call, &DT))
3023  ArgNos.push_back(ArgNo);
3024  ArgNo++;
3025  }
3026 
3027  assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
3028 
3029  if (!ArgNos.empty()) {
3030  AttributeList AS = Call.getAttributes();
3031  LLVMContext &Ctx = Call.getContext();
3032  AS = AS.addParamAttribute(Ctx, ArgNos,
3033  Attribute::get(Ctx, Attribute::NonNull));
3034  Call.setAttributes(AS);
3035  Changed = true;
3036  }
3037 
3038  // If the callee is a pointer to a function, attempt to move any casts to the
3039  // arguments of the call/callbr/invoke.
3040  Value *Callee = Call.getCalledOperand();
3041  Function *CalleeF = dyn_cast<Function>(Callee);
3042  if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
3043  transformConstExprCastCall(Call))
3044  return nullptr;
3045 
3046  if (CalleeF) {
3047  // Remove the convergent attr on calls when the callee is not convergent.
3048  if (Call.isConvergent() && !CalleeF->isConvergent() &&
3049  !CalleeF->isIntrinsic()) {
3050  LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
3051  << "\n");
3052  Call.setNotConvergent();
3053  return &Call;
3054  }
3055 
3056  // If the call and callee calling conventions don't match, and neither one
3057  // of the calling conventions is compatible with C calling convention
3058  // this call must be unreachable, as the call is undefined.
3059  if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
3060  !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
3061  TargetLibraryInfoImpl::isCallingConvCCompatible(&Call)) &&
3062  !(Call.getCallingConv() == llvm::CallingConv::C &&
3063  TargetLibraryInfoImpl::isCallingConvCCompatible(CalleeF))) &&
3064  // Only do this for calls to a function with a body. A prototype may
3065  // not actually end up matching the implementation's calling conv for a
3066  // variety of reasons (e.g. it may be written in assembly).
3067  !CalleeF->isDeclaration()) {
3068  Instruction *OldCall = &Call;
3069  CreateNonTerminatorUnreachable(OldCall);
3070  // If OldCall does not return void then replaceInstUsesWith poison.
3071  // This allows ValueHandlers and custom metadata to adjust itself.
3072  if (!OldCall->getType()->isVoidTy())
3073  replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
3074  if (isa<CallInst>(OldCall))
3075  return eraseInstFromFunction(*OldCall);
3076 
3077  // We cannot remove an invoke or a callbr, because it would change the
3078  // CFG, just change the callee to a null pointer.
3079  cast<CallBase>(OldCall)->setCalledFunction(
3080  CalleeF->getFunctionType(),
3081  Constant::getNullValue(CalleeF->getType()));
3082  return nullptr;
3083  }
3084  }
3085 
3086  // Calling a null function pointer is undefined if a null address isn't
3087  // dereferenceable.
3088  if ((isa<ConstantPointerNull>(Callee) &&
3089  !NullPointerIsDefined(Call.getFunction())) ||
3090  isa<UndefValue>(Callee)) {
3091  // If Call does not return void then replaceInstUsesWith poison.
3092  // This allows ValueHandlers and custom metadata to adjust itself.
3093  if (!Call.getType()->isVoidTy())
3094  replaceInstUsesWith(Call, PoisonValue::get(Call.getType()));
3095 
3096  if (Call.isTerminator()) {
3097  // Can't remove an invoke or callbr because we cannot change the CFG.
3098  return nullptr;
3099  }
3100 
3101  // This instruction is not reachable, just remove it.
3102  CreateNonTerminatorUnreachable(&Call);
3103  return eraseInstFromFunction(Call);
3104  }
3105 
3106  if (IntrinsicInst *II = findInitTrampoline(Callee))
3107  return transformCallThroughTrampoline(Call, *II);
3108 
3109  // TODO: Drop this transform once opaque pointer transition is done.
3110  FunctionType *FTy = Call.getFunctionType();
3111  if (FTy->isVarArg()) {
3112  int ix = FTy->getNumParams();
3113  // See if we can optimize any arguments passed through the varargs area of
3114  // the call.
3115  for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
3116  I != E; ++I, ++ix) {
3117  CastInst *CI = dyn_cast<CastInst>(*I);
3118  if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
3119  replaceUse(*I, CI->getOperand(0));
3120 
3121  // Update the byval type to match the pointer type.
3122  // Not necessary for opaque pointers.
3123  PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType());
3124  if (!NewTy->isOpaque() && Call.isByValArgument(ix)) {
3125  Call.removeParamAttr(ix, Attribute::ByVal);
3126  Call.addParamAttr(ix, Attribute::getWithByValType(
3127  Call.getContext(),
3128  NewTy->getNonOpaquePointerElementType()));
3129  }
3130  Changed = true;
3131  }
3132  }
3133  }
3134 
3135  if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
3136  InlineAsm *IA = cast<InlineAsm>(Callee);
3137  if (!IA->canThrow()) {
3138  // Normal inline asm calls cannot throw - mark them
3139  // 'nounwind'.
3140  Call.setDoesNotThrow();
3141  Changed = true;
3142  }
3143  }
3144 
3145  // Try to optimize the call if possible, we require DataLayout for most of
3146  // this. None of these calls are seen as possibly dead so go ahead and
3147  // delete the instruction now.
3148  if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
3149  Instruction *I = tryOptimizeCall(CI);
3150  // If we changed something return the result, etc. Otherwise let
3151  // the fallthrough check.
3152  if (I) return eraseInstFromFunction(*I);
3153  }
3154 
3155  if (!Call.use_empty() && !Call.isMustTailCall())
3156  if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
3157  Type *CallTy = Call.getType();
3158  Type *RetArgTy = ReturnedArg->getType();
3159  if (RetArgTy->canLosslesslyBitCastTo(CallTy))
3160  return replaceInstUsesWith(
3161  Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
3162  }
3163 
3164  // Drop unnecessary kcfi operand bundles from calls that were converted
3165  // into direct calls.
3166  auto Bundle = Call.getOperandBundle(LLVMContext::OB_kcfi);
3167  if (Bundle && !Call.isIndirectCall()) {
3168  DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
3169  if (CalleeF) {
3170  ConstantInt *FunctionType = nullptr;
3171  ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
3172 
3173  if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
3174  FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
3175 
3176  if (FunctionType &&
3177  FunctionType->getZExtValue() != ExpectedType->getZExtValue())
3178  dbgs() << Call.getModule()->getName()
3179  << ": warning: kcfi: " << Call.getCaller()->getName()
3180  << ": call to " << CalleeF->getName()
3181  << " using a mismatching function pointer type\n";
3182  }
3183  });
3184 
3185  return CallBase::removeOperandBundle(&Call, LLVMContext::OB_kcfi);
3186  }
3187 
3188  if (isRemovableAlloc(&Call, &TLI))
3189  return visitAllocSite(Call);
3190 
3191  // Handle intrinsics which can be used in both call and invoke context.
3192  switch (Call.getIntrinsicID()) {
3193  case Intrinsic::experimental_gc_statepoint: {
3194  GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
3195  SmallPtrSet<Value *, 32> LiveGcValues;
3196  for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
3197  GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
3198 
3199  // Remove the relocation if unused.
3200  if (GCR.use_empty()) {
3201  eraseInstFromFunction(GCR);
3202  continue;
3203  }
3204 
3205  Value *DerivedPtr = GCR.getDerivedPtr();
3206  Value *BasePtr = GCR.getBasePtr();
3207 
3208  // Undef is undef, even after relocation.
3209  if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
3210  replaceInstUsesWith(GCR, UndefValue::get(GCR.getType()));
3211  eraseInstFromFunction(GCR);
3212  continue;
3213  }
3214 
3215  if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
3216  // The relocation of null will be null for most any collector.
3217  // TODO: provide a hook for this in GCStrategy. There might be some
3218  // weird collector this property does not hold for.
3219  if (isa<ConstantPointerNull>(DerivedPtr)) {
3220  // Use null-pointer of gc_relocate's type to replace it.
3221  replaceInstUsesWith(GCR, ConstantPointerNull::get(PT));
3222  eraseInstFromFunction(GCR);
3223  continue;
3224  }
3225 
3226  // isKnownNonNull -> nonnull attribute
3227  if (!GCR.hasRetAttr(Attribute::NonNull) &&
3228  isKnownNonZero(DerivedPtr, DL, 0, &AC, &Call, &DT)) {
3229  GCR.addRetAttr(Attribute::NonNull);
3230  // We discovered new fact, re-check users.
3231  Worklist.pushUsersToWorkList(GCR);
3232  }
3233  }
3234 
3235  // If we have two copies of the same pointer in the statepoint argument
3236  // list, canonicalize to one. This may let us common gc.relocates.
3237  if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
3238  GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
3239  auto *OpIntTy = GCR.getOperand(2)->getType();
3240  GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
3241  }
3242 
3243  // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
3244  // Canonicalize on the type from the uses to the defs
3245 
3246  // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
3247  LiveGcValues.insert(BasePtr);
3248  LiveGcValues.insert(DerivedPtr);
3249  }
3250  Optional<OperandBundleUse> Bundle =
3251  GCSP.getOperandBundle(LLVMContext::OB_gc_live);
3252  unsigned NumOfGCLives = LiveGcValues.size();
3253  if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
3254  break;
3255  // We can reduce the size of gc live bundle.
3256  DenseMap<Value *, unsigned> Val2Idx;
3257  std::vector<Value *> NewLiveGc;
3258  for (Value *V : Bundle->Inputs) {
3259  if (Val2Idx.count(V))
3260  continue;
3261  if (LiveGcValues.count(V)) {
3262  Val2Idx[V] = NewLiveGc.size();
3263  NewLiveGc.push_back(V);
3264  } else
3265  Val2Idx[V] = NumOfGCLives;
3266  }
3267  // Update all gc.relocates
3268  for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
3269  GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
3270  Value *BasePtr = GCR.getBasePtr();
3271  assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
3272  "Missed live gc for base pointer");
3273  auto *OpIntTy1 = GCR.getOperand(1)->getType();
3274  GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
3275  Value *DerivedPtr = GCR.getDerivedPtr();
3276  assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
3277  "Missed live gc for derived pointer");
3278  auto *OpIntTy2 = GCR.getOperand(2)->getType();
3279  GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
3280  }
3281  // Create new statepoint instruction.
3282  OperandBundleDef NewBundle("gc-live", NewLiveGc);
3283  return CallBase::Create(&Call, NewBundle);
3284  }
3285  default: { break; }
3286  }
3287 
3288  return Changed ? &Call : nullptr;
3289 }
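In the statepoint case above, the gc-live operand bundle is shrunk to just the values some gc.relocate still uses, and each surviving value is given a dense new index while dropped values map to a sentinel. A simplified standalone sketch of that renumbering, using std containers in place of llvm::DenseMap and strings in place of Value pointers (all names illustrative):

#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Build the shrunken live list and a map from each old entry to its new index.
// Entries no longer referenced by any relocate map to OldLive.size() as a sentinel.
static std::vector<std::string>
shrinkGcLive(const std::vector<std::string> &OldLive,
             const std::unordered_set<std::string> &StillLive,
             std::unordered_map<std::string, size_t> &NewIndex) {
  std::vector<std::string> NewLive;
  for (const std::string &V : OldLive) {
    if (NewIndex.count(V))
      continue;                     // duplicate bundle entry, already mapped
    if (StillLive.count(V)) {
      NewIndex[V] = NewLive.size(); // dense index into the shrunken bundle
      NewLive.push_back(V);
    } else {
      NewIndex[V] = OldLive.size(); // sentinel: dropped from the bundle
    }
  }
  return NewLive;
}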
3290 
3291 /// If the callee is a constexpr cast of a function, attempt to move the cast to
3292 /// the arguments of the call/callbr/invoke.
3293 bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
3294  auto *Callee =
3295  dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
3296  if (!Callee)
3297  return false;
3298 
3299  // If this is a call to a thunk function, don't remove the cast. Thunks are
3300  // used to transparently forward all incoming parameters and outgoing return
3301  // values, so it's important to leave the cast in place.
3302  if (Callee->hasFnAttribute("thunk"))
3303  return false;
3304 
3305  // If this is a musttail call, the callee's prototype must match the caller's
3306  // prototype with the exception of pointee types. The code below doesn't
3307  // implement that, so we can't do this transform.
3308  // TODO: Do the transform if it only requires adding pointer casts.
3309  if (Call.isMustTailCall())
3310  return false;
3311 
3312  Instruction *Caller = &Call;
3313  const AttributeList &CallerPAL = Call.getAttributes();
3314 
3315  // Okay, this is a cast from a function to a different type. Unless doing so
3316  // would cause a type conversion of one of our arguments, change this call to
3317  // be a direct call with arguments casted to the appropriate types.
3318  FunctionType *FT = Callee->getFunctionType();
3319  Type *OldRetTy = Caller->getType();
3320  Type *NewRetTy = FT->getReturnType();
3321 
3322  // Check to see if we are changing the return type...
3323  if (OldRetTy != NewRetTy) {
3324 
3325  if (NewRetTy->isStructTy())
3326  return false; // TODO: Handle multiple return values.
3327 
3328  if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
3329  if (Callee->isDeclaration())
3330  return false; // Cannot transform this return value.
3331 
3332  if (!Caller->use_empty() &&
3333  // void -> non-void is handled specially
3334  !NewRetTy->isVoidTy())
3335  return false; // Cannot transform this return value.
3336  }
3337 
3338  if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
3339  AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
3340  if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy)))
3341  return false; // Attribute not compatible with transformed value.
3342  }
3343 
3344  // If the callbase is an invoke/callbr instruction, and the return value is
3345  // used by a PHI node in a successor, we cannot change the return type of
3346  // the call because there is no place to put the cast instruction (without
3347  // breaking the critical edge). Bail out in this case.
3348  if (!Caller->use_empty()) {
3349  BasicBlock *PhisNotSupportedBlock = nullptr;
3350  if (auto *II = dyn_cast<InvokeInst>(Caller))
3351  PhisNotSupportedBlock = II->getNormalDest();
3352  if (auto *CB = dyn_cast<CallBrInst>(Caller))
3353  PhisNotSupportedBlock = CB->getDefaultDest();
3354  if (PhisNotSupportedBlock)
3355  for (User *U : Caller->users())
3356  if (PHINode *PN = dyn_cast<PHINode>(U))
3357  if (PN->getParent() == PhisNotSupportedBlock)
3358  return false;
3359  }
3360  }
3361 
3362  unsigned NumActualArgs = Call.arg_size();
3363  unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
3364 
3365  // Prevent us turning:
3366  // declare void @takes_i32_inalloca(i32* inalloca)
3367  // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
3368  //
3369  // into:
3370  // call void @takes_i32_inalloca(i32* null)
3371  //
3372  // Similarly, avoid folding away bitcasts of byval calls.
3373  if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
3374  Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
3375  return false;
3376 
3377  auto AI = Call.arg_begin();
3378  for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
3379  Type *ParamTy = FT->getParamType(i);
3380  Type *ActTy = (*AI)->getType();
3381 
3382  if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
3383  return false; // Cannot transform this parameter value.
3384 
3385  // Check if there are any incompatible attributes we cannot drop safely.
3386  if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
3387  .overlaps(AttributeFuncs::typeIncompatible(
3388  ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP)))
3389  return false; // Attribute not compatible with transformed value.
3390 
3391  if (Call.isInAllocaArgument(i) ||
3392  CallerPAL.hasParamAttr(i, Attribute::Preallocated))
3393  return false; // Cannot transform to and from inalloca/preallocated.
3394 
3395  if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
3396  return false;
3397 
3398  // If the parameter is passed as a byval argument, then we have to have a
3399  // sized type and the sized type has to have the same size as the old type.
3400  if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
3401  PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
3402  if (!ParamPTy)
3403  return false;
3404 
3405  if (!ParamPTy->isOpaque()) {
3406  Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType();
3407  if (!ParamElTy->isSized())
3408  return false;
3409 
3410  Type *CurElTy = Call.getParamByValType(i);
3411  if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy))
3412  return false;
3413  }
3414  }
3415  }
3416 
3417  if (Callee->isDeclaration()) {
3418  // Do not delete arguments unless we have a function body.
3419  if (FT->getNumParams() < NumActualArgs && !FT->isVarArg())
3420  return false;
3421 
3422  // If the callee is just a declaration, don't change the varargsness of the
3423  // call. We don't want to introduce a varargs call where one doesn't
3424  // already exist.
3425  if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
3426  return false;
3427 
3428  // If both the callee and the cast type are varargs, we still have to make
3429  // sure the number of fixed parameters are the same or we have the same
3430  // ABI issues as if we introduce a varargs call.
3431  if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
3432  FT->getNumParams() != Call.getFunctionType()->getNumParams())
3433  return false;
3434  }
3435 
3436  if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
3437  !CallerPAL.isEmpty()) {
3438  // In this case we have more arguments than the new function type, but we
3439  // won't be dropping them. Check that these extra arguments have attributes
3440  // that are compatible with being a vararg call argument.
3441  unsigned SRetIdx;
3442  if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
3443  SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
3444  return false;
3445  }
3446 
3447  // Okay, we decided that this is a safe thing to do: go ahead and start
3448  // inserting cast instructions as necessary.
3449  SmallVector<Value *, 8> Args;
3450  SmallVector<AttributeSet, 8> ArgAttrs;
3451  Args.reserve(NumActualArgs);
3452  ArgAttrs.reserve(NumActualArgs);
3453 
3454  // Get any return attributes.
3455  AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
3456 
3457  // If the return value is not being used, the type may not be compatible
3458  // with the existing attributes. Wipe out any problematic attributes.
3459  RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy));
3460 
3461  LLVMContext &Ctx = Call.getContext();
3462  AI = Call.arg_begin();
3463  for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
3464  Type *ParamTy = FT->getParamType(i);
3465 
3466  Value *NewArg = *AI;
3467  if ((*AI)->getType() != ParamTy)
3468  NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
3469  Args.push_back(NewArg);
3470 
3471  // Add any parameter attributes except the ones incompatible with the new
3472  // type. Note that we made sure all incompatible ones are safe to drop.
3473  AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
3474  ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
3475  if (CallerPAL.hasParamAttr(i, Attribute::ByVal) &&
3476  !ParamTy->isOpaquePointerTy()) {
3477  AttrBuilder AB(Ctx, CallerPAL.getParamAttrs(i).removeAttributes(
3478  Ctx, IncompatibleAttrs));
3479  AB.addByValAttr(ParamTy->getNonOpaquePointerElementType());
3480  ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
3481  } else {
3482  ArgAttrs.push_back(
3483  CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
3484  }
3485  }
3486 
3487  // If the function takes more arguments than the call was taking, add them
3488  // now.
3489  for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
3490  Args.push_back(Constant::getNullValue(FT->getParamType(i)));
3491  ArgAttrs.push_back(AttributeSet());
3492  }
3493 
3494  // If we are removing arguments to the function, emit an obnoxious warning.
3495  if (FT->getNumParams() < NumActualArgs) {
3496  // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
3497  if (FT->isVarArg()) {
3498  // Add all of the arguments in their promoted form to the arg list.
3499  for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
3500  Type *PTy = getPromotedType((*AI)->getType());
3501  Value *NewArg = *AI;
3502  if (PTy != (*AI)->getType()) {
3503  // Must promote to pass through va_arg area!
3504  Instruction::CastOps opcode =
3505  CastInst::getCastOpcode(*AI, false, PTy, false);
3506  NewArg = Builder.CreateCast(opcode, *AI, PTy);
3507  }
3508  Args.push_back(NewArg);
3509 
3510  // Add any parameter attributes.
3511  ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
3512  }
3513  }
3514  }
3515 
3516  AttributeSet FnAttrs = CallerPAL.getFnAttrs();
3517 
3518  if (NewRetTy->isVoidTy())
3519  Caller->setName(""); // Void type should not have a name.
3520 
3521  assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
3522  "missing argument attributes");
3523  AttributeList NewCallerPAL = AttributeList::get(
3524  Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
3525 
3526  SmallVector<OperandBundleDef, 1> OpBundles;
3527  Call.getOperandBundlesAsDefs(OpBundles);
3528 
3529  CallBase *NewCall;
3530  if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
3531  NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
3532  II->getUnwindDest(), Args, OpBundles);
3533  } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
3534  NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
3535  CBI->getIndirectDests(), Args, OpBundles);
3536  } else {
3537  NewCall = Builder.CreateCall(Callee, Args, OpBundles);
3538  cast<CallInst>(NewCall)->setTailCallKind(
3539  cast<CallInst>(Caller)->getTailCallKind());
3540  }
3541  NewCall->takeName(Caller);
3542  NewCall->setCallingConv(Call.getCallingConv());
3543  NewCall->setAttributes(NewCallerPAL);
3544 
3545  // Preserve prof metadata if any.
3546  NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
3547 
3548  // Insert a cast of the return type as necessary.
3549  Instruction *NC = NewCall;
3550  Value *NV = NC;
3551  if (OldRetTy != NV->getType() && !Caller->use_empty()) {
3552  if (!NV->getType()->isVoidTy()) {
3553  NV = NC = CastInst::CreateBitOrPointerCast(NC, OldRetTy);
3554  NC->setDebugLoc(Caller->getDebugLoc());
3555 
3556  Instruction *InsertPt = NewCall->getInsertionPointAfterDef();
3557  assert(InsertPt && "No place to insert cast");
3558  InsertNewInstBefore(NC, *InsertPt);
3559  Worklist.pushUsersToWorkList(*Caller);
3560  } else {
3561  NV = PoisonValue::get(Caller->getType());
3562  }
3563  }
3564 
3565  if (!Caller->use_empty())
3566  replaceInstUsesWith(*Caller, NV);
3567  else if (Caller->hasValueHandle()) {
3568  if (OldRetTy == NV->getType())
3569  ValueHandleBase::ValueIsRAUWd(Caller, NV);
3570  else
3571  // We cannot call ValueIsRAUWd with a different type, and the
3572  // actual tracked value will disappear.
3573  ValueHandleBase::ValueIsDeleted(Caller);
3574  }
3575 
3576  eraseInstFromFunction(*Caller);
3577  return true;
3578 }
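An illustrative example of the fold above, in hypothetical typed-pointer IR: a call through a constant bitcast of the callee becomes a direct call, with the cast moved onto the mismatched argument (pointer-to-pointer casts are bit- or no-op-castable, so the transform applies).

//   %r = call i8* bitcast (i8* (i32*)* @f to i8* (i64*)*)(i64* %p)
// becomes:
//   %0 = bitcast i64* %p to i32*
//   %r = call i8* @f(i32* %0)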
3579 
3580 /// Turn a call to a function created by init_trampoline / adjust_trampoline
3581 /// intrinsic pair into a direct call to the underlying function.
3582 Instruction *
3583 InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
3584  IntrinsicInst &Tramp) {
3585  Value *Callee = Call.getCalledOperand();
3586  Type *CalleeTy = Callee->getType();
3587  FunctionType *FTy = Call.getFunctionType();
3588  AttributeList Attrs = Call.getAttributes();
3589 
3590  // If the call already has the 'nest' attribute somewhere then give up -
3591  // otherwise 'nest' would occur twice after splicing in the chain.
3592  if (Attrs.hasAttrSomewhere(Attribute::Nest))
3593  return nullptr;
3594 
3595  Function *NestF = cast<Function>(Tramp.getArgOperand(1)->stripPointerCasts());
3596  FunctionType *NestFTy = NestF->getFunctionType();
3597 
3598  AttributeList NestAttrs = NestF->getAttributes();
3599  if (!NestAttrs.isEmpty()) {
3600  unsigned NestArgNo = 0;
3601  Type *NestTy = nullptr;
3602  AttributeSet NestAttr;
3603 
3604  // Look for a parameter marked with the 'nest' attribute.
3605  for (FunctionType::param_iterator I = NestFTy->param_begin(),
3606  E = NestFTy->param_end();
3607  I != E; ++NestArgNo, ++I) {
3608  AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
3609  if (AS.hasAttribute(Attribute::Nest)) {
3610  // Record the parameter type and any other attributes.
3611  NestTy = *I;
3612  NestAttr = AS;
3613  break;
3614  }
3615  }
3616 
3617  if (NestTy) {
3618  std::vector<Value*> NewArgs;
3619  std::vector<AttributeSet> NewArgAttrs;
3620  NewArgs.reserve(Call.arg_size() + 1);
3621  NewArgAttrs.reserve(Call.arg_size());
3622 
3623  // Insert the nest argument into the call argument list, which may
3624  // mean appending it. Likewise for attributes.
3625 
3626  {
3627  unsigned ArgNo = 0;
3628  auto I = Call.arg_begin(), E = Call.arg_end();
3629  do {
3630  if (ArgNo == NestArgNo) {
3631  // Add the chain argument and attributes.
3632  Value *NestVal = Tramp.getArgOperand(2);
3633  if (NestVal->getType() != NestTy)
3634  NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
3635  NewArgs.push_back(NestVal);
3636  NewArgAttrs.push_back(NestAttr);
3637  }
3638 
3639  if (I == E)
3640  break;
3641 
3642  // Add the original argument and attributes.
3643  NewArgs.push_back(*I);
3644  NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
3645 
3646  ++ArgNo;
3647  ++I;
3648  } while (true);
3649  }
3650 
3651  // The trampoline may have been bitcast to a bogus type (FTy).
3652  // Handle this by synthesizing a new function type, equal to FTy
3653  // with the chain parameter inserted.
3654 
3655  std::vector<Type*> NewTypes;
3656  NewTypes.reserve(FTy->getNumParams()+1);
3657 
3658  // Insert the chain's type into the list of parameter types, which may
3659  // mean appending it.
3660  {
3661  unsigned ArgNo = 0;
3662  FunctionType::param_iterator I = FTy->param_begin(),
3663  E = FTy->param_end();
3664 
3665  do {
3666  if (ArgNo == NestArgNo)
3667  // Add the chain's type.
3668  NewTypes.push_back(NestTy);
3669 
3670  if (I == E)
3671  break;
3672 
3673  // Add the original type.
3674  NewTypes.push_back(*I);
3675 
3676  ++ArgNo;
3677  ++I;
3678  } while (true);
3679  }
3680 
3681  // Replace the trampoline call with a direct call. Let the generic
3682  // code sort out any function type mismatches.
3683  FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(), NewTypes,
3684  FTy->isVarArg());
3685  Constant *NewCallee =
3686  NestF->getType() == PointerType::getUnqual(NewFTy) ?
3687  NestF : ConstantExpr::getBitCast(NestF,
3688  PointerType::getUnqual(NewFTy));
3689  AttributeList NewPAL =
3690  AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
3691  Attrs.getRetAttrs(), NewArgAttrs);
3692 
3693  SmallVector<OperandBundleDef, 1> OpBundles;
3694  Call.getOperandBundlesAsDefs(OpBundles);
3695 
3696  Instruction *NewCaller;
3697  if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
3698  NewCaller = InvokeInst::Create(NewFTy, NewCallee,
3699  II->getNormalDest(), II->getUnwindDest(),
3700  NewArgs, OpBundles);
3701  cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
3702  cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
3703  } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
3704  NewCaller =
3705  CallBrInst::Create(NewFTy, NewCallee, CBI->getDefaultDest(),
3706  CBI->getIndirectDests(), NewArgs, OpBundles);
3707  cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
3708  cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
3709  } else {
3710  NewCaller = CallInst::Create(NewFTy, NewCallee, NewArgs, OpBundles);
3711  cast<CallInst>(NewCaller)->setTailCallKind(
3712  cast<CallInst>(Call).getTailCallKind());
3713  cast<CallInst>(NewCaller)->setCallingConv(
3714  cast<CallInst>(Call).getCallingConv());
3715  cast<CallInst>(NewCaller)->setAttributes(NewPAL);
3716  }
3717  NewCaller->setDebugLoc(Call.getDebugLoc());
3718 
3719  return NewCaller;
3720  }
3721  }
3722 
3723  // Replace the trampoline call with a direct call. Since there is no 'nest'
3724  // parameter, there is no need to adjust the argument list. Let the generic
3725  // code sort out any function type mismatches.
3726  Constant *NewCallee = ConstantExpr::getBitCast(NestF, CalleeTy);
3727  Call.setCalledFunction(FTy, NewCallee);
3728  return &Call;
3729 }
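An illustrative end-to-end example of the trampoline fold, in hypothetical typed-pointer IR where @f is assumed to be declared as void (i8* nest, i32): the adjust.trampoline result is traced back to its init.trampoline, and the indirect call becomes a direct call with the nest chain spliced into the argument list.

//   call void @llvm.init.trampoline(i8* %tramp,
//                                   i8* bitcast (void (i8*, i32)* @f to i8*),
//                                   i8* %nest)
//   %p  = call i8* @llvm.adjust.trampoline(i8* %tramp)
//   %fn = bitcast i8* %p to void (i32)*
//   call void %fn(i32 7)
// becomes a direct call with the chain argument inserted:
//   call void @f(i8* nest %nest, i32 7)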
Set the parameter attributes for this call.
Definition: InstrTypes.h:1478
llvm::BinaryOperator::CreateNSW
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition: InstrTypes.h:283
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:306
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::PatternMatch::m_PosZeroFP
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
Definition: PatternMatch.h:673
llvm::X86::FirstMacroFusionInstKind::AddSub
@ AddSub
llvm::SyncScope::SingleThread
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
InstructionWorklist.h
llvm::LoadInst::setVolatile
void setVolatile(bool V)
Specify whether this is a volatile load or not.
Definition: Instructions.h:214
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1517
AssumeBundleQueries.h
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:190
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::AMDGPU::PALMD::Key
Key
PAL metadata keys.
Definition: AMDGPUMetadata.h:486
llvm::BitTracker
Definition: BitTracker.h:35
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:716
AssumeBundleBuilder.h
llvm::VectorType::getElementCount
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Definition: DerivedTypes.h:627
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::alignDown
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is Skew mod Align.
Definition: MathExtras.h:695
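Worked values under the semantics described above (a sketch, not code from this file):
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  static void alignDownExamples() {
    assert(llvm::alignDown(17, 8) == 16);             // largest multiple of 8 that is <= 17
    assert(llvm::alignDown(17, 8, /*Skew=*/3) == 11); // largest value <= 17 that is 3 (mod 8)
  }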
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1629
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::SmallBitVector::all
bool all() const
Returns true if all bits are set.
Definition: SmallBitVector.h:216
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction::CastOps
CastOps
Definition: Instruction.h:787
llvm::PatternMatch::m_MaxOrMin
match_combine_or< match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > >, match_combine_or< MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty >, MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > > > m_MaxOrMin(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1882
llvm::PatternMatch::m_FNeg
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
Definition: PatternMatch.h:1033
llvm::SaturatingInst
Represents a saturating add/sub intrinsic.
Definition: IntrinsicInst.h:702
llvm::Constant::getAllOnesValue
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:395
llvm::pdb::PDB_SymType::Caller
@ Caller
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
llvm::FunctionType::param_begin
param_iterator param_begin() const
Definition: DerivedTypes.h:128
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::PatternMatch::m_UMin
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1871
llvm::PatternMatch::m_c_Or
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
Definition: PatternMatch.h:2258
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
APFloat.h
llvm::UnaryOperator::CreateWithCopiedFlags
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: InstrTypes.h:156
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
llvm::CallBase::bundle_op_info_begin
bundle_op_iterator bundle_op_info_begin()
Return the start of the list of BundleOpInfo instances associated with this OperandBundleUser.
Definition: InstrTypes.h:2225
llvm::InstCombinerImpl
Definition: InstCombineInternal.h:61
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
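A short sketch of the scalar vs. splat behavior described above (Ctx is an assumed LLVMContext):
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
  static void constantSplatExample(llvm::LLVMContext &Ctx) {
    llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
    llvm::Constant *FortyTwo = llvm::ConstantInt::get(I32, 42);           // i32 42
    llvm::Constant *Splat =
        llvm::ConstantInt::get(llvm::FixedVectorType::get(I32, 4), 42);   // <4 x i32> splat of 42
    (void)FortyTwo;
    (void)Splat;
  }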
llvm::PatternMatch::m_NSWSub
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1164
llvm::FunctionCallee::getFunctionType
FunctionType * getFunctionType()
Definition: DerivedTypes.h:182
llvm::simplifyFMAFMul
Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
Definition: InstructionSimplify.cpp:5371
STLFunctionalExtras.h
llvm::APSInt::getMinValue
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition: APSInt.h:290
llvm::getKnowledgeFromBundle
RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
Definition: AssumeBundleQueries.cpp:99
llvm::KnownBits::isNegative
bool isNegative() const
Returns true if this value is known to be negative.
Definition: KnownBits.h:96
canonicalizeConstantArg0ToArg1
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
Definition: InstCombineCalls.cpp:783
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
PatternMatch.h
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:155
llvm::Metadata
Root of the metadata hierarchy.
Definition: Metadata.h:62
llvm::Instruction::mayWriteToMemory
bool mayWriteToMemory() const
Return true if this instruction may modify memory.
Definition: Instruction.cpp:622
llvm::PatternMatch::m_APIntAllowUndef
apint_match m_APIntAllowUndef(const APInt *&Res)
Match APInt while allowing undefs in splat vector constants.
Definition: PatternMatch.h:284
llvm::PointerType::isOpaque
bool isOpaque() const
Definition: DerivedTypes.h:673
llvm::CallBase::getCallingConv
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1455
llvm::None
const NoneType None
Definition: None.h:24
Statepoint.h
llvm::Type::getIntegerBitWidth
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:97
llvm::AttributeList::addParamAttribute
AttributeList addParamAttribute(LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const
Add an argument attribute to the list.
Definition: Attributes.h:563
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
Type.h
llvm::IRBuilderBase::CreateAddrSpaceCast
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1989
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::AnyMemTransferInst
Definition: IntrinsicInst.h:1140
llvm::Log2_32_Ceil
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:561
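A few illustrative values:
  #include "llvm/Support/MathExtras.h"
  #include <cassert>
  static void log2CeilExamples() {
    assert(llvm::Log2_32_Ceil(1) == 0);
    assert(llvm::Log2_32_Ceil(5) == 3); // 2^3 = 8 is the smallest power of two >= 5
    assert(llvm::Log2_32_Ceil(8) == 3);
  }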
llvm::CastInst::isLosslessCast
bool isLosslessCast() const
A lossless cast is one that does not alter the basic value.
Definition: Instructions.cpp:2958
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:517
llvm::Instruction::getMetadata
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:271
llvm::OverflowResult
OverflowResult
Definition: ValueTracking.h:501
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1307
findInitTrampolineFromAlloca
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
Definition: InstCombineCalls.cpp:2886
reassociateMinMaxWithConstantInOperand
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
Definition: InstCombineCalls.cpp:999
llvm::getFreedOperand
Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
Definition: MemoryBuiltins.cpp:582
llvm::ValueHandleBase::ValueIsRAUWd
static void ValueIsRAUWd(Value *Old, Value *New)
Definition: Value.cpp:1190
llvm::PatternMatch::m_NSWAdd
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1156
llvm::IRBuilderBase::CreateLaunderInvariantGroup
Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
Definition: IRBuilder.cpp:1153
llvm::Function::getAttributes
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:314
llvm::InvokeInst
Invoke instruction.
Definition: Instructions.h:3811
llvm::IRBuilderBase::CreateZExt
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1892
llvm::PatternMatch::m_Xor
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1105
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
removeTriviallyEmptyRange
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
Definition: InstCombineCalls.cpp:748
llvm::InlineAsm
Definition: InlineAsm.h:32
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::Instruction::andIRFlags
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
Definition: Instruction.cpp:348
llvm::IRBuilderBase::FastMathFlagGuard
Definition: IRBuilder.h:380
VectorUtils.h
llvm::KnownBits::isAllOnes
bool isAllOnes() const
Returns true if value is all one bits.
Definition: KnownBits.h:78
llvm::Use::set
void set(Value *Val)
Definition: Value.h:868
llvm::ConstantExpr::getTrunc
static Constant * getTrunc(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2064
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:628
llvm::GCStatepointInst::getGCRelocates
std::vector< const GCRelocateInst * > getGCRelocates() const
Get the list of all gc relocates linked to this statepoint. May contain several relocations for the same b...
Definition: Statepoint.h:206
BasicBlock.h
llvm::cl::opt
Definition: CommandLine.h:1399
llvm::APFloat
Definition: APFloat.h:701
llvm::IRBuilderBase::CreateBitCast
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:1984
llvm::RetainedKnowledge::AttrKind
Attribute::AttrKind AttrKind
Definition: AssumeBundleQueries.h:101
llvm::PatternMatch::m_Zero
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
Definition: PatternMatch.h:537
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::PatternMatch::m_NUWAdd
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1189
llvm::AttributeList::getRetAttrs
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
Definition: Attributes.cpp:1384
llvm::FunctionType::param_iterator
Type::subtype_iterator param_iterator
Definition: DerivedTypes.h:126
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::GCRelocateInst
Represents calls to the gc.relocate intrinsic.
Definition: IntrinsicInst.h:1389
llvm::simplifyConstrainedFPCall
Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
Definition: InstructionSimplify.cpp:6298
findInitTrampoline
static IntrinsicInst * findInitTrampoline(Value *Callee)
Definition: InstCombineCalls.cpp:2946
llvm::PointerType::getUnqual
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:651
llvm::InlineAsm::canThrow
bool canThrow() const
Definition: InlineAsm.h:72
llvm::LibCallSimplifier
LibCallSimplifier - This class implements a collection of optimizations that replace well formed call...
Definition: SimplifyLibCalls.h:100
llvm::InstCombiner::getAssumptionCache
AssumptionCache & getAssumptionCache() const
Definition: InstCombiner.h:368
llvm::BinaryOperator::CreateFMulFMF
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, Instruction *FMFSource, const Twine &Name="")
Definition: InstrTypes.h:267
llvm::BinaryOpIntrinsic::isSigned
bool isSigned() const
Whether the intrinsic is signed or unsigned.
Definition: IntrinsicInst.cpp:710
uint64_t
llvm::Type::getWithNewBitWidth
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
Definition: DerivedTypes.h:722
llvm::InstCombiner::getDataLayout
const DataLayout & getDataLayout() const
Definition: InstCombiner.h:371
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:238
llvm::PatternMatch::m_LogicalOr
LogicalOp_match< LHS, RHS, Instruction::Or > m_LogicalOr(const LHS &L, const RHS &R)
Matches L || R either in the form of L | R or L ? true : R.
Definition: PatternMatch.h:2555
llvm::BitmaskEnumDetail::Underlying
constexpr std::underlying_type_t< E > Underlying(E Val)
Check that Val is in range for E, and return Val cast to E's underlying type.
Definition: BitmaskEnum.h:90
llvm::RetainedKnowledge::ArgValue
uint64_t ArgValue
Definition: AssumeBundleQueries.h:102
llvm::LoadInst::setOrdering
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this load instruction.
Definition: Instructions.h:231
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TruncInst
This class represents a truncation of integer types.
Definition: Instructions.h:4811
llvm::GCRelocateInst::getDerivedPtr
Value * getDerivedPtr() const
Definition: IntrinsicInst.cpp:762
llvm::AttributeSet::get
static AttributeSet get(LLVMContext &C, const AttrBuilder &B)
Definition: Attributes.cpp:623
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1699
llvm::AttributeFuncs::ASK_SAFE_TO_DROP
@ ASK_SAFE_TO_DROP
Definition: Attributes.h:1243
llvm::CallBrInst
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
Definition: Instructions.h:4020
llvm::PatternMatch::m_AllOnes
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:453
simplifyNeonTbl1
static Value * simplifyNeonTbl1(const IntrinsicInst &II, InstCombiner::BuilderTy &Builder)
Convert a table lookup to shufflevector if the mask is constant.
Definition: InstCombineCalls.cpp:692
llvm::Instruction::isIdenticalTo
bool isIdenticalTo(const Instruction *I) const
Return true if the specified instruction is exactly identical to the current one.
Definition: Instruction.cpp:528
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:54
llvm::PatternMatch::m_ImmConstant
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:751
llvm::DenseMap< Value *, unsigned >
llvm::InstCombinerImpl::visitFenceInst
Instruction * visitFenceInst(FenceInst &FI)
Definition: InstCombineCalls.cpp:2781
llvm::AtomicOrdering::Unordered
@ Unordered
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::AttrBuilder
Definition: Attributes.h:1030
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
llvm::LoadInst::setAlignment
void setAlignment(Align Align)
Definition: Instructions.h:221
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:439
llvm::lowerObjectSizeCall
Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
Definition: MemoryBuiltins.cpp:625
llvm::PatternMatch::m_FAbs
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
Definition: PatternMatch.h:2171
llvm::Attribute::getWithDereferenceableBytes
static Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
Definition: Attributes.cpp:174
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1093
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
ArrayRef.h
llvm::CallBase::addRetAttr
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Definition: InstrTypes.h:1516
llvm::Instruction::setDebugLoc
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:351
llvm::PatternMatch::m_SRem
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1081
llvm::AttributeSet::removeAttributes
AttributeSet removeAttributes(LLVMContext &C, const AttributeMask &AttrsToRemove) const
Remove the specified attributes from this set.
Definition: Attributes.cpp:675
llvm::computeKnownBits
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Definition: ValueTracking.cpp:222
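A minimal calling sketch (the helper is hypothetical; DL, AC, CxtI and DT are the usual analysis handles) of asking whether the sign bit of V is known to be clear:
  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/Type.h"
  #include "llvm/IR/Value.h"
  #include "llvm/Support/KnownBits.h"
  static bool knownNonNegative(const llvm::Value *V, const llvm::DataLayout &DL,
                               llvm::AssumptionCache *AC,
                               const llvm::Instruction *CxtI,
                               const llvm::DominatorTree *DT) {
    llvm::KnownBits Known(V->getType()->getScalarSizeInBits());
    llvm::computeKnownBits(V, Known, DL, /*Depth=*/0, AC, CxtI, DT);
    return Known.isNonNegative(); // true only if the sign bit is known zero
  }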
maximum
Definition: README.txt:489
llvm::KnownBits::countMaxLeadingZeros
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition: KnownBits.h:283
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
foldClampRangeOfTwo
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
Definition: InstCombineCalls.cpp:935
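The arithmetic behind the fold, checked with ordinary integers (a sketch of the reasoning, not the InstCombine code):
  #include <algorithm>
  #include <cassert>
  static void clampRangeOfTwoExample(int X) {
    int R = std::max(std::min(X, 42), 41);
    assert(R == 41 || R == 42);       // only two values are reachable
    assert(R == (X < 42 ? 41 : 42));  // so one compare plus a select suffices
  }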
llvm::KnownBits::countMaxTrailingZeros
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition: KnownBits.h:273
llvm::SPF_ABS
@ SPF_ABS
Absolute value.
Definition: ValueTracking.h:695
llvm::CallBase::hasOperandBundles
bool hasOperandBundles() const
Return true if this User has any operand bundles.
Definition: InstrTypes.h:1945
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::isKnownNegation
bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false)
Return true if the two given values are negations of each other.
Definition: ValueTracking.cpp:6063
llvm::FenceInst::getOrdering
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
Definition: Instructions.h:458
llvm::Instruction::isFast
bool isFast() const
Determine whether all fast-math-flags are set.
Definition: Instruction.cpp:275
llvm::codeview::CompileSym2Flags::EC
@ EC
llvm::KnownBits::countMinLeadingZeros
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition: KnownBits.h:243
llvm::KnownBits::countMaxPopulation
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition: KnownBits.h:298
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:752
APSInt.h
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::PatternMatch::m_SMin
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1859
llvm::MinMaxIntrinsic::getPredicate
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
Definition: IntrinsicInst.h:589
simplifyInvariantGroupIntrinsic
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
Definition: InstCombineCalls.cpp:471
foldCtpop
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
Definition: InstCombineCalls.cpp:609
llvm::IRBuilderBase::getTrue
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:449
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MDNode
Metadata node.
Definition: Metadata.h:944
llvm::CallBase::Create
static CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, Instruction *InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
Definition: Instructions.cpp:256
llvm::PatternMatch::m_Constant
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:144
llvm::LinearPolySize::getKnownMinValue
ScalarTy getKnownMinValue() const
Returns the minimum value this size can represent.
Definition: TypeSize.h:296
haveSameOperands
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
Definition: InstCombineCalls.cpp:728
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
llvm::ARM::WinEH::ReturnType
ReturnType
Definition: ARMWinEH.h:25
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::User::setOperand
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:748
llvm::APSInt::getMaxValue
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition: APSInt.h:283
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2127
llvm::PatternMatch::m_SExt
CastClass_match< OpTy, Instruction::SExt > m_SExt(const OpTy &Op)
Matches SExt.
Definition: PatternMatch.h:1623
llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:854
llvm::Function::isConvergent
bool isConvergent() const
Determine if the call is convergent.
Definition: Function.h:571
llvm::PatternMatch::m_CombineAnd
match_combine_and< LTy, RTy > m_CombineAnd(const LTy &L, const RTy &R)
Combine two pattern matchers matching L && R.
Definition: PatternMatch.h:224
llvm::ValueHandleBase::ValueIsDeleted
static void ValueIsDeleted(Value *V)
Definition: Value.cpp:1137
llvm::ArrayRef< int >
llvm::VAEndInst
This represents the llvm.va_end intrinsic.
Definition: IntrinsicInst.h:1212
llvm::Instruction::getFastMathFlags
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
Definition: Instruction.cpp:315
llvm::BinaryOperator
Definition: InstrTypes.h:188
None.h
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:137
llvm::isDereferenceablePointer
bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:219
llvm::PatternMatch::m_Undef
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:136
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
SimplifyLibCalls.h
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:744
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Constant::getAggregateElement
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
Definition: Constants.cpp:410
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
isSafeToEliminateVarargsCast
static bool isSafeToEliminateVarargsCast(const CallBase &Call, const DataLayout &DL, const CastInst *const CI, const int ix)
If this cast does not affect the value passed through the varargs area, we can eliminate the use of t...
Definition: InstCombineCalls.cpp:2820
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:69
llvm::PatternMatch::m_Shuffle
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
Definition: PatternMatch.h:1551
moveAddAfterMinMax
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
Definition: InstCombineCalls.cpp:834
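The underlying arithmetic with C0 = 4 and C1 = 12 (illustrative; the real fold additionally has to prove that X + C0 does not wrap):
  #include <algorithm>
  #include <cassert>
  #include <climits>
  static void moveAddAfterMinExample(unsigned X) {
    if (X > UINT_MAX - 4)
      return;                                              // only valid without wrap
    assert(std::min(X + 4u, 12u) == std::min(X, 8u) + 4u); // min(X+4, 12) == min(X, 8) + 4
  }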
uint32_t
Compiler.h
llvm::CallInst::isNoTailCall
bool isNoTailCall() const
Definition: Instructions.h:1675
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::InstCombinerImpl::visitVAEndInst
Instruction * visitVAEndInst(VAEndInst &I)
Definition: InstCombineCalls.cpp:775
llvm::MinMax
Definition: AssumeBundleQueries.h:71
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
Definition: README.txt:210
llvm::CastInst::isBitOrNoopPointerCastable
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
Definition: Instructions.cpp:3510
llvm::ConstantExpr::getIntegerCast
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2040
llvm::FPExtInst
This class represents an extension of floating point types.
Definition: Instructions.h:4967
trunc
Definition: README-FPStack.txt:63
llvm::RetainedKnowledge
Represent one information held inside an operand bundle of an llvm.assume.
Definition: AssumeBundleQueries.h:100
llvm::Instruction::copyFastMathFlags
void copyFastMathFlags(FastMathFlags FMF)
Convenience function for transferring all fast-math flag values to this instruction,...
Definition: Instruction.cpp:270
llvm::CastInst
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:429
llvm::WithOverflowInst
Represents an op.with.overflow intrinsic.
Definition: IntrinsicInst.h:681
llvm::CallInst::isMustTailCall
bool isMustTailCall() const
Definition: Instructions.h:1673
llvm::LLVMContext::OB_kcfi
@ OB_kcfi
Definition: LLVMContext.h:97
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
hasUndefSource
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
Definition: InstCombineCalls.cpp:110
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1061
llvm::Attribute::getWithDereferenceableOrNullBytes
static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
Definition: Attributes.cpp:180
llvm::CallBase::hasRetAttr
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
Definition: InstrTypes.h:1595
llvm::Value::setMetadata
void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
Definition: Metadata.cpp:1326
llvm::PatternMatch::m_SMax
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1853
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
ValueHandle.h
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:685
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
llvm::CallBase::doesNotThrow
bool doesNotThrow() const
Determine if the call cannot unwind.
Definition: InstrTypes.h:1899
llvm::OperandBundleUse::Inputs
ArrayRef< Use > Inputs
Definition: InstrTypes.h:1070
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
Callee
Definition: AMDGPULibCalls.cpp:186
getPromotedType
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
Definition: InstCombineCalls.cpp:99
llvm::Attribute::getWithAlignment
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Definition: Attributes.cpp:164
llvm::Function::doesNotThrow
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition: Function.h:555
Attributes.h
llvm::PatternMatch::m_FPExt
CastClass_match< OpTy, Instruction::FPExt > m_FPExt(const OpTy &Op)
Definition: PatternMatch.h:1687
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2142
Constant.h
llvm::NVPTX::PTXLdStInstCode::V2
@ V2
Definition: NVPTX.h:123
llvm::GCRelocateInst::getBasePtr
Value * getBasePtr() const
Definition: IntrinsicInst.cpp:751
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1296
llvm::IRBuilderBase::getFalse
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition: IRBuilder.h:454
llvm::APInt::uadd_sat
APInt uadd_sat(const APInt &RHS) const
Definition: APInt.cpp:2019
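An illustrative 8-bit value:
  #include "llvm/ADT/APInt.h"
  #include <cassert>
  static void uaddSatExample() {
    llvm::APInt A(8, 250), B(8, 10);
    assert(A.uadd_sat(B) == 255); // 250 + 10 saturates at the 8-bit maximum 255
  }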
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:827
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::KnownBits
Definition: KnownBits.h:23
llvm::ConstantExpr::getNeg
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2617
llvm::AnyMemSetInst
This class represents any memset intrinsic.
Definition: IntrinsicInst.h:1120
llvm::CastInst::CreateBitOrPointerCast
static CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", Instruction *InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
Definition: Instructions.cpp:3405
llvm::KnownBits::countMinPopulation
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition: KnownBits.h:293
NC
#define NC
Definition: regutils.h:42
llvm::BinaryOperator::CreateNUW
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition: InstrTypes.h:302
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::ConstantInt::getBool
static ConstantInt * getBool(LLVMContext &Context, bool V)
Definition: Constants.cpp:841
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1339
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:202
GlobalVariable.h
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:926
llvm::getAllocAlignment
Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
Definition: MemoryBuiltins.cpp:372
llvm::getAllocSize
Optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
Definition: MemoryBuiltins.cpp:398
Casting.h
llvm::InstCombinerImpl::visitCallBrInst
Instruction * visitCallBrInst(CallBrInst &CBI)
Definition: InstCombineCalls.cpp:2814
LowAndHigh
Metadata * LowAndHigh[]
Definition: NVVMIntrRange.cpp:68
llvm::isRemovableAlloc
bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
Definition: MemoryBuiltins.cpp:361
Function.h
llvm::InstCombinerImpl::FoldOpIntoSelect
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Definition: InstructionCombining.cpp:1075
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::BinaryOperator::CreateNSWNeg
static BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", Instruction *InsertBefore=nullptr)
Definition: Instructions.cpp:2878
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:99
llvm::PatternMatch::m_BSwap
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
Definition: PatternMatch.h:2166
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::AttributeFuncs::typeIncompatible
AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK=ASK_ALL)
Which attributes cannot be applied to a type.
Definition: Attributes.cpp:1811
llvm::OverflowResult::AlwaysOverflowsHigh
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
llvm::Function::getFunctionType
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:175
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:524
llvm::getInverseMinMaxIntrinsic
Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
Definition: ValueTracking.cpp:6461
llvm::PatternMatch::m_PtrToInt
CastClass_match< OpTy, Instruction::PtrToInt > m_PtrToInt(const OpTy &Op)
Matches PtrToInt.
Definition: PatternMatch.h:1599
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::Instruction::hasAllowReassoc
bool hasAllowReassoc() const
Determine whether the allow-reassociation flag is set.
Definition: Instruction.cpp:280
llvm::CallBase::getCalledOperand
Value * getCalledOperand() const
Definition: InstrTypes.h:1389
createOverflowTuple
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
Definition: InstCombineCalls.cpp:796
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::PatternMatch::m_LogicalShift
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
Definition: PatternMatch.h:1311
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:475
AA
llvm::InstCombinerImpl::replaceOperand
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
Definition: InstCombineInternal.h:427
llvm::Instruction::BinaryOps
BinaryOps
Definition: Instruction.h:773
llvm::BasicBlock::reverse_iterator
InstListType::reverse_iterator reverse_iterator
Definition: BasicBlock.h:89
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2008
llvm::Type::isOpaquePointerTy
bool isOpaquePointerTy() const
True if this is an instance of an opaque PointerType.
Definition: Type.cpp:61
llvm::RecurKind::FAdd
@ FAdd
Sum of floats.
Instructions.h
llvm::isImpliedByDomCondition
Optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
Definition: ValueTracking.cpp:6894
llvm::Type::isStructTy
bool isStructTy() const
True if this is an instance of StructType.
Definition: Type.h:217
llvm::AttributeSet
Definition: Attributes.h:290
SmallVector.h
getKnownSign
static Optional< bool > getKnownSign(Value *Op, Instruction *CxtI, const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT)
Definition: InstCombineCalls.cpp:815
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:772
User.h
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1394
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:742
llvm::User::op_begin
op_iterator op_begin()
Definition: User.h:234
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
SmallBitVector.h
llvm::IntrinsicInst::isCommutative
bool isCommutative() const
Return true if swapping the first two arguments to the intrinsic produces the same result.
Definition: IntrinsicInst.h:59