LLVM  16.0.0git
AMDGPULibCalls.cpp
1 //===- AMDGPULibCalls.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file implements AMD library function optimizations.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULibFunc.h"
16 #include "GCNSubtarget.h"
17 #include "llvm/Analysis/AliasAnalysis.h"
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/IR/IRBuilder.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/IR/IntrinsicsAMDGPU.h"
22 #include "llvm/InitializePasses.h"
23 #include "llvm/Target/TargetMachine.h"
24 #include <cmath>
25 
26 #define DEBUG_TYPE "amdgpu-simplifylib"
27 
28 using namespace llvm;
29 
30 static cl::opt<bool> EnablePreLink("amdgpu-prelink",
31  cl::desc("Enable pre-link mode optimizations"),
32  cl::init(false),
33  cl::Hidden);
34 
35 static cl::list<std::string> UseNative("amdgpu-use-native",
36  cl::desc("Comma separated list of functions to replace with native, or all"),
37  cl::CommaSeparated, cl::ValueOptional,
38  cl::Hidden);
39 
40 #define MATH_PI numbers::pi
41 #define MATH_E numbers::e
42 #define MATH_SQRT2 numbers::sqrt2
43 #define MATH_SQRT1_2 numbers::inv_sqrt2
44 
45 namespace llvm {
46 
47 class AMDGPULibCalls {
48 private:
49 
50   typedef llvm::AMDGPULibFunc FuncInfo;
51 
52  const TargetMachine *TM;
53 
54  // -fuse-native.
55  bool AllNative = false;
56 
57  bool useNativeFunc(const StringRef F) const;
58 
59  // Return a pointer (pointer expr) to the function if a function definition with
60  // "FuncName" exists. It may create a new function prototype in pre-link mode.
61  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
62 
63  bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
64 
65  bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
66 
67  /* Specialized optimizations */
68 
69  // recip (half or native)
70  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
71 
72  // divide (half or native)
73  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
74 
75  // pow/powr/pown
76  bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
77 
78  // rootn
79  bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
80 
81  // fma/mad
82  bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
83 
84  // -fuse-native for sincos
85  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
86 
87  // evaluate calls if calls' arguments are constants.
88  bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0,
89  double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
90  bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
91 
92  // sqrt
93  bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
94 
95  // sin/cos
96  bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
97 
98  // __read_pipe/__write_pipe
99  bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
100  const FuncInfo &FInfo);
101 
102  // llvm.amdgcn.wavefrontsize
103  bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
104 
105  // Get insertion point at entry.
106  BasicBlock::iterator getEntryIns(CallInst * UI);
107  // Insert an Alloc instruction.
108  AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
109  // Get a scalar native builtin single argument FP function
110  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
111 
112 protected:
113   CallInst *CI;
114 
115  bool isUnsafeMath(const CallInst *CI) const;
116 
117  void replaceCall(Value *With) {
118  CI->replaceAllUsesWith(With);
119  CI->eraseFromParent();
120  }
121 
122 public:
123  AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
124 
125  bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
126 
127  void initNativeFuncs();
128 
129  // Replace a normal math function call with its native version.
130  bool useNative(CallInst *CI);
131 };
132 
133 } // end llvm namespace
134 
135 namespace {
136 
137  class AMDGPUSimplifyLibCalls : public FunctionPass {
138 
139  AMDGPULibCalls Simplifier;
140 
141  public:
142  static char ID; // Pass identification
143 
144  AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
145  : FunctionPass(ID), Simplifier(TM) {
146   initializeAMDGPUSimplifyLibCallsPass(*PassRegistry::getPassRegistry());
147   }
148 
149  void getAnalysisUsage(AnalysisUsage &AU) const override {
150   AU.addRequired<AAResultsWrapperPass>();
151   }
152 
153  bool runOnFunction(Function &M) override;
154  };
155 
156  class AMDGPUUseNativeCalls : public FunctionPass {
157 
158  AMDGPULibCalls Simplifier;
159 
160  public:
161  static char ID; // Pass identification
162 
163  AMDGPUUseNativeCalls() : FunctionPass(ID) {
164   initializeAMDGPUUseNativeCallsPass(*PassRegistry::getPassRegistry());
165   Simplifier.initNativeFuncs();
166  }
167 
168  bool runOnFunction(Function &F) override;
169  };
170 
171 } // end anonymous namespace.
172 
173 char AMDGPUSimplifyLibCalls::ID = 0;
174 char AMDGPUUseNativeCalls::ID = 0;
175 
176 INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
177                       "Simplify well-known AMD library calls", false, false)
178 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
179 INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
180                     "Simplify well-known AMD library calls", false, false)
181 
182 INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
183  "Replace builtin math calls with their native versions.",
184  false, false)
185 
186 template <typename IRB>
187 static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
188  const Twine &Name = "") {
189  CallInst *R = B.CreateCall(Callee, Arg, Name);
190  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
191  R->setCallingConv(F->getCallingConv());
192  return R;
193 }
194 
195 template <typename IRB>
196 static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
197  Value *Arg2, const Twine &Name = "") {
198  CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
199  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
200  R->setCallingConv(F->getCallingConv());
201  return R;
202 }
203 
204 // Data structures for table-driven optimizations.
205 // FuncTbl works for both f32 and f64 functions with 1 input argument
206 
207 struct TableEntry {
208  double result;
209  double input;
210 };
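// Example of how these tables are used (illustrative): TDOFold() below looks up
// a constant call argument in the table for the callee and, on an exact match
// of `input`, replaces the call with the paired `result`, e.g. acos(1.0f)
// folds to 0.0f via tbl_acos and exp(1.0f) folds to e via tbl_exp.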
211 
212 /* a list of {result, input} */
213 static const TableEntry tbl_acos[] = {
214  {MATH_PI / 2.0, 0.0},
215  {MATH_PI / 2.0, -0.0},
216  {0.0, 1.0},
217  {MATH_PI, -1.0}
218 };
219 static const TableEntry tbl_acosh[] = {
220  {0.0, 1.0}
221 };
222 static const TableEntry tbl_acospi[] = {
223  {0.5, 0.0},
224  {0.5, -0.0},
225  {0.0, 1.0},
226  {1.0, -1.0}
227 };
228 static const TableEntry tbl_asin[] = {
229  {0.0, 0.0},
230  {-0.0, -0.0},
231  {MATH_PI / 2.0, 1.0},
232  {-MATH_PI / 2.0, -1.0}
233 };
234 static const TableEntry tbl_asinh[] = {
235  {0.0, 0.0},
236  {-0.0, -0.0}
237 };
238 static const TableEntry tbl_asinpi[] = {
239  {0.0, 0.0},
240  {-0.0, -0.0},
241  {0.5, 1.0},
242  {-0.5, -1.0}
243 };
244 static const TableEntry tbl_atan[] = {
245  {0.0, 0.0},
246  {-0.0, -0.0},
247  {MATH_PI / 4.0, 1.0},
248  {-MATH_PI / 4.0, -1.0}
249 };
250 static const TableEntry tbl_atanh[] = {
251  {0.0, 0.0},
252  {-0.0, -0.0}
253 };
254 static const TableEntry tbl_atanpi[] = {
255  {0.0, 0.0},
256  {-0.0, -0.0},
257  {0.25, 1.0},
258  {-0.25, -1.0}
259 };
260 static const TableEntry tbl_cbrt[] = {
261  {0.0, 0.0},
262  {-0.0, -0.0},
263  {1.0, 1.0},
264  {-1.0, -1.0},
265 };
266 static const TableEntry tbl_cos[] = {
267  {1.0, 0.0},
268  {1.0, -0.0}
269 };
270 static const TableEntry tbl_cosh[] = {
271  {1.0, 0.0},
272  {1.0, -0.0}
273 };
274 static const TableEntry tbl_cospi[] = {
275  {1.0, 0.0},
276  {1.0, -0.0}
277 };
278 static const TableEntry tbl_erfc[] = {
279  {1.0, 0.0},
280  {1.0, -0.0}
281 };
282 static const TableEntry tbl_erf[] = {
283  {0.0, 0.0},
284  {-0.0, -0.0}
285 };
286 static const TableEntry tbl_exp[] = {
287  {1.0, 0.0},
288  {1.0, -0.0},
289  {MATH_E, 1.0}
290 };
291 static const TableEntry tbl_exp2[] = {
292  {1.0, 0.0},
293  {1.0, -0.0},
294  {2.0, 1.0}
295 };
296 static const TableEntry tbl_exp10[] = {
297  {1.0, 0.0},
298  {1.0, -0.0},
299  {10.0, 1.0}
300 };
301 static const TableEntry tbl_expm1[] = {
302  {0.0, 0.0},
303  {-0.0, -0.0}
304 };
305 static const TableEntry tbl_log[] = {
306  {0.0, 1.0},
307  {1.0, MATH_E}
308 };
309 static const TableEntry tbl_log2[] = {
310  {0.0, 1.0},
311  {1.0, 2.0}
312 };
313 static const TableEntry tbl_log10[] = {
314  {0.0, 1.0},
315  {1.0, 10.0}
316 };
317 static const TableEntry tbl_rsqrt[] = {
318  {1.0, 1.0},
319  {MATH_SQRT1_2, 2.0}
320 };
321 static const TableEntry tbl_sin[] = {
322  {0.0, 0.0},
323  {-0.0, -0.0}
324 };
325 static const TableEntry tbl_sinh[] = {
326  {0.0, 0.0},
327  {-0.0, -0.0}
328 };
329 static const TableEntry tbl_sinpi[] = {
330  {0.0, 0.0},
331  {-0.0, -0.0}
332 };
333 static const TableEntry tbl_sqrt[] = {
334  {0.0, 0.0},
335  {1.0, 1.0},
336  {MATH_SQRT2, 2.0}
337 };
338 static const TableEntry tbl_tan[] = {
339  {0.0, 0.0},
340  {-0.0, -0.0}
341 };
342 static const TableEntry tbl_tanh[] = {
343  {0.0, 0.0},
344  {-0.0, -0.0}
345 };
346 static const TableEntry tbl_tanpi[] = {
347  {0.0, 0.0},
348  {-0.0, -0.0}
349 };
350 static const TableEntry tbl_tgamma[] = {
351  {1.0, 1.0},
352  {1.0, 2.0},
353  {2.0, 3.0},
354  {6.0, 4.0}
355 };
356 
357 static bool HasNative(AMDGPULibFunc::EFuncId id) {
358  switch(id) {
359  case AMDGPULibFunc::EI_DIVIDE:
360  case AMDGPULibFunc::EI_COS:
361  case AMDGPULibFunc::EI_EXP:
362  case AMDGPULibFunc::EI_EXP2:
363  case AMDGPULibFunc::EI_EXP10:
364  case AMDGPULibFunc::EI_LOG:
365  case AMDGPULibFunc::EI_LOG2:
366  case AMDGPULibFunc::EI_LOG10:
367  case AMDGPULibFunc::EI_POWR:
368  case AMDGPULibFunc::EI_RECIP:
369  case AMDGPULibFunc::EI_RSQRT:
370  case AMDGPULibFunc::EI_SIN:
371  case AMDGPULibFunc::EI_SINCOS:
372  case AMDGPULibFunc::EI_SQRT:
373  case AMDGPULibFunc::EI_TAN:
374  return true;
375  default:;
376  }
377  return false;
378 }
379 
380 using TableRef = ArrayRef<TableEntry>;
381 
382 static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
383  switch(id) {
384  case AMDGPULibFunc::EI_ACOS:   return TableRef(tbl_acos);
385  case AMDGPULibFunc::EI_ACOSH:  return TableRef(tbl_acosh);
386  case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi);
387  case AMDGPULibFunc::EI_ASIN:   return TableRef(tbl_asin);
388  case AMDGPULibFunc::EI_ASINH:  return TableRef(tbl_asinh);
389  case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi);
390  case AMDGPULibFunc::EI_ATAN:   return TableRef(tbl_atan);
391  case AMDGPULibFunc::EI_ATANH:  return TableRef(tbl_atanh);
392  case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi);
393  case AMDGPULibFunc::EI_CBRT:   return TableRef(tbl_cbrt);
394  case AMDGPULibFunc::EI_NCOS:
395  case AMDGPULibFunc::EI_COS:    return TableRef(tbl_cos);
396  case AMDGPULibFunc::EI_COSH:   return TableRef(tbl_cosh);
397  case AMDGPULibFunc::EI_COSPI:  return TableRef(tbl_cospi);
398  case AMDGPULibFunc::EI_ERFC:   return TableRef(tbl_erfc);
399  case AMDGPULibFunc::EI_ERF:    return TableRef(tbl_erf);
400  case AMDGPULibFunc::EI_EXP:    return TableRef(tbl_exp);
401  case AMDGPULibFunc::EI_NEXP2:
402  case AMDGPULibFunc::EI_EXP2:   return TableRef(tbl_exp2);
403  case AMDGPULibFunc::EI_EXP10:  return TableRef(tbl_exp10);
404  case AMDGPULibFunc::EI_EXPM1:  return TableRef(tbl_expm1);
405  case AMDGPULibFunc::EI_LOG:    return TableRef(tbl_log);
406  case AMDGPULibFunc::EI_NLOG2:
407  case AMDGPULibFunc::EI_LOG2:   return TableRef(tbl_log2);
408  case AMDGPULibFunc::EI_LOG10:  return TableRef(tbl_log10);
409  case AMDGPULibFunc::EI_NRSQRT:
410  case AMDGPULibFunc::EI_RSQRT:  return TableRef(tbl_rsqrt);
411  case AMDGPULibFunc::EI_NSIN:
412  case AMDGPULibFunc::EI_SIN:    return TableRef(tbl_sin);
413  case AMDGPULibFunc::EI_SINH:   return TableRef(tbl_sinh);
414  case AMDGPULibFunc::EI_SINPI:  return TableRef(tbl_sinpi);
415  case AMDGPULibFunc::EI_NSQRT:
416  case AMDGPULibFunc::EI_SQRT:   return TableRef(tbl_sqrt);
417  case AMDGPULibFunc::EI_TAN:    return TableRef(tbl_tan);
418  case AMDGPULibFunc::EI_TANH:   return TableRef(tbl_tanh);
419  case AMDGPULibFunc::EI_TANPI:  return TableRef(tbl_tanpi);
420  case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma);
421  default:;
422  }
423  return TableRef();
424 }
425 
426 static inline int getVecSize(const AMDGPULibFunc& FInfo) {
427  return FInfo.getLeads()[0].VectorSize;
428 }
429 
430 static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
431  return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
432 }
433 
434 FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
435  // If we are doing PreLinkOpt, the function is external. So it is safe to
436  // use getOrInsertFunction() at this stage.
437 
438  return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
439                       : AMDGPULibFunc::getFunction(M, fInfo);
440 }
441 
442 bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
443  FuncInfo &FInfo) {
444  return AMDGPULibFunc::parse(FMangledName, FInfo);
445 }
446 
447 bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
448  if (auto Op = dyn_cast<FPMathOperator>(CI))
449  if (Op->isFast())
450  return true;
451  const Function *F = CI->getParent()->getParent();
452  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
453  return Attr.getValueAsBool();
454 }
455 
456 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
457  return AllNative || llvm::is_contained(UseNative, F);
458 }
459 
460 void AMDGPULibCalls::initNativeFuncs() {
461  AllNative = useNativeFunc("all") ||
462  (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
463  UseNative.begin()->empty());
464 }
465 
466 bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
467  bool native_sin = useNativeFunc("sin");
468  bool native_cos = useNativeFunc("cos");
469 
470  if (native_sin && native_cos) {
471  Module *M = aCI->getModule();
472  Value *opr0 = aCI->getArgOperand(0);
473 
474  AMDGPULibFunc nf;
475  nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
476  nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
477 
478  nf.setPrefix(AMDGPULibFunc::NATIVE);
479  nf.setId(AMDGPULibFunc::EI_SIN);
480  FunctionCallee sinExpr = getFunction(M, nf);
481 
482  nf.setPrefix(AMDGPULibFunc::NATIVE);
483  nf.setId(AMDGPULibFunc::EI_COS);
484  FunctionCallee cosExpr = getFunction(M, nf);
485  if (sinExpr && cosExpr) {
486  Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
487  Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
488  new StoreInst(cosval, aCI->getArgOperand(1), aCI);
489 
490  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
491  << " with native version of sin/cos");
492 
493  replaceCall(sinval);
494  return true;
495  }
496  }
497  return false;
498 }
499 
500 bool AMDGPULibCalls::useNative(CallInst *aCI) {
501  CI = aCI;
502  Function *Callee = aCI->getCalledFunction();
503 
504  FuncInfo FInfo;
505  if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
506  FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
507  getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
508  !(AllNative || useNativeFunc(FInfo.getName()))) {
509  return false;
510  }
511 
512  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
513  return sincosUseNative(aCI, FInfo);
514 
515  FInfo.setPrefix(AMDGPULibFunc::NATIVE);
516  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
517  if (!F)
518  return false;
519 
520  aCI->setCalledFunction(F);
521  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
522  << " with native version");
523  return true;
524 }
525 
526 // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
527 // builtin, with appended type size and alignment arguments, where 2 or 4
528 // indicates the original number of arguments. The library has optimized version
529 // indicates the original number of arguments. The library has an optimized version
530 // of __read_pipe_2/__read_pipe_4 when the type size and alignment have the same
531 // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
532 // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
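//
// Illustrative sketch of the rewrite (names and pointer types simplified, not
// taken verbatim from the device library):
//   %r = call i32 @__read_pipe_2(ptr %pipe, ptr %dst, i32 4, i32 4)
// becomes
//   %dst.cast = <pointer cast of %dst to a 32-bit element pointer>
//   %r = call i32 @__read_pipe_2_4(ptr %pipe, ptr %dst.cast)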
533 bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
534  const FuncInfo &FInfo) {
535  auto *Callee = CI->getCalledFunction();
536  if (!Callee->isDeclaration())
537  return false;
538 
539  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
540  auto *M = Callee->getParent();
541  auto &Ctx = M->getContext();
542  std::string Name = std::string(Callee->getName());
543  auto NumArg = CI->arg_size();
544  if (NumArg != 4 && NumArg != 6)
545  return false;
546  auto *PacketSize = CI->getArgOperand(NumArg - 2);
547  auto *PacketAlign = CI->getArgOperand(NumArg - 1);
548  if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
549  return false;
550  unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
551  Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
552  if (Alignment != Size)
553  return false;
554 
555  Type *PtrElemTy;
556  if (Size <= 8)
557  PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
558  else
559  PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
560  unsigned PtrArgLoc = CI->arg_size() - 3;
561  auto PtrArg = CI->getArgOperand(PtrArgLoc);
562  unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
563  auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
564 
565  SmallVector<llvm::Type *, 6> ArgTys;
566  for (unsigned I = 0; I != PtrArgLoc; ++I)
567  ArgTys.push_back(CI->getArgOperand(I)->getType());
568  ArgTys.push_back(PtrTy);
569 
570  Name = Name + "_" + std::to_string(Size);
571  auto *FTy = FunctionType::get(Callee->getReturnType(),
572  ArrayRef<Type *>(ArgTys), false);
573  AMDGPULibFunc NewLibFunc(Name, FTy);
574  FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
575  if (!F)
576  return false;
577 
578  auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
579  SmallVector<llvm::Value *, 6> Args;
580  for (unsigned I = 0; I != PtrArgLoc; ++I)
581  Args.push_back(CI->getArgOperand(I));
582  Args.push_back(BCast);
583 
584  auto *NCI = B.CreateCall(F, Args);
585  NCI->setAttributes(CI->getAttributes());
586  CI->replaceAllUsesWith(NCI);
587  CI->dropAllReferences();
588  CI->eraseFromParent();
589 
590  return true;
591 }
592 
593 // This function returns false if no change; return true otherwise.
594 bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
595  this->CI = CI;
596  Function *Callee = CI->getCalledFunction();
597 
598  // Ignore indirect calls.
599  if (Callee == nullptr)
600  return false;
601 
602  BasicBlock *BB = CI->getParent();
603  LLVMContext &Context = CI->getParent()->getContext();
604  IRBuilder<> B(Context);
605 
606  // Set the builder to the instruction after the call.
607  B.SetInsertPoint(BB, CI->getIterator());
608 
609  // Copy fast flags from the original call.
610  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
611  B.setFastMathFlags(FPOp->getFastMathFlags());
612 
613  switch (Callee->getIntrinsicID()) {
614  default:
615  break;
616  case Intrinsic::amdgcn_wavefrontsize:
617  return !EnablePreLink && fold_wavefrontsize(CI, B);
618  }
619 
620  FuncInfo FInfo;
621  if (!parseFunctionName(Callee->getName(), FInfo))
622  return false;
623 
624  // Further check the number of arguments to see if they match.
625  if (CI->arg_size() != FInfo.getNumArgs())
626  return false;
627 
628  if (TDOFold(CI, FInfo))
629  return true;
630 
631  // Under unsafe-math, evaluate calls if possible.
632  // According to Brian Sumner, we can do this for all f32 function calls
633  // using host's double function calls.
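 // For example (illustrative): with fast math, a call such as exp(2.0f) is
 // evaluated below with the host's double-precision exp() and the call is
 // replaced by the resulting constant.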
634  if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
635  return true;
636 
637  // Specialized optimizations for each function call
638  switch (FInfo.getId()) {
639  case AMDGPULibFunc::EI_RECIP:
640  // skip vector function
641  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
642  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
643  "recip must be an either native or half function");
644  return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
645 
646  case AMDGPULibFunc::EI_DIVIDE:
647  // skip vector function
648  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
649  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
650  "divide must be an either native or half function");
651  return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
652 
653  case AMDGPULibFunc::EI_POW:
654  case AMDGPULibFunc::EI_POWR:
655  case AMDGPULibFunc::EI_POWN:
656  return fold_pow(CI, B, FInfo);
657 
658  case AMDGPULibFunc::EI_ROOTN:
659  // skip vector function
660  return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
661 
662  case AMDGPULibFunc::EI_FMA:
663  case AMDGPULibFunc::EI_MAD:
664  case AMDGPULibFunc::EI_NFMA:
665  // skip vector function
666  return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
667 
668  case AMDGPULibFunc::EI_SQRT:
669  return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
670  case AMDGPULibFunc::EI_COS:
671  case AMDGPULibFunc::EI_SIN:
672  if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
673  getArgType(FInfo) == AMDGPULibFunc::F64)
674  && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
675  return fold_sincos(CI, B, AA);
676 
677  break;
678  case AMDGPULibFunc::EI_READ_PIPE_2:
679  case AMDGPULibFunc::EI_READ_PIPE_4:
680  case AMDGPULibFunc::EI_WRITE_PIPE_2:
681  case AMDGPULibFunc::EI_WRITE_PIPE_4:
682  return fold_read_write_pipe(CI, B, FInfo);
683 
684  default:
685  break;
686  }
687 
688  return false;
689 }
690 
691 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
692  // Table-Driven optimization
693  const TableRef tr = getOptTable(FInfo.getId());
694  if (tr.empty())
695  return false;
696 
697  int const sz = (int)tr.size();
698  Value *opr0 = CI->getArgOperand(0);
699 
700  if (getVecSize(FInfo) > 1) {
701  if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
702  SmallVector<double, 0> DVal;
703  for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
704  ConstantFP *eltval = dyn_cast<ConstantFP>(
705  CV->getElementAsConstant((unsigned)eltNo));
706  assert(eltval && "Non-FP arguments in math function!");
707  bool found = false;
708  for (int i=0; i < sz; ++i) {
709  if (eltval->isExactlyValue(tr[i].input)) {
710  DVal.push_back(tr[i].result);
711  found = true;
712  break;
713  }
714  }
715  if (!found) {
716  // This vector constant is not handled yet.
717  return false;
718  }
719  }
720  LLVMContext &context = CI->getParent()->getParent()->getContext();
721  Constant *nval;
722  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
723  SmallVector<float, 0> FVal;
724  for (unsigned i = 0; i < DVal.size(); ++i) {
725  FVal.push_back((float)DVal[i]);
726  }
727  ArrayRef<float> tmp(FVal);
728  nval = ConstantDataVector::get(context, tmp);
729  } else { // F64
730  ArrayRef<double> tmp(DVal);
731  nval = ConstantDataVector::get(context, tmp);
732  }
733  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
734  replaceCall(nval);
735  return true;
736  }
737  } else {
738  // Scalar version
739  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
740  for (int i = 0; i < sz; ++i) {
741  if (CF->isExactlyValue(tr[i].input)) {
742  Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
743  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
744  replaceCall(nval);
745  return true;
746  }
747  }
748  }
749  }
750 
751  return false;
752 }
753 
754 // [native_]half_recip(c) ==> 1.0/c
755 bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
756  const FuncInfo &FInfo) {
757  Value *opr0 = CI->getArgOperand(0);
758  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
759  // Just create a normal div. Later, InstCombine will be able
760  // to fold the divide into a constant (avoiding checks for infinity
761  // or subnormal values at this point).
762  Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
763  opr0,
764  "recip2div");
765  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
766  replaceCall(nval);
767  return true;
768  }
769  return false;
770 }
771 
772 // [native_]half_divide(x, c) ==> x/c
773 bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
774  const FuncInfo &FInfo) {
775  Value *opr0 = CI->getArgOperand(0);
776  Value *opr1 = CI->getArgOperand(1);
777  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
778  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
779 
780  if ((CF0 && CF1) || // both are constants
781  (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
782  // CF1 is constant && f32 divide
783  {
784  Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
785  opr1, "__div2recip");
786  Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
787  replaceCall(nval);
788  return true;
789  }
790  return false;
791 }
792 
793 namespace llvm {
794 static double log2(double V) {
795 #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
796  return ::log2(V);
797 #else
798  return log(V) / numbers::ln2;
799 #endif
800 }
801 }
802 
803 bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
804  const FuncInfo &FInfo) {
805  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
806  FInfo.getId() == AMDGPULibFunc::EI_POWR ||
807  FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
808  "fold_pow: encounter a wrong function call");
809 
810  Value *opr0, *opr1;
811  ConstantFP *CF;
812  ConstantInt *CINT;
813  ConstantAggregateZero *CZero;
814  Type *eltType;
815 
816  opr0 = CI->getArgOperand(0);
817  opr1 = CI->getArgOperand(1);
818  CZero = dyn_cast<ConstantAggregateZero>(opr1);
819  if (getVecSize(FInfo) == 1) {
820  eltType = opr0->getType();
821  CF = dyn_cast<ConstantFP>(opr1);
822  CINT = dyn_cast<ConstantInt>(opr1);
823  } else {
824  VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
825  assert(VTy && "Operand of vector function should be of vector type");
826  eltType = VTy->getElementType();
827  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
828 
829  // For now, only handle vector constants whose elements all have the same value.
830  CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
831  CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
832  }
833 
834  // No unsafe math, no constant argument: do nothing.
835  if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
836  return false;
837 
838  // 0x1111111 means that we don't do anything for this call.
839  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
840 
841  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
842  // pow/powr/pown(x, 0) == 1
843  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
844  Constant *cnval = ConstantFP::get(eltType, 1.0);
845  if (getVecSize(FInfo) > 1) {
846  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
847  }
848  replaceCall(cnval);
849  return true;
850  }
851  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
852  // pow/powr/pown(x, 1.0) = x
853  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
854  replaceCall(opr0);
855  return true;
856  }
857  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
858  // pow/powr/pown(x, 2.0) = x*x
859  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
860  << "\n");
861  Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
862  replaceCall(nval);
863  return true;
864  }
865  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
866  // pow/powr/pown(x, -1.0) = 1.0/x
867  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
868  Constant *cnval = ConstantFP::get(eltType, 1.0);
869  if (getVecSize(FInfo) > 1) {
870  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
871  }
872  Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
873  replaceCall(nval);
874  return true;
875  }
876 
877  Module *M = CI->getModule();
878  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
879  // pow[r](x, [-]0.5) = sqrt(x)
880  bool issqrt = CF->isExactlyValue(0.5);
881  if (FunctionCallee FPExpr =
882  getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
883                                        : AMDGPULibFunc::EI_RSQRT,
884                                        FInfo))) {
885  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
886  << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
887  Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
888  : "__pow2rsqrt");
889  replaceCall(nval);
890  return true;
891  }
892  }
893 
894  if (!isUnsafeMath(CI))
895  return false;
896 
897  // Unsafe Math optimization
898 
899  // Remember that ci_opr1 is set if opr1 is integral
900  if (CF) {
901  double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
902  ? (double)CF->getValueAPF().convertToFloat()
903  : CF->getValueAPF().convertToDouble();
904  int ival = (int)dval;
905  if ((double)ival == dval) {
906  ci_opr1 = ival;
907  } else
908  ci_opr1 = 0x11111111;
909  }
910 
911  // pow/powr/pown(x, c) = [1/](x*x*..x); where
912  // trunc(c) == c && the number of x == c && |c| <= 12
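 // For example (illustrative): pown(x, 5) expands to x * ((x*x) * (x*x)) and
 // pown(x, -3) expands to 1.0 / (x * (x*x)), built by the square-and-multiply
 // loop below from the bits of |c|.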
913  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
914  if (abs_opr1 <= 12) {
915  Constant *cnval;
916  Value *nval;
917  if (abs_opr1 == 0) {
918  cnval = ConstantFP::get(eltType, 1.0);
919  if (getVecSize(FInfo) > 1) {
920  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
921  }
922  nval = cnval;
923  } else {
924  Value *valx2 = nullptr;
925  nval = nullptr;
926  while (abs_opr1 > 0) {
927  valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
928  if (abs_opr1 & 1) {
929  nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
930  }
931  abs_opr1 >>= 1;
932  }
933  }
934 
935  if (ci_opr1 < 0) {
936  cnval = ConstantFP::get(eltType, 1.0);
937  if (getVecSize(FInfo) > 1) {
938  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
939  }
940  nval = B.CreateFDiv(cnval, nval, "__1powprod");
941  }
942  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
943  << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
944  << ")\n");
945  replaceCall(nval);
946  return true;
947  }
948 
949  // powr ---> exp2(y * log2(x))
950  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
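 // Sketch of the lowering below (illustrative): r = exp2(y * log2(|x|)); for
 // pow/pown with a possibly negative x, the sign bit of x is put back into r
 // (only when y is an odd integer) via the shl/and/or sequence at the end.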
951  FunctionCallee ExpExpr =
952  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
953  if (!ExpExpr)
954  return false;
955 
956  bool needlog = false;
957  bool needabs = false;
958  bool needcopysign = false;
959  Constant *cnval = nullptr;
960  if (getVecSize(FInfo) == 1) {
961  CF = dyn_cast<ConstantFP>(opr0);
962 
963  if (CF) {
964  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
965  ? (double)CF->getValueAPF().convertToFloat()
966  : CF->getValueAPF().convertToDouble();
967 
968  V = log2(std::abs(V));
969  cnval = ConstantFP::get(eltType, V);
970  needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
971  CF->isNegative();
972  } else {
973  needlog = true;
974  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
975  (!CF || CF->isNegative());
976  }
977  } else {
978  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
979 
980  if (!CDV) {
981  needlog = true;
982  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
983  } else {
984  assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
985  "Wrong vector size detected");
986 
987  SmallVector<double, 0> DVal;
988  for (int i=0; i < getVecSize(FInfo); ++i) {
989  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
990  ? (double)CDV->getElementAsFloat(i)
991  : CDV->getElementAsDouble(i);
992  if (V < 0.0) needcopysign = true;
993  V = log2(std::abs(V));
994  DVal.push_back(V);
995  }
996  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
997  SmallVector<float, 0> FVal;
998  for (unsigned i=0; i < DVal.size(); ++i) {
999  FVal.push_back((float)DVal[i]);
1000  }
1001  ArrayRef<float> tmp(FVal);
1002  cnval = ConstantDataVector::get(M->getContext(), tmp);
1003  } else {
1004  ArrayRef<double> tmp(DVal);
1005  cnval = ConstantDataVector::get(M->getContext(), tmp);
1006  }
1007  }
1008  }
1009 
1010  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
1011  // We cannot handle corner cases for a general pow() function, give up
1012  // unless y is a constant integral value. Then proceed as if it were pown.
1013  if (getVecSize(FInfo) == 1) {
1014  if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
1015  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1016  ? (double)CF->getValueAPF().convertToFloat()
1017  : CF->getValueAPF().convertToDouble();
1018  if (y != (double)(int64_t)y)
1019  return false;
1020  } else
1021  return false;
1022  } else {
1023  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
1024  for (int i=0; i < getVecSize(FInfo); ++i) {
1025  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1026  ? (double)CDV->getElementAsFloat(i)
1027  : CDV->getElementAsDouble(i);
1028  if (y != (double)(int64_t)y)
1029  return false;
1030  }
1031  } else
1032  return false;
1033  }
1034  }
1035 
1036  Value *nval;
1037  if (needabs) {
1038  FunctionCallee AbsExpr =
1039  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
1040  if (!AbsExpr)
1041  return false;
1042  nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
1043  } else {
1044  nval = cnval ? cnval : opr0;
1045  }
1046  if (needlog) {
1047  FunctionCallee LogExpr =
1048  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1049  if (!LogExpr)
1050  return false;
1051  nval = CreateCallEx(B,LogExpr, nval, "__log2");
1052  }
1053 
1054  if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
1055  // convert int(32) to fp(f32 or f64)
1056  opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1057  }
1058  nval = B.CreateFMul(opr1, nval, "__ylogx");
1059  nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
1060 
1061  if (needcopysign) {
1062  Value *opr_n;
1063  Type* rTy = opr0->getType();
1064  Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
1065  Type *nTy = nTyS;
1066  if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
1067  nTy = FixedVectorType::get(nTyS, vTy);
1068  unsigned size = nTy->getScalarSizeInBits();
1069  opr_n = CI->getArgOperand(1);
1070  if (opr_n->getType()->isIntegerTy())
1071  opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
1072  else
1073  opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1074 
1075  Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1076  sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1077  nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1078  nval = B.CreateBitCast(nval, opr0->getType());
1079  }
1080 
1081  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1082  << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1083  replaceCall(nval);
1084 
1085  return true;
1086 }
1087 
1088 bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
1089  const FuncInfo &FInfo) {
1090  Value *opr0 = CI->getArgOperand(0);
1091  Value *opr1 = CI->getArgOperand(1);
1092 
1093  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
1094  if (!CINT) {
1095  return false;
1096  }
1097  int ci_opr1 = (int)CINT->getSExtValue();
1098  if (ci_opr1 == 1) { // rootn(x, 1) = x
1099  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
1100  replaceCall(opr0);
1101  return true;
1102  }
1103  if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
1104  Module *M = CI->getModule();
1105  if (FunctionCallee FPExpr =
1106  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1107  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
1108  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
1109  replaceCall(nval);
1110  return true;
1111  }
1112  } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1113  Module *M = CI->getModule();
1114  if (FunctionCallee FPExpr =
1115  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1116  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
1117  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1118  replaceCall(nval);
1119  return true;
1120  }
1121  } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1122  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
1123  Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1124  opr0,
1125  "__rootn2div");
1126  replaceCall(nval);
1127  return true;
1128  } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
1129  Module *M = CI->getModule();
1130  if (FunctionCallee FPExpr =
1131  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
1132  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
1133  << ")\n");
1134  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
1135  replaceCall(nval);
1136  return true;
1137  }
1138  }
1139  return false;
1140 }
1141 
1142 bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
1143  const FuncInfo &FInfo) {
1144  Value *opr0 = CI->getArgOperand(0);
1145  Value *opr1 = CI->getArgOperand(1);
1146  Value *opr2 = CI->getArgOperand(2);
1147 
1148  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
1149  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
1150  if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
1151  // fma/mad(a, b, c) = c if a=0 || b=0
1152  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
1153  replaceCall(opr2);
1154  return true;
1155  }
1156  if (CF0 && CF0->isExactlyValue(1.0f)) {
1157  // fma/mad(a, b, c) = b+c if a=1
1158  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
1159  << "\n");
1160  Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
1161  replaceCall(nval);
1162  return true;
1163  }
1164  if (CF1 && CF1->isExactlyValue(1.0f)) {
1165  // fma/mad(a, b, c) = a+c if b=1
1166  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
1167  << "\n");
1168  Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
1169  replaceCall(nval);
1170  return true;
1171  }
1172  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
1173  if (CF->isZero()) {
1174  // fma/mad(a, b, c) = a*b if c=0
1175  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
1176  << *opr1 << "\n");
1177  Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
1178  replaceCall(nval);
1179  return true;
1180  }
1181  }
1182 
1183  return false;
1184 }
1185 
1186 // Get a scalar native builtin single argument FP function
1187 FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1188  const FuncInfo &FInfo) {
1189  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1190  return nullptr;
1191  FuncInfo nf = FInfo;
1192  nf.setPrefix(AMDGPULibFunc::NATIVE);
1193  return getFunction(M, nf);
1194 }
1195 
1196 // fold sqrt -> native_sqrt (x)
1197 bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
1198  const FuncInfo &FInfo) {
1199  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
1200  (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
1201  if (FunctionCallee FPExpr = getNativeFunction(
1202          CI->getModule(), AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1203  Value *opr0 = CI->getArgOperand(0);
1204  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1205  << "sqrt(" << *opr0 << ")\n");
1206  Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
1207  replaceCall(nval);
1208  return true;
1209  }
1210  }
1211  return false;
1212 }
1213 
1214 // fold sin, cos -> sincos.
1215 bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
1216  AliasAnalysis *AA) {
1217  AMDGPULibFunc fInfo;
1218  if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
1219  return false;
1220 
1221  assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1222  fInfo.getId() == AMDGPULibFunc::EI_COS);
1223  bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1224 
1225  Value *CArgVal = CI->getArgOperand(0);
1226  BasicBlock * const CBB = CI->getParent();
1227 
1228  int const MaxScan = 30;
1229  bool Changed = false;
1230 
1231  { // fold in load value.
1232  LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
1233  if (LI && LI->getParent() == CBB) {
1234  BasicBlock::iterator BBI = LI->getIterator();
1235  Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
1236  if (AvailableVal) {
1237  Changed = true;
1238  CArgVal->replaceAllUsesWith(AvailableVal);
1239  if (CArgVal->getNumUses() == 0)
1240  LI->eraseFromParent();
1241  CArgVal = CI->getArgOperand(0);
1242  }
1243  }
1244  }
1245 
1246  Module *M = CI->getModule();
1247  fInfo.setId(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN);
1248  std::string const PairName = fInfo.mangle();
1249 
1250  CallInst *UI = nullptr;
1251  for (User* U : CArgVal->users()) {
1252  CallInst *XI = dyn_cast_or_null<CallInst>(U);
1253  if (!XI || XI == CI || XI->getParent() != CBB)
1254  continue;
1255 
1256  Function *UCallee = XI->getCalledFunction();
1257  if (!UCallee || !UCallee->getName().equals(PairName))
1258  continue;
1259 
1260  BasicBlock::iterator BBI = CI->getIterator();
1261  if (BBI == CI->getParent()->begin())
1262  break;
1263  --BBI;
1264  for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
1265  if (cast<Instruction>(BBI) == XI) {
1266  UI = XI;
1267  break;
1268  }
1269  }
1270  if (UI) break;
1271  }
1272 
1273  if (!UI)
1274  return Changed;
1275 
1276  // Merge the sin and cos.
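 // Sketch of the merge (pseudo-IR, names illustrative):
 //   %s = call float @sin(float %x)
 //   %c = call float @cos(float %x)
 // in the same basic block becomes
 //   %a = alloca float                         ; at function entry
 //   %s = call float @sincos(float %x, ptr %a) ; ptr addrspacecast if needed
 //   %c = load float, ptr %a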
1277 
1278  // for OpenCL 2.0 we have only generic implementation of sincos
1279  // function.
1280  AMDGPULibFunc nf(AMDGPULibFunc::EI_SINCOS, fInfo);
1281  nf.getLeads()[0].PtrKind = AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
1282  FunctionCallee Fsincos = getFunction(M, nf);
1283  if (!Fsincos)
1284  return Changed;
1285 
1286  BasicBlock::iterator ItOld = B.GetInsertPoint();
1287  AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
1288  B.SetInsertPoint(UI);
1289 
1290  Value *P = Alloc;
1291  Type *PTy = Fsincos.getFunctionType()->getParamType(1);
1292  // The alloca allocates memory in the private address space. This needs
1293  // to be cast to the address space of the cos pointer argument.
1294  // In OpenCL 2.0 this is generic, while in 1.2 it is private.
1295  if (PTy->getPointerAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
1296  P = B.CreateAddrSpaceCast(Alloc, PTy);
1297  CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
1298 
1299  LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
1300  << *Call << "\n");
1301 
1302  if (!isSin) { // CI->cos, UI->sin
1303  B.SetInsertPoint(&*ItOld);
1304  UI->replaceAllUsesWith(&*Call);
1305  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1306  CI->replaceAllUsesWith(Reload);
1307  UI->eraseFromParent();
1308  CI->eraseFromParent();
1309  } else { // CI->sin, UI->cos
1310  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1311  UI->replaceAllUsesWith(Reload);
1312  CI->replaceAllUsesWith(Call);
1313  UI->eraseFromParent();
1314  CI->eraseFromParent();
1315  }
1316  return true;
1317 }
1318 
1319 bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1320  if (!TM)
1321  return false;
1322 
1323  StringRef CPU = TM->getTargetCPU();
1324  StringRef Features = TM->getTargetFeatureString();
1325  if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
1326  (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
1327  return false;
1328 
1329  Function *F = CI->getParent()->getParent();
1330  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1331  unsigned N = ST.getWavefrontSize();
1332 
1333  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1334  << N << "\n");
1335 
1336  CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1337  CI->eraseFromParent();
1338  return true;
1339 }
1340 
1341 // Get insertion point at entry.
1342 BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
1343  Function * Func = UI->getParent()->getParent();
1344  BasicBlock * BB = &Func->getEntryBlock();
1345  assert(BB && "Entry block not found!");
1346  BasicBlock::iterator ItNew = BB->begin();
1347  return ItNew;
1348 }
1349 
1350 // Insert an AllocaInst at the beginning of the function entry block.
1351 AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
1352  const char *prefix) {
1353  BasicBlock::iterator ItNew = getEntryIns(UI);
1354  Function *UCallee = UI->getCalledFunction();
1355  Type *RetType = UCallee->getReturnType();
1356  B.SetInsertPoint(&*ItNew);
1357  AllocaInst *Alloc =
1358  B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName());
1359  Alloc->setAlignment(
1360  Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
1361  return Alloc;
1362 }
1363 
1364 bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
1365  double& Res0, double& Res1,
1366  Constant *copr0, Constant *copr1,
1367  Constant *copr2) {
1368  // By default, opr0/opr1/opr2 hold values of float/double type.
1369  // If they are not float/double, each function has to handle its
1370  // operands separately.
1371  double opr0=0.0, opr1=0.0, opr2=0.0;
1372  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1373  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1374  ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
1375  if (fpopr0) {
1376  opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1377  ? fpopr0->getValueAPF().convertToDouble()
1378  : (double)fpopr0->getValueAPF().convertToFloat();
1379  }
1380 
1381  if (fpopr1) {
1382  opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1383  ? fpopr1->getValueAPF().convertToDouble()
1384  : (double)fpopr1->getValueAPF().convertToFloat();
1385  }
1386 
1387  if (fpopr2) {
1388  opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1389  ? fpopr2->getValueAPF().convertToDouble()
1390  : (double)fpopr2->getValueAPF().convertToFloat();
1391  }
1392 
1393  switch (FInfo.getId()) {
1394  default : return false;
1395 
1396  case AMDGPULibFunc::EI_ACOS:
1397  Res0 = acos(opr0);
1398  return true;
1399 
1400  case AMDGPULibFunc::EI_ACOSH:
1401  // acosh(x) == log(x + sqrt(x*x - 1))
1402  Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1403  return true;
1404 
1405  case AMDGPULibFunc::EI_ACOSPI:
1406  Res0 = acos(opr0) / MATH_PI;
1407  return true;
1408 
1409  case AMDGPULibFunc::EI_ASIN:
1410  Res0 = asin(opr0);
1411  return true;
1412 
1413  case AMDGPULibFunc::EI_ASINH:
1414  // asinh(x) == log(x + sqrt(x*x + 1))
1415  Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1416  return true;
1417 
1418  case AMDGPULibFunc::EI_ASINPI:
1419  Res0 = asin(opr0) / MATH_PI;
1420  return true;
1421 
1422  case AMDGPULibFunc::EI_ATAN:
1423  Res0 = atan(opr0);
1424  return true;
1425 
1426  case AMDGPULibFunc::EI_ATANH:
1427  // atanh(x) == (log(1+x) - log(1-x))/2;
1428  Res0 = (log(1.0 + opr0) - log(1.0 - opr0))/2.0;
1429  return true;
1430 
1431  case AMDGPULibFunc::EI_ATANPI:
1432  Res0 = atan(opr0) / MATH_PI;
1433  return true;
1434 
1435  case AMDGPULibFunc::EI_CBRT:
1436  Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1437  return true;
1438 
1439  case AMDGPULibFunc::EI_COS:
1440  Res0 = cos(opr0);
1441  return true;
1442 
1443  case AMDGPULibFunc::EI_COSH:
1444  Res0 = cosh(opr0);
1445  return true;
1446 
1447  case AMDGPULibFunc::EI_COSPI:
1448  Res0 = cos(MATH_PI * opr0);
1449  return true;
1450 
1451  case AMDGPULibFunc::EI_EXP:
1452  Res0 = exp(opr0);
1453  return true;
1454 
1455  case AMDGPULibFunc::EI_EXP2:
1456  Res0 = pow(2.0, opr0);
1457  return true;
1458 
1459  case AMDGPULibFunc::EI_EXP10:
1460  Res0 = pow(10.0, opr0);
1461  return true;
1462 
1463  case AMDGPULibFunc::EI_EXPM1:
1464  Res0 = exp(opr0) - 1.0;
1465  return true;
1466 
1467  case AMDGPULibFunc::EI_LOG:
1468  Res0 = log(opr0);
1469  return true;
1470 
1471  case AMDGPULibFunc::EI_LOG2:
1472  Res0 = log(opr0) / log(2.0);
1473  return true;
1474 
1475  case AMDGPULibFunc::EI_LOG10:
1476  Res0 = log(opr0) / log(10.0);
1477  return true;
1478 
1479  case AMDGPULibFunc::EI_RSQRT:
1480  Res0 = 1.0 / sqrt(opr0);
1481  return true;
1482 
1483  case AMDGPULibFunc::EI_SIN:
1484  Res0 = sin(opr0);
1485  return true;
1486 
1487  case AMDGPULibFunc::EI_SINH:
1488  Res0 = sinh(opr0);
1489  return true;
1490 
1491  case AMDGPULibFunc::EI_SINPI:
1492  Res0 = sin(MATH_PI * opr0);
1493  return true;
1494 
1495  case AMDGPULibFunc::EI_SQRT:
1496  Res0 = sqrt(opr0);
1497  return true;
1498 
1499  case AMDGPULibFunc::EI_TAN:
1500  Res0 = tan(opr0);
1501  return true;
1502 
1503  case AMDGPULibFunc::EI_TANH:
1504  Res0 = tanh(opr0);
1505  return true;
1506 
1507  case AMDGPULibFunc::EI_TANPI:
1508  Res0 = tan(MATH_PI * opr0);
1509  return true;
1510 
1511  case AMDGPULibFunc::EI_RECIP:
1512  Res0 = 1.0 / opr0;
1513  return true;
1514 
1515  // two-arg functions
1516  case AMDGPULibFunc::EI_DIVIDE:
1517  Res0 = opr0 / opr1;
1518  return true;
1519 
1520  case AMDGPULibFunc::EI_POW:
1521  case AMDGPULibFunc::EI_POWR:
1522  Res0 = pow(opr0, opr1);
1523  return true;
1524 
1525  case AMDGPULibFunc::EI_POWN: {
1526  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1527  double val = (double)iopr1->getSExtValue();
1528  Res0 = pow(opr0, val);
1529  return true;
1530  }
1531  return false;
1532  }
1533 
1534  case AMDGPULibFunc::EI_ROOTN: {
1535  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1536  double val = (double)iopr1->getSExtValue();
1537  Res0 = pow(opr0, 1.0 / val);
1538  return true;
1539  }
1540  return false;
1541  }
1542 
1543  // with ptr arg
1544  case AMDGPULibFunc::EI_SINCOS:
1545  Res0 = sin(opr0);
1546  Res1 = cos(opr0);
1547  return true;
1548 
1549  // three-arg functions
1550  case AMDGPULibFunc::EI_FMA:
1551  case AMDGPULibFunc::EI_MAD:
1552  Res0 = opr0 * opr1 + opr2;
1553  return true;
1554  }
1555 
1556  return false;
1557 }
1558 
1559 bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1560  int numArgs = (int)aCI->arg_size();
1561  if (numArgs > 3)
1562  return false;
1563 
1564  Constant *copr0 = nullptr;
1565  Constant *copr1 = nullptr;
1566  Constant *copr2 = nullptr;
1567  if (numArgs > 0) {
1568  if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1569  return false;
1570  }
1571 
1572  if (numArgs > 1) {
1573  if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1574  if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1575  return false;
1576  }
1577  }
1578 
1579  if (numArgs > 2) {
1580  if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
1581  return false;
1582  }
1583 
1584  // At this point, all arguments to aCI are constants.
1585 
1586  // max vector size is 16, and sincos will generate two results.
1587  double DVal0[16], DVal1[16];
1588  int FuncVecSize = getVecSize(FInfo);
1589  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1590  if (FuncVecSize == 1) {
1591  if (!evaluateScalarMathFunc(FInfo, DVal0[0],
1592  DVal1[0], copr0, copr1, copr2)) {
1593  return false;
1594  }
1595  } else {
1596  ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1597  ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1598  ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
1599  for (int i = 0; i < FuncVecSize; ++i) {
1600  Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1601  Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1602  Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
1603  if (!evaluateScalarMathFunc(FInfo, DVal0[i],
1604  DVal1[i], celt0, celt1, celt2)) {
1605  return false;
1606  }
1607  }
1608  }
1609 
1610  LLVMContext &context = CI->getParent()->getParent()->getContext();
1611  Constant *nval0, *nval1;
1612  if (FuncVecSize == 1) {
1613  nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
1614  if (hasTwoResults)
1615  nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
1616  } else {
1617  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1618  SmallVector <float, 0> FVal0, FVal1;
1619  for (int i = 0; i < FuncVecSize; ++i)
1620  FVal0.push_back((float)DVal0[i]);
1621  ArrayRef<float> tmp0(FVal0);
1622  nval0 = ConstantDataVector::get(context, tmp0);
1623  if (hasTwoResults) {
1624  for (int i = 0; i < FuncVecSize; ++i)
1625  FVal1.push_back((float)DVal1[i]);
1626  ArrayRef<float> tmp1(FVal1);
1627  nval1 = ConstantDataVector::get(context, tmp1);
1628  }
1629  } else {
1630  ArrayRef<double> tmp0(DVal0);
1631  nval0 = ConstantDataVector::get(context, tmp0);
1632  if (hasTwoResults) {
1633  ArrayRef<double> tmp1(DVal1);
1634  nval1 = ConstantDataVector::get(context, tmp1);
1635  }
1636  }
1637  }
1638 
1639  if (hasTwoResults) {
1640  // sincos
1641  assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
1642  "math function with ptr arg not supported yet");
1643  new StoreInst(nval1, aCI->getArgOperand(1), aCI);
1644  }
1645 
1646  replaceCall(nval0);
1647  return true;
1648 }
1649 
1650 // Public interface to the Simplify LibCalls pass.
1651 FunctionPass *llvm::createAMDGPUSimplifyLibCallsPass(const TargetMachine *TM) {
1652  return new AMDGPUSimplifyLibCalls(TM);
1653 }
1654 
1655 FunctionPass *llvm::createAMDGPUUseNativeCallsPass() {
1656  return new AMDGPUUseNativeCalls();
1657 }
1658 
1659 bool AMDGPUSimplifyLibCalls::runOnFunction(Function &F) {
1660  if (skipFunction(F))
1661  return false;
1662 
1663  bool Changed = false;
1664  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1665 
1666  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1667  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1668 
1669  for (auto &BB : F) {
1670  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1671  // Ignore non-calls.
1672  CallInst *CI = dyn_cast<CallInst>(I);
1673  ++I;
1674  // Ignore intrinsics that do not become real instructions.
1675  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1676  continue;
1677 
1678  // Ignore indirect calls.
1679  Function *Callee = CI->getCalledFunction();
1680  if (Callee == nullptr)
1681  continue;
1682 
1683  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1684  dbgs().flush());
1685  if(Simplifier.fold(CI, AA))
1686  Changed = true;
1687  }
1688  }
1689  return Changed;
1690 }
1691 
1692 PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
1693                                                   FunctionAnalysisManager &AM) {
1694  AMDGPULibCalls Simplifier(&TM);
1695  Simplifier.initNativeFuncs();
1696 
1697  bool Changed = false;
1698  auto AA = &AM.getResult<AAManager>(F);
1699 
1700  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1701  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1702 
1703  for (auto &BB : F) {
1704  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1705  // Ignore non-calls.
1706  CallInst *CI = dyn_cast<CallInst>(I);
1707  ++I;
1708  // Ignore intrinsics that do not become real instructions.
1709  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1710  continue;
1711 
1712  // Ignore indirect calls.
1713  Function *Callee = CI->getCalledFunction();
1714  if (Callee == nullptr)
1715  continue;
1716 
1717  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1718  dbgs().flush());
1719  if (Simplifier.fold(CI, AA))
1720  Changed = true;
1721  }
1722  }
1723  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1724 }
1725 
1726 bool AMDGPUUseNativeCalls::runOnFunction(Function &F) {
1727  if (skipFunction(F) || UseNative.empty())
1728  return false;
1729 
1730  bool Changed = false;
1731  for (auto &BB : F) {
1732  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1733  // Ignore non-calls.
1734  CallInst *CI = dyn_cast<CallInst>(I);
1735  ++I;
1736  if (!CI) continue;
1737 
1738  // Ignore indirect calls.
1739  Function *Callee = CI->getCalledFunction();
1740  if (Callee == nullptr)
1741  continue;
1742 
1743  if (Simplifier.useNative(CI))
1744  Changed = true;
1745  }
1746  }
1747  return Changed;
1748 }
1749 
1750 PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
1751                                                 FunctionAnalysisManager &AM) {
1752  if (UseNative.empty())
1753  return PreservedAnalyses::all();
1754 
1755  AMDGPULibCalls Simplifier;
1756  Simplifier.initNativeFuncs();
1757 
1758  bool Changed = false;
1759  for (auto &BB : F) {
1760  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1761  // Ignore non-calls.
1762  CallInst *CI = dyn_cast<CallInst>(I);
1763  ++I;
1764  if (!CI)
1765  continue;
1766 
1767  // Ignore indirect calls.
1768  Function *Callee = CI->getCalledFunction();
1769  if (Callee == nullptr)
1770  continue;
1771 
1772  if (Simplifier.useNative(CI))
1773  Changed = true;
1774  }
1775  }
1776  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1777 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::AMDGPULibFunc::isMangled
bool isMangled() const
Definition: AMDGPULibFunc.h:380
llvm::AMDGPULibFuncBase::EI_ASINH
@ EI_ASINH
Definition: AMDGPULibFunc.h:47
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1260
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:4915
llvm::AMDGPULibFuncBase::EI_NFMA
@ EI_NFMA
Definition: AMDGPULibFunc.h:225
llvm::AMDGPULibCalls::AMDGPULibCalls
AMDGPULibCalls(const TargetMachine *TM_=nullptr)
Definition: AMDGPULibCalls.cpp:123
getOptTable
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:382
tbl_asin
static const TableEntry tbl_asin[]
Definition: AMDGPULibCalls.cpp:228
tbl_sinpi
static const TableEntry tbl_sinpi[]
Definition: AMDGPULibCalls.cpp:329
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::AMDGPULibFuncBase::EI_ACOSH
@ EI_ACOSH
Definition: AMDGPULibFunc.h:41
llvm::AMDGPULibFuncBase::F64
@ F64
Definition: AMDGPULibFunc.h:273
tbl_tan
static const TableEntry tbl_tan[]
Definition: AMDGPULibCalls.cpp:338
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
tbl_acos
static const TableEntry tbl_acos[]
Definition: AMDGPULibCalls.cpp:213
IntrinsicInst.h
tbl_log2
static const TableEntry tbl_log2[]
Definition: AMDGPULibCalls.cpp:309
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Loads.h
llvm::Function
Definition: Function.h:60
llvm::Attribute
Definition: Attributes.h:65
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::CallBase::setCalledFunction
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1435
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:727
double
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in and only one load from a constant double
Definition: README-SSE.txt:85
llvm::AMDGPULibFuncBase::EI_NCOS
@ EI_NCOS
Definition: AMDGPULibFunc.h:223
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1181
tbl_atanh
static const TableEntry tbl_atanh[]
Definition: AMDGPULibCalls.cpp:250
llvm::AMDGPULibFuncBase::EI_ROOTN
@ EI_ROOTN
Definition: AMDGPULibFunc.h:163
llvm::AMDGPULibFuncBase::EI_READ_PIPE_2
@ EI_READ_PIPE_2
Definition: AMDGPULibFunc.h:239
llvm::User::dropAllReferences
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
MATH_PI
#define MATH_PI
Definition: AMDGPULibCalls.cpp:40
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:166
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
tbl_log
static const TableEntry tbl_log[]
Definition: AMDGPULibCalls.cpp:305
llvm::IRBuilder<>
llvm::cl::ValueOptional
@ ValueOptional
Definition: CommandLine.h:133
llvm::AMDGPULibFuncBase::EI_ATANPI
@ EI_ATANPI
Definition: AMDGPULibFunc.h:55
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:361
tbl_log10
static const TableEntry tbl_log10[]
Definition: AMDGPULibCalls.cpp:313
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::AMDGPULibFuncBase::EI_POWR
@ EI_POWR
Definition: AMDGPULibFunc.h:153
llvm::ConstantDataVector::get
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:2983
llvm::ConstantDataSequential::getElementAsFloat
float getElementAsFloat(unsigned i) const
If this is a sequential container of floats, return the specified element as a float.
Definition: Constants.cpp:3164
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:320
llvm::AMDGPULibFunc::parse
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
Definition: AMDGPULibFunc.cpp:681
llvm::ConstantFP::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:302
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AMDGPULibFuncBase::EI_ERFC
@ EI_ERFC
Definition: AMDGPULibFunc.h:85
llvm::AMDGPULibFuncBase::EI_TANPI
@ EI_TANPI
Definition: AMDGPULibFunc.h:196
llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1474
llvm::ConstantAggregateZero
All zero aggregate value.
Definition: Constants.h:337
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:377
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:298
llvm::AMDGPULibFuncBase::EI_COSH
@ EI_COSH
Definition: AMDGPULibFunc.h:76
tbl_cbrt
static const TableEntry tbl_cbrt[]
Definition: AMDGPULibCalls.cpp:260
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:891
llvm::AMDGPULibFuncBase::EI_CBRT
@ EI_CBRT
Definition: AMDGPULibFunc.h:68
llvm::AMDGPULibFuncBase::EI_SIN
@ EI_SIN
Definition: AMDGPULibFunc.h:172
llvm::AMDGPULibFuncBase::EI_LOG
@ EI_LOG
Definition: AMDGPULibFunc.h:130
llvm::AMDGPULibFuncBase::EI_POW
@ EI_POW
Definition: AMDGPULibFunc.h:151
llvm::AMDGPULibFuncBase::EI_RECIP
@ EI_RECIP
Definition: AMDGPULibFunc.h:156
llvm::AMDGPULibFuncBase::EI_NSIN
@ EI_NSIN
Definition: AMDGPULibFunc.h:229
llvm::Attribute::getValueAsBool
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:290
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
llvm::AMDGPULibCalls
Definition: AMDGPULibCalls.cpp:47
tbl_erf
static const TableEntry tbl_erf[]
Definition: AMDGPULibCalls.cpp:282
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:159
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AMDGPULibFuncBase::EI_NSQRT
@ EI_NSQRT
Definition: AMDGPULibFunc.h:230
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
AliasAnalysis.h
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AMDGPULibCalls::replaceCall
void replaceCall(Value *With)
Definition: AMDGPULibCalls.cpp:117
llvm::AMDGPULibFunc::getLeads
Param * getLeads()
Get leading parameters for mangled lib functions.
Definition: AMDGPULibFunc.cpp:1050
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::AMDGPULibFuncBase::getEPtrKindFromAddrSpace
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Definition: AMDGPULibFunc.h:313
llvm::AMDGPULibFunc
Wrapper class for AMDGPULibFuncImpl.
Definition: AMDGPULibFunc.h:358
tbl_acospi
static const TableEntry tbl_acospi[]
Definition: AMDGPULibCalls.cpp:222
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::AMDGPULibFuncBase::EI_TANH
@ EI_TANH
Definition: AMDGPULibFunc.h:195
llvm::AMDGPULibFuncBase::EI_FMA
@ EI_FMA
Definition: AMDGPULibFunc.h:96
DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
llvm::AMDGPULibFuncBase::EI_ATANH
@ EI_ATANH
Definition: AMDGPULibFunc.h:54
llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1655
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
llvm::AMDGPULibFuncBase::EType
EType
Definition: AMDGPULibFunc.h:253
llvm::AAResults
Definition: AliasAnalysis.h:518
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
tbl_exp
static const TableEntry tbl_exp[]
Definition: AMDGPULibCalls.cpp:286
llvm::AMDGPULibFuncBase::EI_FABS
@ EI_FABS
Definition: AMDGPULibFunc.h:90
llvm::User
Definition: User.h:44
llvm::AMDGPULibCalls::useNative
bool useNative(CallInst *CI)
Definition: AMDGPULibCalls.cpp:500
llvm::AMDGPULibFunc::getOrInsertFunction
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
Definition: AMDGPULibFunc.cpp:961
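Together with AMDGPULibFunc::parse above, this is the usual lookup path. A hedged sketch (the helper name lookupLibCall is an assumption, not code from this file):

  #include "AMDGPULibFunc.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Hypothetical helper: parse a mangled name and get or insert a matching
  // declaration in M; a default-constructed (null) callee signals failure.
  FunctionCallee lookupLibCall(Module *M, StringRef MangledName) {
    AMDGPULibFunc Info;
    if (!AMDGPULibFunc::parse(MangledName, Info))
      return FunctionCallee();
    return AMDGPULibFunc::getOrInsertFunction(M, Info);
  }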
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:306
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1517
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::StringRef::contains_insensitive
bool contains_insensitive(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:427
tbl_asinpi
static const TableEntry tbl_asinpi[]
Definition: AMDGPULibCalls.cpp:238
llvm::AMDGPULibFuncBase::EI_ACOSPI
@ EI_ACOSPI
Definition: AMDGPULibFunc.h:42
false
Definition: StackSlotColoring.cpp:141
llvm::AMDGPULibFuncBase::EI_NEXP2
@ EI_NEXP2
Definition: AMDGPULibFunc.h:224
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::APFloat::convertToDouble
double convertToDouble() const
Converts this APFloat to a host double value.
Definition: APFloat.cpp:4902
llvm::Instruction
Definition: Instruction.h:42
llvm::ConstantDataVector::getSplatValue
Constant * getSplatValue() const
If this is a splat constant, meaning that all of the elements have the same value,...
Definition: Constants.cpp:3221
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::ConstantFP::isExactlyValue
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: Constants.cpp:1049
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:185
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
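The splat behaviour described above can be seen in a small sketch (assumed helper name; not from this file):

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  // Hypothetical helper: because V4I32 is a vector type, the result is a
  // <4 x i32> splat of 7 rather than a scalar ConstantInt.
  Constant *makeSplatOfSeven(LLVMContext &Ctx) {
    Type *I32 = Type::getInt32Ty(Ctx);
    Type *V4I32 = FixedVectorType::get(I32, 4);
    return ConstantInt::get(V4I32, 7);
  }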
llvm::FunctionCallee::getFunctionType
FunctionType * getFunctionType()
Definition: DerivedTypes.h:182
getVecSize
static int getVecSize(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:426
llvm::AMDGPULibFuncBase::EI_ASIN
@ EI_ASIN
Definition: AMDGPULibFunc.h:46
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::AMDGPULibFuncBase::EI_TAN
@ EI_TAN
Definition: AMDGPULibFunc.h:194
tbl_expm1
static const TableEntry tbl_expm1[]
Definition: AMDGPULibCalls.cpp:301
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:155
tbl_atan
static const TableEntry tbl_atan[]
Definition: AMDGPULibCalls.cpp:244
llvm::Instruction::isLifetimeStartOrEnd
bool isLifetimeStartOrEnd() const
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
Definition: Instruction.cpp:744
Name
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Definition: AMDGPULibCalls.cpp:188
llvm::AMDGPUSimplifyLibCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1692
llvm::CallingConv::ID
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
HasNative
static bool HasNative(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:357
tbl_tanh
static const TableEntry tbl_tanh[]
Definition: AMDGPULibCalls.cpp:342
llvm::AMDGPULibFuncBase::NOPFX
@ NOPFX
Definition: AMDGPULibFunc.h:248
llvm::AMDGPULibFuncBase::EI_EXP10
@ EI_EXP10
Definition: AMDGPULibFunc.h:87
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::AMDGPULibFuncBase::EI_READ_PIPE_4
@ EI_READ_PIPE_4
Definition: AMDGPULibFunc.h:240
llvm::StringRef::equals
bool equals(StringRef RHS) const
equals - Check for string equality; this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:164
llvm::AMDGPULibFuncBase::EI_ACOS
@ EI_ACOS
Definition: AMDGPULibFunc.h:40
AMDGPULibFunc.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
llvm::StringRef::empty
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::cl::opt< bool >
llvm::AMDGPULibFuncBase::HALF
@ HALF
Definition: AMDGPULibFunc.h:250
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:298
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::AMDGPULibCalls::isUnsafeMath
bool isUnsafeMath(const CallInst *CI) const
Definition: AMDGPULibCalls.cpp:447
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:180
llvm::AMDGPULibFuncBase::F32
@ F32
Definition: AMDGPULibFunc.h:272
tbl_cospi
static const TableEntry tbl_cospi[]
Definition: AMDGPULibCalls.cpp:274
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:650
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPULibFuncBase::EI_EXP
@ EI_EXP
Definition: AMDGPULibFunc.h:86
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::AMDGPULibFuncBase::EI_TGAMMA
@ EI_TGAMMA
Definition: AMDGPULibFunc.h:197
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_4
@ EI_WRITE_PIPE_4
Definition: AMDGPULibFunc.h:242
llvm::AMDGPULibFuncBase::EI_COSPI
@ EI_COSPI
Definition: AMDGPULibFunc.h:77
llvm::AMDGPULibFunc::mangle
std::string mangle() const
Definition: AMDGPULibFunc.h:388
TableRef
ArrayRef< TableEntry > TableRef
Definition: AMDGPULibCalls.cpp:380
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::ConstantDataVector
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:753
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
getArgType
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:430
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::AMDGPUUseNativeCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1750
llvm::AMDGPULibCalls::initNativeFuncs
void initNativeFuncs()
Definition: AMDGPULibCalls.cpp:460
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1673
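A small sketch of the typical use (the container and sentinel value are illustrative, not from this file):

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"

  // Hypothetical helper: true if -1 occurs anywhere in Vals.
  bool hasSentinel(const llvm::SmallVectorImpl<int> &Vals) {
    return llvm::is_contained(Vals, -1);
  }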
tbl_asinh
static const TableEntry tbl_asinh[]
Definition: AMDGPULibCalls.cpp:234
false
amdgpu Simplify well known AMD library false
Definition: AMDGPULibCalls.cpp:180
llvm::AMDGPULibFunc::getNumArgs
unsigned getNumArgs() const
Definition: AMDGPULibFunc.h:373
IRBuilder.h
llvm::ConstantFP::isNegative
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:305
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::FPMathOperator
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:167
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::ConstantDataSequential::getElementAsConstant
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index's element.
Definition: Constants.cpp:3176
llvm::AMDGPULibFuncBase::EI_ASINPI
@ EI_ASINPI
Definition: AMDGPULibFunc.h:48
llvm::AMDGPULibFuncBase::EI_SINCOS
@ EI_SINCOS
Definition: AMDGPULibFunc.h:173
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_2
@ EI_WRITE_PIPE_2
Definition: AMDGPULibFunc.h:241
llvm::AMDGPULibFuncBase::EI_SINH
@ EI_SINH
Definition: AMDGPULibFunc.h:174
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
tbl_cos
static const TableEntry tbl_cos[]
Definition: AMDGPULibCalls.cpp:266
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1571
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::AMDGPULibFuncBase::Param::VectorSize
unsigned char VectorSize
Definition: AMDGPULibFunc.h:294
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
AMDGPU.h
Simplify
assume Assume Simplify
Definition: AssumeBundleBuilder.cpp:604
llvm::AMDGPULibFuncBase::EI_DIVIDE
@ EI_DIVIDE
Definition: AMDGPULibFunc.h:82
tbl_erfc
static const TableEntry tbl_erfc[]
Definition: AMDGPULibCalls.cpp:278
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:254
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AMDGPULibFuncBase::EI_COS
@ EI_COS
Definition: AMDGPULibFunc.h:75
llvm::AMDGPULibFuncBase::EI_LOG2
@ EI_LOG2
Definition: AMDGPULibFunc.h:133
tbl_atanpi
static const TableEntry tbl_atanpi[]
Definition: AMDGPULibCalls.cpp:254
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::AMDGPULibFuncBase::EFuncId
EFuncId
Definition: AMDGPULibFunc.h:24
llvm::AMDGPULibFunc::setId
void setId(EFuncId Id)
Definition: AMDGPULibFunc.h:381
llvm::AMDGPULibFuncBase::EI_EXP2
@ EI_EXP2
Definition: AMDGPULibFunc.h:88
tbl_tanpi
static const TableEntry tbl_tanpi[]
Definition: AMDGPULibCalls.cpp:346
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:128
llvm::AMDGPULibFunc::setPrefix
void setPrefix(ENamePrefix PFX)
Definition: AMDGPULibFunc.h:391
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::AMDGPULibFuncBase::EI_NRSQRT
@ EI_NRSQRT
Definition: AMDGPULibFunc.h:228
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:308
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
UseNative
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:187
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
EnablePreLink
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", "Simplify well-known AMD library calls", false, false) INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:243
llvm::AMDGPULibFuncBase::EI_ERF
@ EI_ERF
Definition: AMDGPULibFunc.h:84
llvm::ConstantDataSequential::getElementAsDouble
double getElementAsDouble(unsigned i) const
If this is a sequential container of doubles, return the specified element as a double.
Definition: Constants.cpp:3170
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::AMDGPULibFuncBase::EI_MAD
@ EI_MAD
Definition: AMDGPULibFunc.h:135
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1339
llvm::AMDGPULibFuncBase::Param::ArgType
unsigned char ArgType
Definition: AMDGPULibFunc.h:293
llvm::AMDGPULibFuncBase::EI_POWN
@ EI_POWN
Definition: AMDGPULibFunc.h:152
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:926
llvm::AMDGPULibFunc::getPrefix
ENamePrefix getPrefix() const
Definition: AMDGPULibFunc.h:375
llvm::FindAvailableLoadedValue
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:426
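A hedged sketch of invoking this helper for a single load (the wrapper name and the scan limit of 6 are assumptions; AA may be null):

  #include "llvm/Analysis/AliasAnalysis.h"
  #include "llvm/Analysis/Loads.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Hypothetical wrapper: scan backwards from LI within its own block and
  // return an equivalent available value, or nullptr if none is found.
  Value *tryForwardLoad(LoadInst *LI, AAResults *AA) {
    BasicBlock::iterator ScanFrom = LI->getIterator();
    return FindAvailableLoadedValue(LI, LI->getParent(), ScanFrom,
                                    /*MaxInstsToScan=*/6, AA);
  }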
llvm::AMDGPULibFuncBase::EI_LOG10
@ EI_LOG10
Definition: AMDGPULibFunc.h:131
llvm::AMDGPULibFunc::getId
EFuncId getId() const
Definition: AMDGPULibFunc.h:374
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:156
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:794
tbl_rsqrt
static const TableEntry tbl_rsqrt[]
Definition: AMDGPULibCalls.cpp:317
simplifylib
amdgpu simplifylib
Definition: AMDGPULibCalls.cpp:179
llvm::AMDGPULibFuncBase::EI_EXPM1
@ EI_EXPM1
Definition: AMDGPULibFunc.h:89
MATH_SQRT2
#define MATH_SQRT2
Definition: AMDGPULibCalls.cpp:42
llvm::numbers::ln2
constexpr double ln2
Definition: MathExtras.h:55
llvm::StringRef::equals_insensitive
bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:170
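For example (illustrative helper name and string values):

  #include "llvm/ADT/StringRef.h"

  // Hypothetical helper: case-insensitive check, so "HALF_SIN" and
  // "half_sin" both compare equal to the literal below.
  bool isHalfSin(llvm::StringRef Name) {
    return Name.equals_insensitive("half_sin");
  }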
llvm::AMDGPULibCalls::CI
CallInst * CI
Definition: AMDGPULibCalls.cpp:113
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:127
tbl_exp10
static const TableEntry tbl_exp10[]
Definition: AMDGPULibCalls.cpp:296
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a reference to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:164
llvm::AMDGPULibCalls::fold
bool fold(CallInst *CI, AliasAnalysis *AA=nullptr)
Definition: AMDGPULibCalls.cpp:594
tbl_sin
static const TableEntry tbl_sin[]
Definition: AMDGPULibCalls.cpp:321
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::ConstantDataVector::getSplat
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:3044
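A minimal sketch (assumed helper name, not from this file) of building a <4 x float> splat with this API:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/LLVMContext.h"
  using namespace llvm;

  // Hypothetical helper: a <4 x float> constant with 0.5 in every lane.
  Constant *makeHalfSplat(LLVMContext &Ctx) {
    Constant *Half = ConstantFP::get(Type::getFloatTy(Ctx), 0.5);
    return ConstantDataVector::getSplat(4, Half);
  }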
N
#define N
tbl_exp2
static const TableEntry tbl_exp2[]
Definition: AMDGPULibCalls.cpp:291
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1308
llvm::AMDGPULibFunc::getName
std::string getName() const
Get the unmangled name for a mangled library function, or the name for an unmangled library function.
Definition: AMDGPULibFunc.h:372
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:85
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::AMDGPULibFuncBase::EI_SQRT
@ EI_SQRT
Definition: AMDGPULibFunc.h:177
MATH_E
#define MATH_E
Definition: AMDGPULibCalls.cpp:41
tbl_sinh
static const TableEntry tbl_sinh[]
Definition: AMDGPULibCalls.cpp:325
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
calls
amdgpu Simplify well known AMD library calls
Definition: AMDGPULibCalls.cpp:180
tbl_cosh
static const TableEntry tbl_cosh[]
Definition: AMDGPULibCalls.cpp:270
llvm::AMDGPULibFuncBase::EI_NLOG2
@ EI_NLOG2
Definition: AMDGPULibFunc.h:226
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1474
tbl_sqrt
static const TableEntry tbl_sqrt[]
Definition: AMDGPULibCalls.cpp:333
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:59
llvm::cl::desc
Definition: CommandLine.h:413
llvm::AMDGPULibFuncBase::EI_RSQRT
@ EI_RSQRT
Definition: AMDGPULibFunc.h:166
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:371
llvm::AMDGPULibFuncBase::EI_ATAN
@ EI_ATAN
Definition: AMDGPULibFunc.h:51
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AMDGPULibFuncBase::EI_SINPI
@ EI_SINPI
Definition: AMDGPULibFunc.h:175
llvm::AMDGPULibFuncBase::NATIVE
@ NATIVE
Definition: AMDGPULibFunc.h:249
MATH_SQRT1_2
#define MATH_SQRT1_2
Definition: AMDGPULibCalls.cpp:43
tbl_acosh
static const TableEntry tbl_acosh[]
Definition: AMDGPULibCalls.cpp:219
tbl_tgamma
static const TableEntry tbl_tgamma[]
Definition: AMDGPULibCalls.cpp:350
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1651
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
INITIALIZE_PASS
TargetPassConfig.
Definition: TargetPassConfig.cpp:367
llvm::DataLayout::getTypeAllocSize
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:506
llvm::ConstantDataSequential::getNumElements
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2834
llvm::cl::list
Definition: CommandLine.h:1609