LLVM  15.0.0git
AMDGPULibCalls.cpp
Go to the documentation of this file.
1 //===- AMDGPULibCalls.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file does AMD library function optimizations.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"

#include <cmath>
24 
25 #define DEBUG_TYPE "amdgpu-simplifylib"
26 
27 using namespace llvm;
28 
29 static cl::opt<bool> EnablePreLink("amdgpu-prelink",
30  cl::desc("Enable pre-link mode optimizations"),
31  cl::init(false),
32  cl::Hidden);
33 
34 static cl::list<std::string> UseNative("amdgpu-use-native",
35  cl::desc("Comma separated list of functions to replace with native, or all"),
37  cl::Hidden);
38 
39 #define MATH_PI numbers::pi
40 #define MATH_E numbers::e
41 #define MATH_SQRT2 numbers::sqrt2
42 #define MATH_SQRT1_2 numbers::inv_sqrt2
43 
44 namespace llvm {
45 
47 private:
48 
50 
51  const TargetMachine *TM;
52 
53  // -fuse-native.
54  bool AllNative = false;
55 
56  bool useNativeFunc(const StringRef F) const;
57 
58  // Return a pointer (pointer expr) to the function if function definition with
59  // "FuncName" exists. It may create a new function prototype in pre-link mode.
60  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
61 
62  bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
63 
64  bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
65 
66  /* Specialized optimizations */
67 
68  // recip (half or native)
69  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
70 
71  // divide (half or native)
72  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
73 
74  // pow/powr/pown
75  bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
76 
77  // rootn
78  bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
79 
80  // fma/mad
81  bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
82 
83  // -fuse-native for sincos
84  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
85 
86  // evaluate calls if calls' arguments are constants.
87  bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0,
88  double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
89  bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
90 
91  // sqrt
92  bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
93 
94  // sin/cos
95  bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
96 
97  // __read_pipe/__write_pipe
98  bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
99  const FuncInfo &FInfo);
100 
101  // llvm.amdgcn.wavefrontsize
102  bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
103 
104  // Get insertion point at entry.
105  BasicBlock::iterator getEntryIns(CallInst * UI);
106  // Insert an Alloc instruction.
107  AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
108  // Get a scalar native builtin single argument FP function
109  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
110 
111 protected:
113 
114  bool isUnsafeMath(const CallInst *CI) const;
115 
116  void replaceCall(Value *With) {
117  CI->replaceAllUsesWith(With);
118  CI->eraseFromParent();
119  }
120 
121 public:
122  AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
123 
124  bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
125 
126  void initNativeFuncs();
127 
128  // Replace a normal math function call with that native version
129  bool useNative(CallInst *CI);
130 };
131 
132 } // end llvm namespace
133 
134 namespace {
135 
136  class AMDGPUSimplifyLibCalls : public FunctionPass {
137 
138  AMDGPULibCalls Simplifier;
139 
140  public:
141  static char ID; // Pass identification
142 
143  AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
144  : FunctionPass(ID), Simplifier(TM) {
146  }
147 
148  void getAnalysisUsage(AnalysisUsage &AU) const override {
150  }
151 
152  bool runOnFunction(Function &M) override;
153  };
154 
155  class AMDGPUUseNativeCalls : public FunctionPass {
156 
157  AMDGPULibCalls Simplifier;
158 
159  public:
160  static char ID; // Pass identification
161 
162  AMDGPUUseNativeCalls() : FunctionPass(ID) {
164  Simplifier.initNativeFuncs();
165  }
166 
167  bool runOnFunction(Function &F) override;
168  };
169 
170 } // end anonymous namespace.
171 
173 char AMDGPUUseNativeCalls::ID = 0;
174 
175 INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
176  "Simplify well-known AMD library calls", false, false)
178 INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
180 
181 INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
182  "Replace builtin math calls with that native versions.",
183  false, false)
184 
185 template <typename IRB>
186 static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
187  const Twine &Name = "") {
188  CallInst *R = B.CreateCall(Callee, Arg, Name);
189  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
190  R->setCallingConv(F->getCallingConv());
191  return R;
192 }
193 
194 template <typename IRB>
195 static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
196  Value *Arg2, const Twine &Name = "") {
197  CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
198  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
199  R->setCallingConv(F->getCallingConv());
200  return R;
201 }
202 
// Data structures for table-driven optimizations.
// FuncTbl works for both f32 and f64 functions with 1 input argument

// One constant-folding fact: the library function applied to `input`
// yields `result` exactly.
struct TableEntry {
  double result;
  double input;
};
210 
/* a list of {result, input} */
// Exact special values of each math function, consumed by TDOFold() via
// getOptTable(); signed zeros are listed separately so -0.0 folds correctly.
static const TableEntry tbl_acos[] = {
  {MATH_PI / 2.0, 0.0},
  {MATH_PI / 2.0, -0.0},
  {0.0, 1.0},
  {MATH_PI, -1.0}
};
static const TableEntry tbl_acosh[] = {
  {0.0, 1.0}
};
static const TableEntry tbl_acospi[] = {
  {0.5, 0.0},
  {0.5, -0.0},
  {0.0, 1.0},
  {1.0, -1.0}
};
static const TableEntry tbl_asin[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 2.0, 1.0},
  {-MATH_PI / 2.0, -1.0}
};
static const TableEntry tbl_asinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_asinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.5, 1.0},
  {-0.5, -1.0}
};
static const TableEntry tbl_atan[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 4.0, 1.0},
  {-MATH_PI / 4.0, -1.0}
};
static const TableEntry tbl_atanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_atanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.25, 1.0},
  {-0.25, -1.0}
};
static const TableEntry tbl_cbrt[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {1.0, 1.0},
  {-1.0, -1.0},
};
static const TableEntry tbl_cos[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cosh[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cospi[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erfc[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erf[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_exp[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {MATH_E, 1.0}
};
static const TableEntry tbl_exp2[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {2.0, 1.0}
};
static const TableEntry tbl_exp10[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {10.0, 1.0}
};
static const TableEntry tbl_expm1[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_log[] = {
  {0.0, 1.0},
  {1.0, MATH_E}
};
static const TableEntry tbl_log2[] = {
  {0.0, 1.0},
  {1.0, 2.0}
};
static const TableEntry tbl_log10[] = {
  {0.0, 1.0},
  {1.0, 10.0}
};
static const TableEntry tbl_rsqrt[] = {
  {1.0, 1.0},
  {MATH_SQRT1_2, 2.0}
};
static const TableEntry tbl_sin[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sqrt[] = {
  {0.0, 0.0},
  {1.0, 1.0},
  {MATH_SQRT2, 2.0}
};
static const TableEntry tbl_tan[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tgamma[] = {
  {1.0, 1.0},
  {1.0, 2.0},
  {2.0, 3.0},
  {6.0, 4.0}
};
355 
357  switch(id) {
373  return true;
374  default:;
375  }
376  return false;
377 }
378 
379 struct TableRef {
380  size_t size;
381  const TableEntry *table; // variable size: from 0 to (size - 1)
382 
383  TableRef() : size(0), table(nullptr) {}
384 
385  template <size_t N>
386  TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
387 };
388 
390  switch(id) {
402  case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos);
406  case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf);
407  case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp);
412  case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log);
419  case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin);
424  case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan);
428  default:;
429  }
430  return TableRef();
431 }
432 
433 static inline int getVecSize(const AMDGPULibFunc& FInfo) {
434  return FInfo.getLeads()[0].VectorSize;
435 }
436 
437 static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
438  return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
439 }
440 
441 FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
442  // If we are doing PreLinkOpt, the function is external. So it is safe to
443  // use getOrInsertFunction() at this stage.
444 
446  : AMDGPULibFunc::getFunction(M, fInfo);
447 }
448 
449 bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
450  FuncInfo &FInfo) {
451  return AMDGPULibFunc::parse(FMangledName, FInfo);
452 }
453 
454 bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
455  if (auto Op = dyn_cast<FPMathOperator>(CI))
456  if (Op->isFast())
457  return true;
458  const Function *F = CI->getParent()->getParent();
459  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
460  return Attr.getValueAsBool();
461 }
462 
463 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
464  return AllNative || llvm::is_contained(UseNative, F);
465 }
466 
468  AllNative = useNativeFunc("all") ||
469  (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
470  UseNative.begin()->empty());
471 }
472 
473 bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
474  bool native_sin = useNativeFunc("sin");
475  bool native_cos = useNativeFunc("cos");
476 
477  if (native_sin && native_cos) {
478  Module *M = aCI->getModule();
479  Value *opr0 = aCI->getArgOperand(0);
480 
481  AMDGPULibFunc nf;
482  nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
483  nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
484 
487  FunctionCallee sinExpr = getFunction(M, nf);
488 
491  FunctionCallee cosExpr = getFunction(M, nf);
492  if (sinExpr && cosExpr) {
493  Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
494  Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
495  new StoreInst(cosval, aCI->getArgOperand(1), aCI);
496 
497  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
498  << " with native version of sin/cos");
499 
500  replaceCall(sinval);
501  return true;
502  }
503  }
504  return false;
505 }
506 
508  CI = aCI;
510 
511  FuncInfo FInfo;
512  if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
513  FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
514  getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
515  !(AllNative || useNativeFunc(FInfo.getName()))) {
516  return false;
517  }
518 
519  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
520  return sincosUseNative(aCI, FInfo);
521 
523  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
524  if (!F)
525  return false;
526 
527  aCI->setCalledFunction(F);
528  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
529  << " with native version");
530  return true;
531 }
532 
533 // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
534 // builtin, with appended type size and alignment arguments, where 2 or 4
535 // indicates the original number of arguments. The library has optimized version
536 // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
537 // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
538 // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
539 // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
540 bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
541  const FuncInfo &FInfo) {
542  auto *Callee = CI->getCalledFunction();
543  if (!Callee->isDeclaration())
544  return false;
545 
546  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
547  auto *M = Callee->getParent();
548  auto &Ctx = M->getContext();
549  std::string Name = std::string(Callee->getName());
550  auto NumArg = CI->arg_size();
551  if (NumArg != 4 && NumArg != 6)
552  return false;
553  auto *PacketSize = CI->getArgOperand(NumArg - 2);
554  auto *PacketAlign = CI->getArgOperand(NumArg - 1);
555  if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
556  return false;
557  unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
558  Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
559  if (Alignment != Size)
560  return false;
561 
562  Type *PtrElemTy;
563  if (Size <= 8)
564  PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
565  else
566  PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
567  unsigned PtrArgLoc = CI->arg_size() - 3;
568  auto PtrArg = CI->getArgOperand(PtrArgLoc);
569  unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
570  auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
571 
573  for (unsigned I = 0; I != PtrArgLoc; ++I)
574  ArgTys.push_back(CI->getArgOperand(I)->getType());
575  ArgTys.push_back(PtrTy);
576 
577  Name = Name + "_" + std::to_string(Size);
578  auto *FTy = FunctionType::get(Callee->getReturnType(),
579  ArrayRef<Type *>(ArgTys), false);
580  AMDGPULibFunc NewLibFunc(Name, FTy);
582  if (!F)
583  return false;
584 
585  auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
587  for (unsigned I = 0; I != PtrArgLoc; ++I)
588  Args.push_back(CI->getArgOperand(I));
589  Args.push_back(BCast);
590 
591  auto *NCI = B.CreateCall(F, Args);
592  NCI->setAttributes(CI->getAttributes());
593  CI->replaceAllUsesWith(NCI);
595  CI->eraseFromParent();
596 
597  return true;
598 }
599 
600 // This function returns false if no change; return true otherwise.
602  this->CI = CI;
604 
605  // Ignore indirect calls.
606  if (Callee == nullptr)
607  return false;
608 
609  BasicBlock *BB = CI->getParent();
612 
613  // Set the builder to the instruction after the call.
614  B.SetInsertPoint(BB, CI->getIterator());
615 
616  // Copy fast flags from the original call.
617  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
618  B.setFastMathFlags(FPOp->getFastMathFlags());
619 
620  switch (Callee->getIntrinsicID()) {
621  default:
622  break;
623  case Intrinsic::amdgcn_wavefrontsize:
624  return !EnablePreLink && fold_wavefrontsize(CI, B);
625  }
626 
627  FuncInfo FInfo;
628  if (!parseFunctionName(Callee->getName(), FInfo))
629  return false;
630 
631  // Further check the number of arguments to see if they match.
632  if (CI->arg_size() != FInfo.getNumArgs())
633  return false;
634 
635  if (TDOFold(CI, FInfo))
636  return true;
637 
638  // Under unsafe-math, evaluate calls if possible.
639  // According to Brian Sumner, we can do this for all f32 function calls
640  // using host's double function calls.
641  if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
642  return true;
643 
644  // Specialized optimizations for each function call
645  switch (FInfo.getId()) {
647  // skip vector function
648  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
649  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
650  "recip must be an either native or half function");
651  return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
652 
654  // skip vector function
655  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
656  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
657  "divide must be an either native or half function");
658  return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
659 
663  return fold_pow(CI, B, FInfo);
664 
666  // skip vector function
667  return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
668 
672  // skip vector function
673  return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
674 
676  return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
679  if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
680  getArgType(FInfo) == AMDGPULibFunc::F64)
681  && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
682  return fold_sincos(CI, B, AA);
683 
684  break;
689  return fold_read_write_pipe(CI, B, FInfo);
690 
691  default:
692  break;
693  }
694 
695  return false;
696 }
697 
698 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
699  // Table-Driven optimization
700  const TableRef tr = getOptTable(FInfo.getId());
701  if (tr.size==0)
702  return false;
703 
704  int const sz = (int)tr.size;
705  const TableEntry * const ftbl = tr.table;
706  Value *opr0 = CI->getArgOperand(0);
707 
708  if (getVecSize(FInfo) > 1) {
709  if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
711  for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
712  ConstantFP *eltval = dyn_cast<ConstantFP>(
713  CV->getElementAsConstant((unsigned)eltNo));
714  assert(eltval && "Non-FP arguments in math function!");
715  bool found = false;
716  for (int i=0; i < sz; ++i) {
717  if (eltval->isExactlyValue(ftbl[i].input)) {
718  DVal.push_back(ftbl[i].result);
719  found = true;
720  break;
721  }
722  }
723  if (!found) {
724  // This vector constants not handled yet.
725  return false;
726  }
727  }
728  LLVMContext &context = CI->getParent()->getParent()->getContext();
729  Constant *nval;
730  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
732  for (unsigned i = 0; i < DVal.size(); ++i) {
733  FVal.push_back((float)DVal[i]);
734  }
735  ArrayRef<float> tmp(FVal);
736  nval = ConstantDataVector::get(context, tmp);
737  } else { // F64
738  ArrayRef<double> tmp(DVal);
739  nval = ConstantDataVector::get(context, tmp);
740  }
741  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
742  replaceCall(nval);
743  return true;
744  }
745  } else {
746  // Scalar version
747  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
748  for (int i = 0; i < sz; ++i) {
749  if (CF->isExactlyValue(ftbl[i].input)) {
750  Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
751  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
752  replaceCall(nval);
753  return true;
754  }
755  }
756  }
757  }
758 
759  return false;
760 }
761 
762 // [native_]half_recip(c) ==> 1.0/c
763 bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
764  const FuncInfo &FInfo) {
765  Value *opr0 = CI->getArgOperand(0);
766  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
767  // Just create a normal div. Later, InstCombine will be able
768  // to compute the divide into a constant (avoid check float infinity
769  // or subnormal at this point).
770  Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
771  opr0,
772  "recip2div");
773  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
774  replaceCall(nval);
775  return true;
776  }
777  return false;
778 }
779 
780 // [native_]half_divide(x, c) ==> x/c
781 bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
782  const FuncInfo &FInfo) {
783  Value *opr0 = CI->getArgOperand(0);
784  Value *opr1 = CI->getArgOperand(1);
785  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
786  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
787 
788  if ((CF0 && CF1) || // both are constants
789  (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
790  // CF1 is constant && f32 divide
791  {
792  Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
793  opr1, "__div2recip");
794  Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
795  replaceCall(nval);
796  return true;
797  }
798  return false;
799 }
800 
namespace llvm {
// Local log2(double): use the C library's log2 when the targeted C standard
// exposes it, otherwise fall back to the identity log2(x) = ln(x)/ln(2).
// NOTE(review): placed in namespace llvm so unqualified log2 calls in this
// file resolve here — presumably for hosts lacking C99 log2; verify.
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
  return ::log2(V);
#else
  return log(V) / numbers::ln2;
#endif
}
}
810 
811 bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
812  const FuncInfo &FInfo) {
813  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
814  FInfo.getId() == AMDGPULibFunc::EI_POWR ||
815  FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
816  "fold_pow: encounter a wrong function call");
817 
818  Value *opr0, *opr1;
819  ConstantFP *CF;
820  ConstantInt *CINT;
821  ConstantAggregateZero *CZero;
822  Type *eltType;
823 
824  opr0 = CI->getArgOperand(0);
825  opr1 = CI->getArgOperand(1);
826  CZero = dyn_cast<ConstantAggregateZero>(opr1);
827  if (getVecSize(FInfo) == 1) {
828  eltType = opr0->getType();
829  CF = dyn_cast<ConstantFP>(opr1);
830  CINT = dyn_cast<ConstantInt>(opr1);
831  } else {
832  VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
833  assert(VTy && "Oprand of vector function should be of vectortype");
834  eltType = VTy->getElementType();
835  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
836 
837  // Now, only Handle vector const whose elements have the same value.
838  CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
839  CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
840  }
841 
842  // No unsafe math , no constant argument, do nothing
843  if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
844  return false;
845 
846  // 0x1111111 means that we don't do anything for this call.
847  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
848 
849  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
850  // pow/powr/pown(x, 0) == 1
851  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
852  Constant *cnval = ConstantFP::get(eltType, 1.0);
853  if (getVecSize(FInfo) > 1) {
854  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
855  }
856  replaceCall(cnval);
857  return true;
858  }
859  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
860  // pow/powr/pown(x, 1.0) = x
861  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
862  replaceCall(opr0);
863  return true;
864  }
865  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
866  // pow/powr/pown(x, 2.0) = x*x
867  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
868  << "\n");
869  Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
870  replaceCall(nval);
871  return true;
872  }
873  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
874  // pow/powr/pown(x, -1.0) = 1.0/x
875  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
876  Constant *cnval = ConstantFP::get(eltType, 1.0);
877  if (getVecSize(FInfo) > 1) {
878  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
879  }
880  Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
881  replaceCall(nval);
882  return true;
883  }
884 
885  Module *M = CI->getModule();
886  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
887  // pow[r](x, [-]0.5) = sqrt(x)
888  bool issqrt = CF->isExactlyValue(0.5);
889  if (FunctionCallee FPExpr =
890  getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
892  FInfo))) {
893  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
894  << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
895  Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
896  : "__pow2rsqrt");
897  replaceCall(nval);
898  return true;
899  }
900  }
901 
902  if (!isUnsafeMath(CI))
903  return false;
904 
905  // Unsafe Math optimization
906 
907  // Remember that ci_opr1 is set if opr1 is integral
908  if (CF) {
909  double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
910  ? (double)CF->getValueAPF().convertToFloat()
911  : CF->getValueAPF().convertToDouble();
912  int ival = (int)dval;
913  if ((double)ival == dval) {
914  ci_opr1 = ival;
915  } else
916  ci_opr1 = 0x11111111;
917  }
918 
919  // pow/powr/pown(x, c) = [1/](x*x*..x); where
920  // trunc(c) == c && the number of x == c && |c| <= 12
921  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
922  if (abs_opr1 <= 12) {
923  Constant *cnval;
924  Value *nval;
925  if (abs_opr1 == 0) {
926  cnval = ConstantFP::get(eltType, 1.0);
927  if (getVecSize(FInfo) > 1) {
928  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
929  }
930  nval = cnval;
931  } else {
932  Value *valx2 = nullptr;
933  nval = nullptr;
934  while (abs_opr1 > 0) {
935  valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
936  if (abs_opr1 & 1) {
937  nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
938  }
939  abs_opr1 >>= 1;
940  }
941  }
942 
943  if (ci_opr1 < 0) {
944  cnval = ConstantFP::get(eltType, 1.0);
945  if (getVecSize(FInfo) > 1) {
946  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
947  }
948  nval = B.CreateFDiv(cnval, nval, "__1powprod");
949  }
950  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
951  << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
952  << ")\n");
953  replaceCall(nval);
954  return true;
955  }
956 
957  // powr ---> exp2(y * log2(x))
958  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
959  FunctionCallee ExpExpr =
960  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
961  if (!ExpExpr)
962  return false;
963 
964  bool needlog = false;
965  bool needabs = false;
966  bool needcopysign = false;
967  Constant *cnval = nullptr;
968  if (getVecSize(FInfo) == 1) {
969  CF = dyn_cast<ConstantFP>(opr0);
970 
971  if (CF) {
972  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
973  ? (double)CF->getValueAPF().convertToFloat()
974  : CF->getValueAPF().convertToDouble();
975 
976  V = log2(std::abs(V));
977  cnval = ConstantFP::get(eltType, V);
978  needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
979  CF->isNegative();
980  } else {
981  needlog = true;
982  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
983  (!CF || CF->isNegative());
984  }
985  } else {
986  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
987 
988  if (!CDV) {
989  needlog = true;
990  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
991  } else {
992  assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
993  "Wrong vector size detected");
994 
996  for (int i=0; i < getVecSize(FInfo); ++i) {
997  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
998  ? (double)CDV->getElementAsFloat(i)
999  : CDV->getElementAsDouble(i);
1000  if (V < 0.0) needcopysign = true;
1001  V = log2(std::abs(V));
1002  DVal.push_back(V);
1003  }
1004  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1005  SmallVector<float, 0> FVal;
1006  for (unsigned i=0; i < DVal.size(); ++i) {
1007  FVal.push_back((float)DVal[i]);
1008  }
1009  ArrayRef<float> tmp(FVal);
1010  cnval = ConstantDataVector::get(M->getContext(), tmp);
1011  } else {
1012  ArrayRef<double> tmp(DVal);
1013  cnval = ConstantDataVector::get(M->getContext(), tmp);
1014  }
1015  }
1016  }
1017 
1018  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
1019  // We cannot handle corner cases for a general pow() function, give up
1020  // unless y is a constant integral value. Then proceed as if it were pown.
1021  if (getVecSize(FInfo) == 1) {
1022  if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
1023  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1024  ? (double)CF->getValueAPF().convertToFloat()
1025  : CF->getValueAPF().convertToDouble();
1026  if (y != (double)(int64_t)y)
1027  return false;
1028  } else
1029  return false;
1030  } else {
1031  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
1032  for (int i=0; i < getVecSize(FInfo); ++i) {
1033  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1034  ? (double)CDV->getElementAsFloat(i)
1035  : CDV->getElementAsDouble(i);
1036  if (y != (double)(int64_t)y)
1037  return false;
1038  }
1039  } else
1040  return false;
1041  }
1042  }
1043 
1044  Value *nval;
1045  if (needabs) {
1046  FunctionCallee AbsExpr =
1047  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
1048  if (!AbsExpr)
1049  return false;
1050  nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
1051  } else {
1052  nval = cnval ? cnval : opr0;
1053  }
1054  if (needlog) {
1055  FunctionCallee LogExpr =
1056  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1057  if (!LogExpr)
1058  return false;
1059  nval = CreateCallEx(B,LogExpr, nval, "__log2");
1060  }
1061 
1062  if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
1063  // convert int(32) to fp(f32 or f64)
1064  opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1065  }
1066  nval = B.CreateFMul(opr1, nval, "__ylogx");
1067  nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
1068 
1069  if (needcopysign) {
1070  Value *opr_n;
1071  Type* rTy = opr0->getType();
1072  Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
1073  Type *nTy = nTyS;
1074  if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
1075  nTy = FixedVectorType::get(nTyS, vTy);
1076  unsigned size = nTy->getScalarSizeInBits();
1077  opr_n = CI->getArgOperand(1);
1078  if (opr_n->getType()->isIntegerTy())
1079  opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
1080  else
1081  opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1082 
1083  Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1084  sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1085  nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1086  nval = B.CreateBitCast(nval, opr0->getType());
1087  }
1088 
1089  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1090  << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1091  replaceCall(nval);
1092 
1093  return true;
1094 }
1095 
1096 bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
1097  const FuncInfo &FInfo) {
1098  Value *opr0 = CI->getArgOperand(0);
1099  Value *opr1 = CI->getArgOperand(1);
1100 
1101  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
1102  if (!CINT) {
1103  return false;
1104  }
1105  int ci_opr1 = (int)CINT->getSExtValue();
1106  if (ci_opr1 == 1) { // rootn(x, 1) = x
1107  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
1108  replaceCall(opr0);
1109  return true;
1110  }
1111  if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
1112  Module *M = CI->getModule();
1113  if (FunctionCallee FPExpr =
1114  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1115  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
1116  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
1117  replaceCall(nval);
1118  return true;
1119  }
1120  } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1121  Module *M = CI->getModule();
1122  if (FunctionCallee FPExpr =
1123  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1124  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
1125  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1126  replaceCall(nval);
1127  return true;
1128  }
1129  } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1130  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
1131  Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1132  opr0,
1133  "__rootn2div");
1134  replaceCall(nval);
1135  return true;
1136  } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
1137  Module *M = CI->getModule();
1138  if (FunctionCallee FPExpr =
1139  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
1140  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
1141  << ")\n");
1142  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
1143  replaceCall(nval);
1144  return true;
1145  }
1146  }
1147  return false;
1148 }
1149 
1150 bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
1151  const FuncInfo &FInfo) {
1152  Value *opr0 = CI->getArgOperand(0);
1153  Value *opr1 = CI->getArgOperand(1);
1154  Value *opr2 = CI->getArgOperand(2);
1155 
1156  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
1157  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
1158  if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
1159  // fma/mad(a, b, c) = c if a=0 || b=0
1160  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
1161  replaceCall(opr2);
1162  return true;
1163  }
1164  if (CF0 && CF0->isExactlyValue(1.0f)) {
1165  // fma/mad(a, b, c) = b+c if a=1
1166  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
1167  << "\n");
1168  Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
1169  replaceCall(nval);
1170  return true;
1171  }
1172  if (CF1 && CF1->isExactlyValue(1.0f)) {
1173  // fma/mad(a, b, c) = a+c if b=1
1174  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
1175  << "\n");
1176  Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
1177  replaceCall(nval);
1178  return true;
1179  }
1180  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
1181  if (CF->isZero()) {
1182  // fma/mad(a, b, c) = a*b if c=0
1183  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
1184  << *opr1 << "\n");
1185  Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
1186  replaceCall(nval);
1187  return true;
1188  }
1189  }
1190 
1191  return false;
1192 }
1193 
1194 // Get a scalar native builtin single argument FP function
1195 FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1196  const FuncInfo &FInfo) {
1197  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1198  return nullptr;
1199  FuncInfo nf = FInfo;
1201  return getFunction(M, nf);
1202 }
1203 
1204 // fold sqrt -> native_sqrt (x)
1205 bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
1206  const FuncInfo &FInfo) {
1207  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
1208  (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
1209  if (FunctionCallee FPExpr = getNativeFunction(
1211  Value *opr0 = CI->getArgOperand(0);
1212  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1213  << "sqrt(" << *opr0 << ")\n");
1214  Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
1215  replaceCall(nval);
1216  return true;
1217  }
1218  }
1219  return false;
1220 }
1221 
1222 // fold sin, cos -> sincos.
1223 bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
1224  AliasAnalysis *AA) {
1225  AMDGPULibFunc fInfo;
1226  if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
1227  return false;
1228 
1229  assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1230  fInfo.getId() == AMDGPULibFunc::EI_COS);
1231  bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1232 
1233  Value *CArgVal = CI->getArgOperand(0);
1234  BasicBlock * const CBB = CI->getParent();
1235 
1236  int const MaxScan = 30;
1237  bool Changed = false;
1238 
1239  { // fold in load value.
1240  LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
1241  if (LI && LI->getParent() == CBB) {
1242  BasicBlock::iterator BBI = LI->getIterator();
1243  Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
1244  if (AvailableVal) {
1245  Changed = true;
1246  CArgVal->replaceAllUsesWith(AvailableVal);
1247  if (CArgVal->getNumUses() == 0)
1248  LI->eraseFromParent();
1249  CArgVal = CI->getArgOperand(0);
1250  }
1251  }
1252  }
1253 
1254  Module *M = CI->getModule();
1256  std::string const PairName = fInfo.mangle();
1257 
1258  CallInst *UI = nullptr;
1259  for (User* U : CArgVal->users()) {
1260  CallInst *XI = dyn_cast_or_null<CallInst>(U);
1261  if (!XI || XI == CI || XI->getParent() != CBB)
1262  continue;
1263 
1264  Function *UCallee = XI->getCalledFunction();
1265  if (!UCallee || !UCallee->getName().equals(PairName))
1266  continue;
1267 
1269  if (BBI == CI->getParent()->begin())
1270  break;
1271  --BBI;
1272  for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
1273  if (cast<Instruction>(BBI) == XI) {
1274  UI = XI;
1275  break;
1276  }
1277  }
1278  if (UI) break;
1279  }
1280 
1281  if (!UI)
1282  return Changed;
1283 
1284  // Merge the sin and cos.
1285 
1286  // for OpenCL 2.0 we have only generic implementation of sincos
1287  // function.
1290  FunctionCallee Fsincos = getFunction(M, nf);
1291  if (!Fsincos)
1292  return Changed;
1293 
1294  BasicBlock::iterator ItOld = B.GetInsertPoint();
1295  AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
1296  B.SetInsertPoint(UI);
1297 
1298  Value *P = Alloc;
1299  Type *PTy = Fsincos.getFunctionType()->getParamType(1);
1300  // The allocaInst allocates the memory in private address space. This need
1301  // to be bitcasted to point to the address space of cos pointer type.
1302  // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1304  P = B.CreateAddrSpaceCast(Alloc, PTy);
1305  CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
1306 
1307  LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
1308  << *Call << "\n");
1309 
1310  if (!isSin) { // CI->cos, UI->sin
1311  B.SetInsertPoint(&*ItOld);
1312  UI->replaceAllUsesWith(&*Call);
1313  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1314  CI->replaceAllUsesWith(Reload);
1315  UI->eraseFromParent();
1316  CI->eraseFromParent();
1317  } else { // CI->sin, UI->cos
1318  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1319  UI->replaceAllUsesWith(Reload);
1320  CI->replaceAllUsesWith(Call);
1321  UI->eraseFromParent();
1322  CI->eraseFromParent();
1323  }
1324  return true;
1325 }
1326 
1327 bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1328  if (!TM)
1329  return false;
1330 
1331  StringRef CPU = TM->getTargetCPU();
1332  StringRef Features = TM->getTargetFeatureString();
1333  if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
1334  (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
1335  return false;
1336 
1337  Function *F = CI->getParent()->getParent();
1338  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1339  unsigned N = ST.getWavefrontSize();
1340 
1341  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1342  << N << "\n");
1343 
1344  CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1345  CI->eraseFromParent();
1346  return true;
1347 }
1348 
1349 // Get insertion point at entry.
1350 BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
1351  Function * Func = UI->getParent()->getParent();
1352  BasicBlock * BB = &Func->getEntryBlock();
1353  assert(BB && "Entry block not found!");
1354  BasicBlock::iterator ItNew = BB->begin();
1355  return ItNew;
1356 }
1357 
1358 // Insert a AllocsInst at the beginning of function entry block.
1359 AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
1360  const char *prefix) {
1361  BasicBlock::iterator ItNew = getEntryIns(UI);
1362  Function *UCallee = UI->getCalledFunction();
1363  Type *RetType = UCallee->getReturnType();
1364  B.SetInsertPoint(&*ItNew);
1365  AllocaInst *Alloc =
1366  B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName());
1367  Alloc->setAlignment(
1368  Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
1369  return Alloc;
1370 }
1371 
1372 bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
1373  double& Res0, double& Res1,
1374  Constant *copr0, Constant *copr1,
1375  Constant *copr2) {
1376  // By default, opr0/opr1/opr3 holds values of float/double type.
1377  // If they are not float/double, each function has to its
1378  // operand separately.
1379  double opr0=0.0, opr1=0.0, opr2=0.0;
1380  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1381  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1382  ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
1383  if (fpopr0) {
1384  opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1385  ? fpopr0->getValueAPF().convertToDouble()
1386  : (double)fpopr0->getValueAPF().convertToFloat();
1387  }
1388 
1389  if (fpopr1) {
1390  opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1391  ? fpopr1->getValueAPF().convertToDouble()
1392  : (double)fpopr1->getValueAPF().convertToFloat();
1393  }
1394 
1395  if (fpopr2) {
1396  opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1397  ? fpopr2->getValueAPF().convertToDouble()
1398  : (double)fpopr2->getValueAPF().convertToFloat();
1399  }
1400 
1401  switch (FInfo.getId()) {
1402  default : return false;
1403 
1405  Res0 = acos(opr0);
1406  return true;
1407 
1409  // acosh(x) == log(x + sqrt(x*x - 1))
1410  Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1411  return true;
1412 
1414  Res0 = acos(opr0) / MATH_PI;
1415  return true;
1416 
1418  Res0 = asin(opr0);
1419  return true;
1420 
1422  // asinh(x) == log(x + sqrt(x*x + 1))
1423  Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1424  return true;
1425 
1427  Res0 = asin(opr0) / MATH_PI;
1428  return true;
1429 
1431  Res0 = atan(opr0);
1432  return true;
1433 
1435  // atanh(x) == (log(x+1) - log(x-1))/2;
1436  Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1437  return true;
1438 
1440  Res0 = atan(opr0) / MATH_PI;
1441  return true;
1442 
1444  Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1445  return true;
1446 
1447  case AMDGPULibFunc::EI_COS:
1448  Res0 = cos(opr0);
1449  return true;
1450 
1452  Res0 = cosh(opr0);
1453  return true;
1454 
1456  Res0 = cos(MATH_PI * opr0);
1457  return true;
1458 
1459  case AMDGPULibFunc::EI_EXP:
1460  Res0 = exp(opr0);
1461  return true;
1462 
1464  Res0 = pow(2.0, opr0);
1465  return true;
1466 
1468  Res0 = pow(10.0, opr0);
1469  return true;
1470 
1472  Res0 = exp(opr0) - 1.0;
1473  return true;
1474 
1475  case AMDGPULibFunc::EI_LOG:
1476  Res0 = log(opr0);
1477  return true;
1478 
1480  Res0 = log(opr0) / log(2.0);
1481  return true;
1482 
1484  Res0 = log(opr0) / log(10.0);
1485  return true;
1486 
1488  Res0 = 1.0 / sqrt(opr0);
1489  return true;
1490 
1491  case AMDGPULibFunc::EI_SIN:
1492  Res0 = sin(opr0);
1493  return true;
1494 
1496  Res0 = sinh(opr0);
1497  return true;
1498 
1500  Res0 = sin(MATH_PI * opr0);
1501  return true;
1502 
1504  Res0 = sqrt(opr0);
1505  return true;
1506 
1507  case AMDGPULibFunc::EI_TAN:
1508  Res0 = tan(opr0);
1509  return true;
1510 
1512  Res0 = tanh(opr0);
1513  return true;
1514 
1516  Res0 = tan(MATH_PI * opr0);
1517  return true;
1518 
1520  Res0 = 1.0 / opr0;
1521  return true;
1522 
1523  // two-arg functions
1525  Res0 = opr0 / opr1;
1526  return true;
1527 
1528  case AMDGPULibFunc::EI_POW:
1530  Res0 = pow(opr0, opr1);
1531  return true;
1532 
1533  case AMDGPULibFunc::EI_POWN: {
1534  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1535  double val = (double)iopr1->getSExtValue();
1536  Res0 = pow(opr0, val);
1537  return true;
1538  }
1539  return false;
1540  }
1541 
1542  case AMDGPULibFunc::EI_ROOTN: {
1543  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1544  double val = (double)iopr1->getSExtValue();
1545  Res0 = pow(opr0, 1.0 / val);
1546  return true;
1547  }
1548  return false;
1549  }
1550 
1551  // with ptr arg
1553  Res0 = sin(opr0);
1554  Res1 = cos(opr0);
1555  return true;
1556 
1557  // three-arg functions
1558  case AMDGPULibFunc::EI_FMA:
1559  case AMDGPULibFunc::EI_MAD:
1560  Res0 = opr0 * opr1 + opr2;
1561  return true;
1562  }
1563 
1564  return false;
1565 }
1566 
1567 bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1568  int numArgs = (int)aCI->arg_size();
1569  if (numArgs > 3)
1570  return false;
1571 
1572  Constant *copr0 = nullptr;
1573  Constant *copr1 = nullptr;
1574  Constant *copr2 = nullptr;
1575  if (numArgs > 0) {
1576  if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1577  return false;
1578  }
1579 
1580  if (numArgs > 1) {
1581  if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1582  if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1583  return false;
1584  }
1585  }
1586 
1587  if (numArgs > 2) {
1588  if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
1589  return false;
1590  }
1591 
1592  // At this point, all arguments to aCI are constants.
1593 
1594  // max vector size is 16, and sincos will generate two results.
1595  double DVal0[16], DVal1[16];
1596  int FuncVecSize = getVecSize(FInfo);
1597  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1598  if (FuncVecSize == 1) {
1599  if (!evaluateScalarMathFunc(FInfo, DVal0[0],
1600  DVal1[0], copr0, copr1, copr2)) {
1601  return false;
1602  }
1603  } else {
1604  ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1605  ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1606  ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
1607  for (int i = 0; i < FuncVecSize; ++i) {
1608  Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1609  Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1610  Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
1611  if (!evaluateScalarMathFunc(FInfo, DVal0[i],
1612  DVal1[i], celt0, celt1, celt2)) {
1613  return false;
1614  }
1615  }
1616  }
1617 
1618  LLVMContext &context = CI->getParent()->getParent()->getContext();
1619  Constant *nval0, *nval1;
1620  if (FuncVecSize == 1) {
1621  nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
1622  if (hasTwoResults)
1623  nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
1624  } else {
1625  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1626  SmallVector <float, 0> FVal0, FVal1;
1627  for (int i = 0; i < FuncVecSize; ++i)
1628  FVal0.push_back((float)DVal0[i]);
1629  ArrayRef<float> tmp0(FVal0);
1630  nval0 = ConstantDataVector::get(context, tmp0);
1631  if (hasTwoResults) {
1632  for (int i = 0; i < FuncVecSize; ++i)
1633  FVal1.push_back((float)DVal1[i]);
1634  ArrayRef<float> tmp1(FVal1);
1635  nval1 = ConstantDataVector::get(context, tmp1);
1636  }
1637  } else {
1638  ArrayRef<double> tmp0(DVal0);
1639  nval0 = ConstantDataVector::get(context, tmp0);
1640  if (hasTwoResults) {
1641  ArrayRef<double> tmp1(DVal1);
1642  nval1 = ConstantDataVector::get(context, tmp1);
1643  }
1644  }
1645  }
1646 
1647  if (hasTwoResults) {
1648  // sincos
1649  assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
1650  "math function with ptr arg not supported yet");
1651  new StoreInst(nval1, aCI->getArgOperand(1), aCI);
1652  }
1653 
1654  replaceCall(nval0);
1655  return true;
1656 }
1657 
1658 // Public interface to the Simplify LibCalls pass.
1660  return new AMDGPUSimplifyLibCalls(TM);
1661 }
1662 
1664  return new AMDGPUUseNativeCalls();
1665 }
1666 
1668  if (skipFunction(F))
1669  return false;
1670 
1671  bool Changed = false;
1672  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1673 
1674  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1675  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1676 
1677  for (auto &BB : F) {
1678  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1679  // Ignore non-calls.
1680  CallInst *CI = dyn_cast<CallInst>(I);
1681  ++I;
1682  // Ignore intrinsics that do not become real instructions.
1683  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1684  continue;
1685 
1686  // Ignore indirect calls.
1688  if (Callee == nullptr)
1689  continue;
1690 
1691  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1692  dbgs().flush());
1693  if(Simplifier.fold(CI, AA))
1694  Changed = true;
1695  }
1696  }
1697  return Changed;
1698 }
1699 
1702  AMDGPULibCalls Simplifier(&TM);
1703  Simplifier.initNativeFuncs();
1704 
1705  bool Changed = false;
1706  auto AA = &AM.getResult<AAManager>(F);
1707 
1708  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1709  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1710 
1711  for (auto &BB : F) {
1712  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1713  // Ignore non-calls.
1714  CallInst *CI = dyn_cast<CallInst>(I);
1715  ++I;
1716  // Ignore intrinsics that do not become real instructions.
1717  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1718  continue;
1719 
1720  // Ignore indirect calls.
1722  if (Callee == nullptr)
1723  continue;
1724 
1725  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1726  dbgs().flush());
1727  if (Simplifier.fold(CI, AA))
1728  Changed = true;
1729  }
1730  }
1731  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1732 }
1733 
1735  if (skipFunction(F) || UseNative.empty())
1736  return false;
1737 
1738  bool Changed = false;
1739  for (auto &BB : F) {
1740  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1741  // Ignore non-calls.
1742  CallInst *CI = dyn_cast<CallInst>(I);
1743  ++I;
1744  if (!CI) continue;
1745 
1746  // Ignore indirect calls.
1748  if (Callee == nullptr)
1749  continue;
1750 
1751  if (Simplifier.useNative(CI))
1752  Changed = true;
1753  }
1754  }
1755  return Changed;
1756 }
1757 
1760  if (UseNative.empty())
1761  return PreservedAnalyses::all();
1762 
1763  AMDGPULibCalls Simplifier;
1764  Simplifier.initNativeFuncs();
1765 
1766  bool Changed = false;
1767  for (auto &BB : F) {
1768  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1769  // Ignore non-calls.
1770  CallInst *CI = dyn_cast<CallInst>(I);
1771  ++I;
1772  if (!CI)
1773  continue;
1774 
1775  // Ignore indirect calls.
1777  if (Callee == nullptr)
1778  continue;
1779 
1780  if (Simplifier.useNative(CI))
1781  Changed = true;
1782  }
1783  }
1784  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1785 }
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::AMDGPULibFunc::isMangled
bool isMangled() const
Definition: AMDGPULibFunc.h:380
llvm::AMDGPULibFuncBase::EI_ASINH
@ EI_ASINH
Definition: AMDGPULibFunc.h:47
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1303
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:4915
llvm::AMDGPULibFuncBase::EI_NFMA
@ EI_NFMA
Definition: AMDGPULibFunc.h:225
llvm::AMDGPULibCalls::AMDGPULibCalls
AMDGPULibCalls(const TargetMachine *TM_=nullptr)
Definition: AMDGPULibCalls.cpp:122
getOptTable
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:389
tbl_asin
static const TableEntry tbl_asin[]
Definition: AMDGPULibCalls.cpp:227
tbl_sinpi
static const TableEntry tbl_sinpi[]
Definition: AMDGPULibCalls.cpp:328
TableRef::table
const TableEntry * table
Definition: AMDGPULibCalls.cpp:381
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
llvm::AMDGPULibFuncBase::EI_ACOSH
@ EI_ACOSH
Definition: AMDGPULibFunc.h:41
llvm::AMDGPULibFuncBase::F64
@ F64
Definition: AMDGPULibFunc.h:273
tbl_tan
static const TableEntry tbl_tan[]
Definition: AMDGPULibCalls.cpp:337
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
tbl_acos
static const TableEntry tbl_acos[]
Definition: AMDGPULibCalls.cpp:212
IntrinsicInst.h
tbl_log2
static const TableEntry tbl_log2[]
Definition: AMDGPULibCalls.cpp:308
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:780
Loads.h
llvm::Function
Definition: Function.h:60
llvm::Attribute
Definition: Attributes.h:65
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::CallBase::setCalledFunction
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1435
TableRef::size
size_t size
Definition: AMDGPULibCalls.cpp:380
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:727
double
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in and only one load from a constant double
Definition: README-SSE.txt:85
llvm::AMDGPULibFuncBase::EI_NCOS
@ EI_NCOS
Definition: AMDGPULibFunc.h:223
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
tbl_atanh
static const TableEntry tbl_atanh[]
Definition: AMDGPULibCalls.cpp:249
llvm::AMDGPULibFuncBase::EI_ROOTN
@ EI_ROOTN
Definition: AMDGPULibFunc.h:163
llvm::AMDGPULibFuncBase::EI_READ_PIPE_2
@ EI_READ_PIPE_2
Definition: AMDGPULibFunc.h:239
llvm::User::dropAllReferences
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
MATH_PI
#define MATH_PI
Definition: AMDGPULibCalls.cpp:39
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:165
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:729
tbl_log
static const TableEntry tbl_log[]
Definition: AMDGPULibCalls.cpp:304
llvm::IRBuilder<>
llvm::cl::ValueOptional
@ ValueOptional
Definition: CommandLine.h:132
llvm::AMDGPULibFuncBase::EI_ATANPI
@ EI_ATANPI
Definition: AMDGPULibFunc.h:55
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:361
tbl_log10
static const TableEntry tbl_log10[]
Definition: AMDGPULibCalls.cpp:312
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
llvm::AMDGPULibFuncBase::EI_POWR
@ EI_POWR
Definition: AMDGPULibFunc.h:153
llvm::ConstantDataVector::get
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:3114
llvm::ConstantDataSequential::getElementAsFloat
float getElementAsFloat(unsigned i) const
If this is an sequential container of floats, return the specified element as a float.
Definition: Constants.cpp:3295
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
llvm::AMDGPULibFunc::parse
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
Definition: AMDGPULibFunc.cpp:681
llvm::ConstantFP::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:302
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AMDGPULibFuncBase::EI_ERFC
@ EI_ERFC
Definition: AMDGPULibFunc.h:85
llvm::AMDGPULibFuncBase::EI_TANPI
@ EI_TANPI
Definition: AMDGPULibFunc.h:196
llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1474
llvm::ConstantAggregateZero
All zero aggregate value.
Definition: Constants.h:337
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:298
llvm::AMDGPULibFuncBase::EI_COSH
@ EI_COSH
Definition: AMDGPULibFunc.h:76
tbl_cbrt
static const TableEntry tbl_cbrt[]
Definition: AMDGPULibCalls.cpp:259
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::AMDGPULibFuncBase::EI_CBRT
@ EI_CBRT
Definition: AMDGPULibFunc.h:68
llvm::AMDGPULibFuncBase::EI_SIN
@ EI_SIN
Definition: AMDGPULibFunc.h:172
llvm::AMDGPULibFuncBase::EI_LOG
@ EI_LOG
Definition: AMDGPULibFunc.h:130
tmp1
urem i32 %X, 255 ret i32 %tmp1 } Currently it compiles to:... movl $2155905153, %ecx movl 8(%esp), %esi movl %esi, %eax mull %ecx ... This could be "reassociated" into:movl $2155905153, %eax movl 8(%esp), %ecx mull %ecx to avoid the copy. In fact, the existing two-address stuff would do this except that mul isn 't a commutative 2-addr instruction. I guess this has to be done at isel time based on the #uses to mul? Make sure the instruction which starts a loop does not cross a cacheline boundary. This requires knowning the exact length of each machine instruction. That is somewhat complicated, but doable. Example 256.bzip2:In the new trace, the hot loop has an instruction which crosses a cacheline boundary. In addition to potential cache misses, this can 't help decoding as I imagine there has to be some kind of complicated decoder reset and realignment to grab the bytes from the next cacheline. 532 532 0x3cfc movb(1809(%esp, %esi), %bl<<<--- spans 2 64 byte lines 942 942 0x3d03 movl %dh,(1809(%esp, %esi) 937 937 0x3d0a incl %esi 3 3 0x3d0b cmpb %bl, %dl 27 27 0x3d0d jnz 0x000062db< main+11707 > In c99 mode, the preprocessor doesn 't like assembly comments like #TRUNCATE. This could be a single 16-bit load. int f(char *p) { if((p[0]==1) &(p[1]==2)) return 1 tmp1
Definition: README.txt:375
llvm::AMDGPULibFuncBase::EI_POW
@ EI_POW
Definition: AMDGPULibFunc.h:151
that
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing and or getMachineCombinerPatterns Find a clean way to fix the problem which leads to the Shrink Wrapping pass being run after the WebAssembly PEI pass When setting multiple variables to the same we currently get code like const It could be done with a smaller encoding like local tee $pop5 local $pop6 WebAssembly registers are implicitly initialized to zero Explicit zeroing is therefore often redundant and could be optimized away Small indices may use smaller encodings than large indices WebAssemblyRegColoring and or WebAssemblyRegRenumbering should sort registers according to their usage frequency to maximize the usage of smaller encodings Many cases of irreducible control flow could be transformed more optimally than via the transform in WebAssemblyFixIrreducibleControlFlow cpp It may also be worthwhile to do transforms before register particularly when duplicating to allow register coloring to be aware of the duplication WebAssemblyRegStackify could use AliasAnalysis to reorder loads and stores more aggressively WebAssemblyRegStackify is currently a greedy algorithm This means that
Definition: README.txt:130
tmp
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
Definition: README.txt:1347
llvm::AMDGPULibFuncBase::EI_RECIP
@ EI_RECIP
Definition: AMDGPULibFunc.h:156
llvm::AMDGPULibFuncBase::EI_NSIN
@ EI_NSIN
Definition: AMDGPULibFunc.h:229
llvm::Attribute::getValueAsBool
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:290
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
llvm::AMDGPULibCalls
Definition: AMDGPULibCalls.cpp:46
tbl_erf
static const TableEntry tbl_erf[]
Definition: AMDGPULibCalls.cpp:281
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
library
Itanium Name Demangler i e convert the string _Z1fv into and both[sub] projects need to demangle but neither can depend on each other *libcxxabi needs the demangler to implement which is part of the itanium ABI spec *LLVM needs a copy for a bunch of and cannot rely on the system s __cxa_demangle because it a might not be and b may not be up to date on the latest language features The copy of the demangler in LLVM has some extra stuff that aren t needed in which depend on the shared generic components Despite these we want to keep the core generic demangling library identical between both copies to simplify development and testing If you re working on the generic library
Definition: README.txt:36
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AMDGPULibFuncBase::EI_NSQRT
@ EI_NSQRT
Definition: AMDGPULibFunc.h:230
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:240
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
AliasAnalysis.h
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AMDGPULibCalls::replaceCall
void replaceCall(Value *With)
Definition: AMDGPULibCalls.cpp:116
llvm::AMDGPULibFunc::getLeads
Param * getLeads()
Get leading parameters for mangled lib functions.
Definition: AMDGPULibFunc.cpp:1050
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
llvm::AMDGPULibFuncBase::getEPtrKindFromAddrSpace
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Definition: AMDGPULibFunc.h:313
llvm::AMDGPULibFunc
Wrapper class for AMDGPULIbFuncImpl.
Definition: AMDGPULibFunc.h:358
tbl_acospi
static const TableEntry tbl_acospi[]
Definition: AMDGPULibCalls.cpp:221
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::AMDGPULibFuncBase::EI_TANH
@ EI_TANH
Definition: AMDGPULibFunc.h:195
llvm::AMDGPULibFuncBase::EI_FMA
@ EI_FMA
Definition: AMDGPULibFunc.h:96
DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
llvm::AMDGPULibFuncBase::EI_ATANH
@ EI_ATANH
Definition: AMDGPULibFunc.h:54
llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1663
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AMDGPULibFuncBase::EType
EType
Definition: AMDGPULibFunc.h:253
llvm::AAResults
Definition: AliasAnalysis.h:511
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
tbl_exp
static const TableEntry tbl_exp[]
Definition: AMDGPULibCalls.cpp:285
llvm::AMDGPULibFuncBase::EI_FABS
@ EI_FABS
Definition: AMDGPULibFunc.h:90
llvm::User
Definition: User.h:44
llvm::AMDGPULibCalls::useNative
bool useNative(CallInst *CI)
Definition: AMDGPULibCalls.cpp:507
llvm::AMDGPULibFunc::getOrInsertFunction
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
Definition: AMDGPULibFunc.cpp:961
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1396
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1504
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
tbl_asinpi
static const TableEntry tbl_asinpi[]
Definition: AMDGPULibCalls.cpp:237
llvm::AMDGPULibFuncBase::EI_ACOSPI
@ EI_ACOSPI
Definition: AMDGPULibFunc.h:42
false
Definition: StackSlotColoring.cpp:141
llvm::AMDGPULibFuncBase::EI_NEXP2
@ EI_NEXP2
Definition: AMDGPULibFunc.h:224
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::APFloat::convertToDouble
double convertToDouble() const
Converts this APFloat to host double value.
Definition: APFloat.cpp:4902
llvm::Instruction
Definition: Instruction.h:42
llvm::ConstantDataVector::getSplatValue
Constant * getSplatValue() const
If this is a splat constant, meaning that all of the elements have the same value,...
Definition: Constants.cpp:3352
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::ConstantFP::isExactlyValue
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: Constants.cpp:1098
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:187
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
llvm::FunctionCallee::getFunctionType
FunctionType * getFunctionType()
Definition: DerivedTypes.h:182
getVecSize
static int getVecSize(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:433
llvm::AMDGPULibFuncBase::EI_ASIN
@ EI_ASIN
Definition: AMDGPULibFunc.h:46
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
llvm::AMDGPULibFuncBase::EI_TAN
@ EI_TAN
Definition: AMDGPULibFunc.h:194
tbl_expm1
static const TableEntry tbl_expm1[]
Definition: AMDGPULibCalls.cpp:300
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
input
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 input
Definition: README.txt:10
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:155
tbl_atan
static const TableEntry tbl_atan[]
Definition: AMDGPULibCalls.cpp:243
llvm::Instruction::isLifetimeStartOrEnd
bool isLifetimeStartOrEnd() const
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
Definition: Instruction.cpp:718
Name
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Definition: AMDGPULibCalls.cpp:187
llvm::AMDGPUSimplifyLibCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1700
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
HasNative
static bool HasNative(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:356
tbl_tanh
static const TableEntry tbl_tanh[]
Definition: AMDGPULibCalls.cpp:341
llvm::AMDGPULibFuncBase::NOPFX
@ NOPFX
Definition: AMDGPULibFunc.h:248
llvm::AMDGPULibFuncBase::EI_EXP10
@ EI_EXP10
Definition: AMDGPULibFunc.h:87
llvm::StringRef::equals
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:187
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::AMDGPULibFuncBase::EI_READ_PIPE_4
@ EI_READ_PIPE_4
Definition: AMDGPULibFunc.h:240
llvm::AMDGPULibFuncBase::EI_ACOS
@ EI_ACOS
Definition: AMDGPULibFunc.h:40
AMDGPULibFunc.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
TableRef::TableRef
TableRef()
Definition: AMDGPULibCalls.cpp:383
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::cl::opt< bool >
val
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 the input will be treated as an leaving the upper bits uninitialised For i64 store i32 val
Definition: README.txt:15
llvm::AMDGPULibFuncBase::HALF
@ HALF
Definition: AMDGPULibFunc.h:250
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:297
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::AMDGPULibCalls::isUnsafeMath
bool isUnsafeMath(const CallInst *CI) const
Definition: AMDGPULibCalls.cpp:454
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::StringRef::equals_insensitive
LLVM_NODISCARD bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:194
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:180
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::AMDGPULibFuncBase::F32
@ F32
Definition: AMDGPULibFunc.h:272
tbl_cospi
static const TableEntry tbl_cospi[]
Definition: AMDGPULibCalls.cpp:273
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:620
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPULibFuncBase::EI_EXP
@ EI_EXP
Definition: AMDGPULibFunc.h:86
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AMDGPULibFuncBase::EI_TGAMMA
@ EI_TGAMMA
Definition: AMDGPULibFunc.h:197
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_4
@ EI_WRITE_PIPE_4
Definition: AMDGPULibFunc.h:242
llvm::AMDGPULibFuncBase::EI_COSPI
@ EI_COSPI
Definition: AMDGPULibFunc.h:77
llvm::AMDGPULibFunc::mangle
std::string mangle() const
Definition: AMDGPULibFunc.h:388
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::ConstantDataVector
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:753
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
getArgType
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:437
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
llvm::AMDGPUUseNativeCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1758
llvm::AMDGPULibCalls::initNativeFuncs
void initNativeFuncs()
Definition: AMDGPULibCalls.cpp:467
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1682
tbl_asinh
static const TableEntry tbl_asinh[]
Definition: AMDGPULibCalls.cpp:233
false
amdgpu Simplify well known AMD library false
Definition: AMDGPULibCalls.cpp:179
llvm::AMDGPULibFunc::getNumArgs
unsigned getNumArgs() const
Definition: AMDGPULibFunc.h:373
IRBuilder.h
llvm::ConstantFP::isNegative
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:305
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::FPMathOperator
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:167
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::ConstantDataSequential::getElementAsConstant
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index's element.
Definition: Constants.cpp:3307
llvm::AMDGPULibFuncBase::EI_ASINPI
@ EI_ASINPI
Definition: AMDGPULibFunc.h:48
llvm::AMDGPULibFuncBase::EI_SINCOS
@ EI_SINCOS
Definition: AMDGPULibFunc.h:173
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_2
@ EI_WRITE_PIPE_2
Definition: AMDGPULibFunc.h:241
llvm::AMDGPULibFuncBase::EI_SINH
@ EI_SINH
Definition: AMDGPULibFunc.h:174
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
tbl_cos
static const TableEntry tbl_cos[]
Definition: AMDGPULibCalls.cpp:265
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::StringRef::contains_insensitive
LLVM_NODISCARD bool contains_insensitive(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:476
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1598
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::AMDGPULibFuncBase::Param::VectorSize
unsigned char VectorSize
Definition: AMDGPULibFunc.h:294
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
Simplify
assume Assume Simplify
Definition: AssumeBundleBuilder.cpp:604
llvm::AMDGPULibFuncBase::EI_DIVIDE
@ EI_DIVIDE
Definition: AMDGPULibFunc.h:82
tbl_erfc
static const TableEntry tbl_erfc[]
Definition: AMDGPULibCalls.cpp:277
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:251
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AMDGPULibFuncBase::EI_COS
@ EI_COS
Definition: AMDGPULibFunc.h:75
llvm::AMDGPULibFuncBase::EI_LOG2
@ EI_LOG2
Definition: AMDGPULibFunc.h:133
tbl_atanpi
static const TableEntry tbl_atanpi[]
Definition: AMDGPULibCalls.cpp:253
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:529
llvm::AMDGPULibFuncBase::EFuncId
EFuncId
Definition: AMDGPULibFunc.h:24
llvm::AMDGPULibFunc::setId
void setId(EFuncId Id)
Definition: AMDGPULibFunc.h:381
llvm::AMDGPULibFuncBase::EI_EXP2
@ EI_EXP2
Definition: AMDGPULibFunc.h:88
tbl_tanpi
static const TableEntry tbl_tanpi[]
Definition: AMDGPULibCalls.cpp:345
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:128
llvm::AMDGPULibFunc::setPrefix
void setPrefix(ENamePrefix PFX)
Definition: AMDGPULibFunc.h:391
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::AMDGPULibFuncBase::EI_NRSQRT
@ EI_NRSQRT
Definition: AMDGPULibFunc.h:228
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:173
UseNative
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
well
llvm ldr ldrb ldrh str strh strb strb gcc and possibly speed as well(we don 't have a good way to measure on ARM). *Consider this silly example
Definition: README.txt:138
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:186
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:35
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
EnablePreLink
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:240
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", "Simplify well-known AMD library calls", false, false) INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:243
llvm::AMDGPULibFuncBase::EI_ERF
@ EI_ERF
Definition: AMDGPULibFunc.h:84
llvm::ConstantDataSequential::getElementAsDouble
double getElementAsDouble(unsigned i) const
If this is an sequential container of doubles, return the specified element as a double.
Definition: Constants.cpp:3301
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:345
llvm::AMDGPULibFuncBase::EI_MAD
@ EI_MAD
Definition: AMDGPULibFunc.h:135
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1339
llvm::AMDGPULibFuncBase::Param::ArgType
unsigned char ArgType
Definition: AMDGPULibFunc.h:293
llvm::AMDGPULibFuncBase::EI_POWN
@ EI_POWN
Definition: AMDGPULibFunc.h:152
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:975
TableRef::TableRef
TableRef(const TableEntry(&tbl)[N])
Definition: AMDGPULibCalls.cpp:386
llvm::AMDGPULibFunc::getPrefix
ENamePrefix getPrefix() const
Definition: AMDGPULibFunc.h:375
llvm::FindAvailableLoadedValue
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:424
llvm::AMDGPULibFuncBase::EI_LOG10
@ EI_LOG10
Definition: AMDGPULibFunc.h:131
llvm::AMDGPULibFunc::getId
EFuncId getId() const
Definition: AMDGPULibFunc.h:374
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
y
into llvm powi allowing the code generator to produce balanced multiplication trees the intrinsic needs to be extended to support and second the code generator needs to be enhanced to lower these to multiplication trees Interesting testcase for add shift mul int y
Definition: README.txt:61
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:802
tbl_rsqrt
static const TableEntry tbl_rsqrt[]
Definition: AMDGPULibCalls.cpp:316
simplifylib
amdgpu simplifylib
Definition: AMDGPULibCalls.cpp:178
llvm::AMDGPULibFuncBase::EI_EXPM1
@ EI_EXPM1
Definition: AMDGPULibFunc.h:89
MATH_SQRT2
#define MATH_SQRT2
Definition: AMDGPULibCalls.cpp:41
AA
llvm::numbers::ln2
constexpr double ln2
Definition: MathExtras.h:59
llvm::AMDGPULibCalls::CI
CallInst * CI
Definition: AMDGPULibCalls.cpp:112
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:127
tbl_exp10
static const TableEntry tbl_exp10[]
Definition: AMDGPULibCalls.cpp:295
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:164
llvm::AMDGPULibCalls::fold
bool fold(CallInst *CI, AliasAnalysis *AA=nullptr)
Definition: AMDGPULibCalls.cpp:601
tbl_sin
static const TableEntry tbl_sin[]
Definition: AMDGPULibCalls.cpp:320
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1341
llvm::ConstantDataVector::getSplat
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:3175
N
#define N
tbl_exp2
static const TableEntry tbl_exp2[]
Definition: AMDGPULibCalls.cpp:290
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1351
llvm::AMDGPULibFunc::getName
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
Definition: AMDGPULibFunc.h:372
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:85
llvm::AMDGPULibFuncBase::EI_SQRT
@ EI_SQRT
Definition: AMDGPULibFunc.h:177
MATH_E
#define MATH_E
Definition: AMDGPULibCalls.cpp:40
tbl_sinh
static const TableEntry tbl_sinh[]
Definition: AMDGPULibCalls.cpp:324
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
calls
amdgpu Simplify well known AMD library calls
Definition: AMDGPULibCalls.cpp:179
tbl_cosh
static const TableEntry tbl_cosh[]
Definition: AMDGPULibCalls.cpp:269
llvm::AMDGPULibFuncBase::EI_NLOG2
@ EI_NLOG2
Definition: AMDGPULibFunc.h:226
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1461
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
tbl_sqrt
static const TableEntry tbl_sqrt[]
Definition: AMDGPULibCalls.cpp:332
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:58
llvm::cl::desc
Definition: CommandLine.h:405
llvm::AMDGPULibFuncBase::EI_RSQRT
@ EI_RSQRT
Definition: AMDGPULibFunc.h:166
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
TableRef
Definition: AMDGPULibCalls.cpp:379
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AMDGPULibFuncBase::EI_ATAN
@ EI_ATAN
Definition: AMDGPULibFunc.h:51
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AMDGPULibFuncBase::EI_SINPI
@ EI_SINPI
Definition: AMDGPULibFunc.h:175
llvm::AMDGPULibFuncBase::NATIVE
@ NATIVE
Definition: AMDGPULibFunc.h:249
MATH_SQRT1_2
#define MATH_SQRT1_2
Definition: AMDGPULibCalls.cpp:42
tbl_acosh
static const TableEntry tbl_acosh[]
Definition: AMDGPULibCalls.cpp:218
tbl_tgamma
static const TableEntry tbl_tgamma[]
Definition: AMDGPULibCalls.cpp:349
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1659
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
INITIALIZE_PASS
TargetPassConfig.
Definition: TargetPassConfig.cpp:367
llvm::DataLayout::getTypeAllocSize
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:506
llvm::ConstantDataSequential::getNumElements
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2965
llvm::cl::list
Definition: CommandLine.h:1601