LLVM  14.0.0git
AMDGPULibCalls.cpp
Go to the documentation of this file.
1 //===- AMDGPULibCalls.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file does AMD library function optimizations.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULibFunc.h"
16 #include "GCNSubtarget.h"
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/IR/IntrinsicsAMDGPU.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/InitializePasses.h"
#include <cmath>
23 
24 #define DEBUG_TYPE "amdgpu-simplifylib"
25 
26 using namespace llvm;
27 
// Hidden boolean option "amdgpu-prelink" (default: false). Elsewhere in this
// file it is read when folding llvm.amdgcn.wavefrontsize, which is only
// attempted while the option is off.
28 static cl::opt<bool> EnablePreLink("amdgpu-prelink",
29  cl::desc("Enable pre-link mode optimizations"),
30  cl::init(false),
31  cl::Hidden);
32 
// Hidden list option "amdgpu-use-native": names of functions that should be
// replaced with their native variants, or "all". It is consulted through
// AMDGPULibCalls::useNativeFunc().
// NOTE(review): one option line between the description and cl::Hidden is
// absent from this listing.
33 static cl::list<std::string> UseNative("amdgpu-use-native",
34  cl::desc("Comma separated list of functions to replace with native, or all"),
36  cl::Hidden);
37 
// Short aliases for constants from the `numbers` namespace; they are used by
// the {result, input} lookup tables further down in this file.
38 #define MATH_PI numbers::pi
39 #define MATH_E numbers::e
40 #define MATH_SQRT2 numbers::sqrt2
41 #define MATH_SQRT1_2 numbers::inv_sqrt2
42 
43 namespace llvm {
44 
46 private:
47 
49 
50  const TargetMachine *TM;
51 
52  // -fuse-native.
53  bool AllNative = false;
54 
55  bool useNativeFunc(const StringRef F) const;
56 
57  // Return a pointer (pointer expr) to the function if function definition with
58  // "FuncName" exists. It may create a new function prototype in pre-link mode.
59  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
60 
61  bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
62 
63  bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
64 
65  /* Specialized optimizations */
66 
67  // recip (half or native)
68  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
69 
70  // divide (half or native)
71  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
72 
73  // pow/powr/pown
74  bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
75 
76  // rootn
77  bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
78 
79  // fma/mad
80  bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
81 
82  // -fuse-native for sincos
83  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
84 
85  // evaluate calls if calls' arguments are constants.
86  bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0,
87  double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
88  bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
89 
90  // sqrt
91  bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
92 
93  // sin/cos
94  bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
95 
96  // __read_pipe/__write_pipe
97  bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
98  const FuncInfo &FInfo);
99 
100  // llvm.amdgcn.wavefrontsize
101  bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
102 
103  // Get insertion point at entry.
104  BasicBlock::iterator getEntryIns(CallInst * UI);
105  // Insert an Alloc instruction.
106  AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
107  // Get a scalar native builtin single argument FP function
108  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
109 
110 protected:
112 
113  bool isUnsafeMath(const CallInst *CI) const;
114 
115  void replaceCall(Value *With) {
116  CI->replaceAllUsesWith(With);
117  CI->eraseFromParent();
118  }
119 
120 public:
121  AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
122 
123  bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
124 
125  void initNativeFuncs();
126 
127  // Replace a normal math function call with its native version
128  bool useNative(CallInst *CI);
129 };
130 
131 } // end llvm namespace
132 
133 namespace {
134 
// Legacy FunctionPass that owns an AMDGPULibCalls simplifier constructed with
// the (optional) TargetMachine handed to the pass.
// NOTE(review): the constructor body and the getAnalysisUsage body each lost a
// line in this listing, so what they register/require cannot be stated here.
135  class AMDGPUSimplifyLibCalls : public FunctionPass {
136 
     // Does the actual call folding.
137  AMDGPULibCalls Simplifier;
138 
139  public:
140  static char ID; // Pass identification
141 
     // TM is forwarded to the simplifier; it may be null.
142  AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
143  : FunctionPass(ID), Simplifier(TM) {
145  }
146 
147  void getAnalysisUsage(AnalysisUsage &AU) const override {
149  }
150 
     // Defined out of line (not visible in this part of the file).
151  bool runOnFunction(Function &M) override;
152  };
153 
// Legacy FunctionPass that owns a default-constructed AMDGPULibCalls (no
// TargetMachine) and initialises its native-function selection on creation.
// NOTE(review): one constructor line is absent from this listing.
154  class AMDGPUUseNativeCalls : public FunctionPass {
155 
156  AMDGPULibCalls Simplifier;
157 
158  public:
159  static char ID; // Pass identification
160 
161  AMDGPUUseNativeCalls() : FunctionPass(ID) {
     // Decide up front whether every function should use its native variant.
163  Simplifier.initNativeFuncs();
164  }
165 
     // Defined out of line (not visible in this part of the file).
166  bool runOnFunction(Function &F) override;
167  };
168 
169 } // end anonymous namespace.
170 
// Storage for the pass identifier of AMDGPUUseNativeCalls.
// NOTE(review): the matching definition for AMDGPUSimplifyLibCalls::ID is not
// visible in this listing.
172 char AMDGPUUseNativeCalls::ID = 0;
173 
// Registration of the "amdgpu-simplifylib" pass.
// NOTE(review): the line between BEGIN and END and the tail of the END macro
// are absent from this listing.
174 INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
175  "Simplify well-known AMD library calls", false, false)
177 INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
179 
// Registration of the "amdgpu-usenative" pass.
180 INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
181  "Replace builtin math calls with that native versions.",
182  false, false)
183 
184 template <typename IRB>
185 static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
186  const Twine &Name = "") {
187  CallInst *R = B.CreateCall(Callee, Arg, Name);
188  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
189  R->setCallingConv(F->getCallingConv());
190  return R;
191 }
192 
193 template <typename IRB>
194 static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
195  Value *Arg2, const Twine &Name = "") {
196  CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
197  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
198  R->setCallingConv(F->getCallingConv());
199  return R;
200 }
201 
202 // Data structures for table-driven optimizations.
203 // FuncTbl works for both f32 and f64 functions with 1 input argument
204 
/// One row of a table-driven fold: calling the function on `input` yields
/// `result`. Note the field order: result first, input second.
struct TableEntry {
  double result; // value the call is replaced with
  double input;  // constant argument that triggers the replacement
};
209 
210 /* a list of {result, input} */
211 static const TableEntry tbl_acos[] = {
212  {MATH_PI / 2.0, 0.0},
213  {MATH_PI / 2.0, -0.0},
214  {0.0, 1.0},
215  {MATH_PI, -1.0}
216 };
217 static const TableEntry tbl_acosh[] = {
218  {0.0, 1.0}
219 };
220 static const TableEntry tbl_acospi[] = {
221  {0.5, 0.0},
222  {0.5, -0.0},
223  {0.0, 1.0},
224  {1.0, -1.0}
225 };
226 static const TableEntry tbl_asin[] = {
227  {0.0, 0.0},
228  {-0.0, -0.0},
229  {MATH_PI / 2.0, 1.0},
230  {-MATH_PI / 2.0, -1.0}
231 };
232 static const TableEntry tbl_asinh[] = {
233  {0.0, 0.0},
234  {-0.0, -0.0}
235 };
236 static const TableEntry tbl_asinpi[] = {
237  {0.0, 0.0},
238  {-0.0, -0.0},
239  {0.5, 1.0},
240  {-0.5, -1.0}
241 };
242 static const TableEntry tbl_atan[] = {
243  {0.0, 0.0},
244  {-0.0, -0.0},
245  {MATH_PI / 4.0, 1.0},
246  {-MATH_PI / 4.0, -1.0}
247 };
248 static const TableEntry tbl_atanh[] = {
249  {0.0, 0.0},
250  {-0.0, -0.0}
251 };
252 static const TableEntry tbl_atanpi[] = {
253  {0.0, 0.0},
254  {-0.0, -0.0},
255  {0.25, 1.0},
256  {-0.25, -1.0}
257 };
258 static const TableEntry tbl_cbrt[] = {
259  {0.0, 0.0},
260  {-0.0, -0.0},
261  {1.0, 1.0},
262  {-1.0, -1.0},
263 };
264 static const TableEntry tbl_cos[] = {
265  {1.0, 0.0},
266  {1.0, -0.0}
267 };
268 static const TableEntry tbl_cosh[] = {
269  {1.0, 0.0},
270  {1.0, -0.0}
271 };
272 static const TableEntry tbl_cospi[] = {
273  {1.0, 0.0},
274  {1.0, -0.0}
275 };
276 static const TableEntry tbl_erfc[] = {
277  {1.0, 0.0},
278  {1.0, -0.0}
279 };
280 static const TableEntry tbl_erf[] = {
281  {0.0, 0.0},
282  {-0.0, -0.0}
283 };
284 static const TableEntry tbl_exp[] = {
285  {1.0, 0.0},
286  {1.0, -0.0},
287  {MATH_E, 1.0}
288 };
289 static const TableEntry tbl_exp2[] = {
290  {1.0, 0.0},
291  {1.0, -0.0},
292  {2.0, 1.0}
293 };
294 static const TableEntry tbl_exp10[] = {
295  {1.0, 0.0},
296  {1.0, -0.0},
297  {10.0, 1.0}
298 };
299 static const TableEntry tbl_expm1[] = {
300  {0.0, 0.0},
301  {-0.0, -0.0}
302 };
303 static const TableEntry tbl_log[] = {
304  {0.0, 1.0},
305  {1.0, MATH_E}
306 };
307 static const TableEntry tbl_log2[] = {
308  {0.0, 1.0},
309  {1.0, 2.0}
310 };
311 static const TableEntry tbl_log10[] = {
312  {0.0, 1.0},
313  {1.0, 10.0}
314 };
315 static const TableEntry tbl_rsqrt[] = {
316  {1.0, 1.0},
317  {MATH_SQRT1_2, 2.0}
318 };
319 static const TableEntry tbl_sin[] = {
320  {0.0, 0.0},
321  {-0.0, -0.0}
322 };
323 static const TableEntry tbl_sinh[] = {
324  {0.0, 0.0},
325  {-0.0, -0.0}
326 };
327 static const TableEntry tbl_sinpi[] = {
328  {0.0, 0.0},
329  {-0.0, -0.0}
330 };
331 static const TableEntry tbl_sqrt[] = {
332  {0.0, 0.0},
333  {1.0, 1.0},
334  {MATH_SQRT2, 2.0}
335 };
336 static const TableEntry tbl_tan[] = {
337  {0.0, 0.0},
338  {-0.0, -0.0}
339 };
340 static const TableEntry tbl_tanh[] = {
341  {0.0, 0.0},
342  {-0.0, -0.0}
343 };
344 static const TableEntry tbl_tanpi[] = {
345  {0.0, 0.0},
346  {-0.0, -0.0}
347 };
348 static const TableEntry tbl_tgamma[] = {
349  {1.0, 1.0},
350  {1.0, 2.0},
351  {2.0, 3.0},
352  {6.0, 4.0}
353 };
354 
356  switch(id) {
372  return true;
373  default:;
374  }
375  return false;
376 }
377 
378 struct TableRef {
379  size_t size;
380  const TableEntry *table; // variable size: from 0 to (size - 1)
381 
382  TableRef() : size(0), table(nullptr) {}
383 
384  template <size_t N>
385  TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
386 };
387 
389  switch(id) {
401  case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos);
405  case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf);
406  case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp);
411  case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log);
418  case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin);
423  case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan);
427  default:;
428  }
429  return TableRef();
430 }
431 
432 static inline int getVecSize(const AMDGPULibFunc& FInfo) {
433  return FInfo.getLeads()[0].VectorSize;
434 }
435 
436 static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
437  return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
438 }
439 
// Look up the library function described by fInfo in module M.
// NOTE(review): the first half of the return expression is absent from this
// listing; only the `: AMDGPULibFunc::getFunction(M, fInfo)` alternative is
// visible. Per the comment below, the other alternative presumably uses
// getOrInsertFunction() in pre-link mode — confirm against the full source.
440 FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
441  // If we are doing PreLinkOpt, the function is external. So it is safe to
442  // use getOrInsertFunction() at this stage.
443 
445  : AMDGPULibFunc::getFunction(M, fInfo);
446 }
447 
448 bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
449  FuncInfo &FInfo) {
450  return AMDGPULibFunc::parse(FMangledName, FInfo);
451 }
452 
453 bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
454  if (auto Op = dyn_cast<FPMathOperator>(CI))
455  if (Op->isFast())
456  return true;
457  const Function *F = CI->getParent()->getParent();
458  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
459  return Attr.getValueAsBool();
460 }
461 
462 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
463  return AllNative || llvm::is_contained(UseNative, F);
464 }
465 
467  AllNative = useNativeFunc("all") ||
468  (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
469  UseNative.begin()->empty());
470 }
471 
// Split a sincos call into separate sin and cos calls when both "sin" and
// "cos" are selected for native replacement.
// The sin result replaces the original call's value; the cos result is stored
// through the call's second argument. Returns true if the call was replaced.
// NOTE(review): the lines that configure `nf` before each getFunction() call
// are absent from this listing, so which callee each lookup resolves to is
// not visible here.
472 bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
473  bool native_sin = useNativeFunc("sin");
474  bool native_cos = useNativeFunc("cos");
475 
476  if (native_sin && native_cos) {
477  Module *M = aCI->getModule();
478  Value *opr0 = aCI->getArgOperand(0);
479 
     // The new descriptor copies the argument type and vector width of the
     // original call.
480  AMDGPULibFunc nf;
481  nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
482  nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
483 
486  FunctionCallee sinExpr = getFunction(M, nf);
487 
490  FunctionCallee cosExpr = getFunction(M, nf);
491  if (sinExpr && cosExpr) {
     // Both new calls and the store are inserted before the original call.
492  Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
493  Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
494  new StoreInst(cosval, aCI->getArgOperand(1), aCI);
495 
496  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
497  << " with native version of sin/cos");
498 
     // replaceCall() acts on the member CI, not on aCI directly; this assumes
     // the caller has already set CI to aCI — TODO confirm.
499  replaceCall(sinval);
500  return true;
501  }
502  }
503  return false;
504 }
505 
507  CI = aCI;
509 
510  FuncInfo FInfo;
511  if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
512  FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
513  getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
514  !(AllNative || useNativeFunc(FInfo.getName()))) {
515  return false;
516  }
517 
518  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
519  return sincosUseNative(aCI, FInfo);
520 
522  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
523  if (!F)
524  return false;
525 
526  aCI->setCalledFunction(F);
527  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
528  << " with native version");
529  return true;
530 }
531 
532 // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
533 // builtin, with appended type size and alignment arguments, where 2 or 4
534 // indicates the original number of arguments. The library has optimized version
535 // of __read_pipe_2/__read_pipe_4 when the type size and alignment have the same
536 // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
537 // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
538 // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
//
// Returns true if the call was replaced, false if it was left untouched.
// NOTE(review): the declarations of `ArgTys`, `F` and `Args`, plus one line
// between replaceAllUsesWith() and eraseFromParent(), are absent from this
// listing.
539 bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
540  const FuncInfo &FInfo) {
541  auto *Callee = CI->getCalledFunction();
     // Only rewrite calls to a declaration (a callee without a body here).
542  if (!Callee->isDeclaration())
543  return false;
544 
545  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
546  auto *M = Callee->getParent();
547  auto &Ctx = M->getContext();
548  std::string Name = std::string(Callee->getName());
549  auto NumArg = CI->arg_size();
     // Accept only the 4- and 6-argument call forms.
550  if (NumArg != 4 && NumArg != 6)
551  return false;
     // The last two arguments are the packet size and alignment; both must be
     // compile-time constants.
552  auto *PacketSize = CI->getArgOperand(NumArg - 2);
553  auto *PacketAlign = CI->getArgOperand(NumArg - 1);
554  if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
555  return false;
556  unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
557  Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
     // The specialised variants only apply when size and alignment are equal.
558  if (Alignment != Size)
559  return false;
560 
     // Element type of the retyped pointer: an integer of Size bytes for sizes
     // up to 8, otherwise a vector of Size/8 64-bit integers.
561  Type *PtrElemTy;
562  if (Size <= 8)
563  PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
564  else
565  PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
     // The pointer argument sits immediately before the size/alignment pair.
566  unsigned PtrArgLoc = CI->arg_size() - 3;
567  auto PtrArg = CI->getArgOperand(PtrArgLoc);
568  unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
569  auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
570 
     // New signature: the leading arguments unchanged, then the retyped
     // pointer; size and alignment are dropped.
572  for (unsigned I = 0; I != PtrArgLoc; ++I)
573  ArgTys.push_back(CI->getArgOperand(I)->getType());
574  ArgTys.push_back(PtrTy);
575 
     // The specialised function is named <original>_<Size>.
576  Name = Name + "_" + std::to_string(Size);
577  auto *FTy = FunctionType::get(Callee->getReturnType(),
578  ArrayRef<Type *>(ArgTys), false);
579  AMDGPULibFunc NewLibFunc(Name, FTy);
581  if (!F)
582  return false;
583 
584  auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
586  for (unsigned I = 0; I != PtrArgLoc; ++I)
587  Args.push_back(CI->getArgOperand(I));
588  Args.push_back(BCast);
589 
     // Emit the new call, carry over the attributes and retire the old call.
590  auto *NCI = B.CreateCall(F, Args);
591  NCI->setAttributes(CI->getAttributes());
592  CI->replaceAllUsesWith(NCI);
594  CI->eraseFromParent();
595 
596  return true;
597 }
598 
599 // This function returns false if nothing was changed and true otherwise.
601  this->CI = CI;
603 
604  // Ignore indirect calls.
605  if (Callee == nullptr)
606  return false;
607 
608  BasicBlock *BB = CI->getParent();
611 
612  // Set the builder to the instruction after the call.
613  B.SetInsertPoint(BB, CI->getIterator());
614 
615  // Copy fast flags from the original call.
616  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
617  B.setFastMathFlags(FPOp->getFastMathFlags());
618 
619  switch (Callee->getIntrinsicID()) {
620  default:
621  break;
622  case Intrinsic::amdgcn_wavefrontsize:
623  return !EnablePreLink && fold_wavefrontsize(CI, B);
624  }
625 
626  FuncInfo FInfo;
627  if (!parseFunctionName(Callee->getName(), FInfo))
628  return false;
629 
630  // Further check the number of arguments to see if they match.
631  if (CI->arg_size() != FInfo.getNumArgs())
632  return false;
633 
634  if (TDOFold(CI, FInfo))
635  return true;
636 
637  // Under unsafe-math, evaluate calls if possible.
638  // According to Brian Sumner, we can do this for all f32 function calls
639  // using host's double function calls.
640  if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
641  return true;
642 
643  // Specialized optimizations for each function call
644  switch (FInfo.getId()) {
646  // skip vector function
647  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
648  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
649  "recip must be an either native or half function");
650  return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
651 
653  // skip vector function
654  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
655  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
656  "divide must be an either native or half function");
657  return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
658 
662  return fold_pow(CI, B, FInfo);
663 
665  // skip vector function
666  return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
667 
671  // skip vector function
672  return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
673 
675  return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
678  if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
679  getArgType(FInfo) == AMDGPULibFunc::F64)
680  && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
681  return fold_sincos(CI, B, AA);
682 
683  break;
688  return fold_read_write_pipe(CI, B, FInfo);
689 
690  default:
691  break;
692  }
693 
694  return false;
695 }
696 
// Table-driven fold: if the function has a {result, input} table and the
// first argument is a constant whose value (or, for vectors, every element)
// matches a table input exactly, replace the call with the tabulated result.
// Returns true if the call was replaced.
// NOTE(review): the declarations of `DVal` and `FVal` are absent from this
// listing; from their use they are growable containers of double and float.
697 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
698  // Table-Driven optimization
699  const TableRef tr = getOptTable(FInfo.getId());
     // No table for this function: nothing to do.
700  if (tr.size==0)
701  return false;
702 
703  int const sz = (int)tr.size;
704  const TableEntry * const ftbl = tr.table;
705  Value *opr0 = CI->getArgOperand(0);
706 
707  if (getVecSize(FInfo) > 1) {
708  if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
     // Look up every element; bail out as soon as one has no table entry.
710  for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
711  ConstantFP *eltval = dyn_cast<ConstantFP>(
712  CV->getElementAsConstant((unsigned)eltNo));
713  assert(eltval && "Non-FP arguments in math function!");
714  bool found = false;
715  for (int i=0; i < sz; ++i) {
716  if (eltval->isExactlyValue(ftbl[i].input)) {
717  DVal.push_back(ftbl[i].result);
718  found = true;
719  break;
720  }
721  }
722  if (!found) {
723  // This vector constants not handled yet.
724  return false;
725  }
726  }
727  LLVMContext &context = CI->getParent()->getParent()->getContext();
728  Constant *nval;
     // Build the result vector in the call's element type: the double
     // results are narrowed to float for F32, used as-is otherwise.
729  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
731  for (unsigned i = 0; i < DVal.size(); ++i) {
732  FVal.push_back((float)DVal[i]);
733  }
734  ArrayRef<float> tmp(FVal);
735  nval = ConstantDataVector::get(context, tmp);
736  } else { // F64
737  ArrayRef<double> tmp(DVal);
738  nval = ConstantDataVector::get(context, tmp);
739  }
740  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
741  replaceCall(nval);
742  return true;
743  }
744  } else {
745  // Scalar version
746  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
747  for (int i = 0; i < sz; ++i) {
748  if (CF->isExactlyValue(ftbl[i].input)) {
749  Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
750  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
751  replaceCall(nval);
752  return true;
753  }
754  }
755  }
756  }
757 
758  return false;
759 }
760 
761 // [native_]half_recip(c) ==> 1.0/c
762 bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
763  const FuncInfo &FInfo) {
764  Value *opr0 = CI->getArgOperand(0);
765  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
766  // Just create a normal div. Later, InstCombine will be able
767  // to compute the divide into a constant (avoid check float infinity
768  // or subnormal at this point).
769  Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
770  opr0,
771  "recip2div");
772  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
773  replaceCall(nval);
774  return true;
775  }
776  return false;
777 }
778 
779 // [native_]half_divide(x, c) ==> x/c
780 bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
781  const FuncInfo &FInfo) {
782  Value *opr0 = CI->getArgOperand(0);
783  Value *opr1 = CI->getArgOperand(1);
784  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
785  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
786 
787  if ((CF0 && CF1) || // both are constants
788  (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
789  // CF1 is constant && f32 divide
790  {
791  Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
792  opr1, "__div2recip");
793  Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
794  replaceCall(nval);
795  return true;
796  }
797  return false;
798 }
799 
namespace llvm {
/// Base-2 logarithm of \p V, used for host-side constant folding.
///
/// std::log2 is part of <cmath> since C++11, so no feature-test macros or
/// `log(V) / ln2` fallback are needed; the fallback could also be off by an
/// ulp for arguments that are exact powers of two.
static double log2(double V) {
  return std::log2(V);
}
} // end namespace llvm
809 
// Simplify pow/powr/pown calls.
// With a constant exponent y: y == 0 -> 1, y == 1 -> x, y == 2 -> x*x,
// y == -1 -> 1/x, y == +/-0.5 -> a square-root style library call.
// Under unsafe math only: small integral |y| <= 12 is expanded into a chain of
// multiplications, and everything else becomes exp2(y * log2(x)) with sign
// fix-up for pow/pown. Returns true if the call was replaced.
// NOTE(review): two lines are absent from this listing (the second enum
// alternative in the +/-0.5 case and the declaration of `DVal`).
810 bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
811  const FuncInfo &FInfo) {
812  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
813  FInfo.getId() == AMDGPULibFunc::EI_POWR ||
814  FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
815  "fold_pow: encounter a wrong function call");
816 
817  Value *opr0, *opr1;
818  ConstantFP *CF;
819  ConstantInt *CINT;
820  ConstantAggregateZero *CZero;
821  Type *eltType;
822 
     // Classify the exponent: scalar FP constant (CF), scalar integer constant
     // (CINT), all-zero aggregate (CZero), or a splat vector of either.
823  opr0 = CI->getArgOperand(0);
824  opr1 = CI->getArgOperand(1);
825  CZero = dyn_cast<ConstantAggregateZero>(opr1);
826  if (getVecSize(FInfo) == 1) {
827  eltType = opr0->getType();
828  CF = dyn_cast<ConstantFP>(opr1);
829  CINT = dyn_cast<ConstantInt>(opr1);
830  } else {
831  VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
832  assert(VTy && "Oprand of vector function should be of vectortype");
833  eltType = VTy->getElementType();
834  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
835 
836  // Now, only Handle vector const whose elements have the same value.
837  CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
838  CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
839  }
840 
841  // No unsafe math , no constant argument, do nothing
842  if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
843  return false;
844 
845  // 0x1111111 means that we don't do anything for this call.
     // NOTE(review): the sentinel assigned further down is 0x11111111 (eight
     // digits). Both values exceed 12, so the |c| <= 12 test treats them alike.
846  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
847 
848  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
849  // pow/powr/pown(x, 0) == 1
850  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
851  Constant *cnval = ConstantFP::get(eltType, 1.0);
852  if (getVecSize(FInfo) > 1) {
853  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
854  }
855  replaceCall(cnval);
856  return true;
857  }
858  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
859  // pow/powr/pown(x, 1.0) = x
860  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
861  replaceCall(opr0);
862  return true;
863  }
864  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
865  // pow/powr/pown(x, 2.0) = x*x
866  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
867  << "\n");
868  Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
869  replaceCall(nval);
870  return true;
871  }
872  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
873  // pow/powr/pown(x, -1.0) = 1.0/x
874  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
875  Constant *cnval = ConstantFP::get(eltType, 1.0);
876  if (getVecSize(FInfo) > 1) {
877  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
878  }
879  Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
880  replaceCall(nval);
881  return true;
882  }
883 
884  Module *M = CI->getModule();
885  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
886  // pow[r](x, [-]0.5) = sqrt(x)
     // NOTE(review): for -0.5 the value name "__pow2rsqrt" suggests a
     // reciprocal-square-root callee; the enum alternative for that case is
     // on a line absent from this listing — confirm.
887  bool issqrt = CF->isExactlyValue(0.5);
888  if (FunctionCallee FPExpr =
889  getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
891  FInfo))) {
     // NOTE(review): the debug message prints FInfo's name (the original
     // call), not the name of the replacement function.
892  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
893  << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
894  Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
895  : "__pow2rsqrt");
896  replaceCall(nval);
897  return true;
898  }
899  }
900 
     // Everything below changes numerical behaviour and needs unsafe math.
901  if (!isUnsafeMath(CI))
902  return false;
903 
904  // Unsafe Math optimization
905 
906  // Remember that ci_opr1 is set if opr1 is integral
     // For an FP constant exponent, adopt it as an integer only if the
     // conversion to int and back is lossless.
907  if (CF) {
908  double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
909  ? (double)CF->getValueAPF().convertToFloat()
910  : CF->getValueAPF().convertToDouble();
911  int ival = (int)dval;
912  if ((double)ival == dval) {
913  ci_opr1 = ival;
914  } else
915  ci_opr1 = 0x11111111;
916  }
917 
918  // pow/powr/pown(x, c) = [1/](x*x*..x); where
919  // trunc(c) == c && the number of x == c && |c| <= 12
920  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
921  if (abs_opr1 <= 12) {
922  Constant *cnval;
923  Value *nval;
924  if (abs_opr1 == 0) {
925  cnval = ConstantFP::get(eltType, 1.0);
926  if (getVecSize(FInfo) > 1) {
927  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
928  }
929  nval = cnval;
930  } else {
     // Square-and-multiply: valx2 holds x^(2^k) and is multiplied into nval
     // for every set bit of |c|.
931  Value *valx2 = nullptr;
932  nval = nullptr;
933  while (abs_opr1 > 0) {
934  valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
935  if (abs_opr1 & 1) {
936  nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
937  }
938  abs_opr1 >>= 1;
939  }
940  }
941 
     // Negative exponent: take the reciprocal of the product.
942  if (ci_opr1 < 0) {
943  cnval = ConstantFP::get(eltType, 1.0);
944  if (getVecSize(FInfo) > 1) {
945  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
946  }
947  nval = B.CreateFDiv(cnval, nval, "__1powprod");
948  }
949  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
950  << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
951  << ")\n");
952  replaceCall(nval);
953  return true;
954  }
955 
956  // powr ---> exp2(y * log2(x))
957  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
958  FunctionCallee ExpExpr =
959  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
960  if (!ExpExpr)
961  return false;
962 
     // Work out what is needed for the base x:
     //   needlog      - emit a log2 call (x is not a constant)
     //   needabs      - emit fabs(x) first
     //   needcopysign - restore the sign of the result afterwards
     //   cnval        - log2(|x|) folded on the host when x is constant
963  bool needlog = false;
964  bool needabs = false;
965  bool needcopysign = false;
966  Constant *cnval = nullptr;
967  if (getVecSize(FInfo) == 1) {
968  CF = dyn_cast<ConstantFP>(opr0);
969 
970  if (CF) {
971  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
972  ? (double)CF->getValueAPF().convertToFloat()
973  : CF->getValueAPF().convertToDouble();
974 
975  V = log2(std::abs(V));
976  cnval = ConstantFP::get(eltType, V);
977  needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
978  CF->isNegative();
979  } else {
980  needlog = true;
     // NOTE(review): CF is null in this branch, so `!CF` is always true and
     // the CF->isNegative() operand is never evaluated.
981  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
982  (!CF || CF->isNegative());
983  }
984  } else {
985  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
986 
987  if (!CDV) {
988  needlog = true;
989  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
990  } else {
991  assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
992  "Wrong vector size detected");
993 
     // Fold log2(|x_i|) per element; any negative element requests the sign
     // fix-up. (The declaration of DVal is absent from this listing.)
995  for (int i=0; i < getVecSize(FInfo); ++i) {
996  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
997  ? (double)CDV->getElementAsFloat(i)
998  : CDV->getElementAsDouble(i);
999  if (V < 0.0) needcopysign = true;
1000  V = log2(std::abs(V));
1001  DVal.push_back(V);
1002  }
1003  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1004  SmallVector<float, 0> FVal;
1005  for (unsigned i=0; i < DVal.size(); ++i) {
1006  FVal.push_back((float)DVal[i]);
1007  }
1008  ArrayRef<float> tmp(FVal);
1009  cnval = ConstantDataVector::get(M->getContext(), tmp);
1010  } else {
1011  ArrayRef<double> tmp(DVal);
1012  cnval = ConstantDataVector::get(M->getContext(), tmp);
1013  }
1014  }
1015  }
1016 
1017  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
1018  // We cannot handle corner cases for a general pow() function, give up
1019  // unless y is a constant integral value. Then proceed as if it were pown.
1020  if (getVecSize(FInfo) == 1) {
1021  if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
1022  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1023  ? (double)CF->getValueAPF().convertToFloat()
1024  : CF->getValueAPF().convertToDouble();
1025  if (y != (double)(int64_t)y)
1026  return false;
1027  } else
1028  return false;
1029  } else {
1030  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
1031  for (int i=0; i < getVecSize(FInfo); ++i) {
1032  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1033  ? (double)CDV->getElementAsFloat(i)
1034  : CDV->getElementAsDouble(i);
1035  if (y != (double)(int64_t)y)
1036  return false;
1037  }
1038  } else
1039  return false;
1040  }
1041  }
1042 
     // Build log2(|x|): either the host-folded constant, or fabs/log2 calls.
1043  Value *nval;
1044  if (needabs) {
1045  FunctionCallee AbsExpr =
1046  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
1047  if (!AbsExpr)
1048  return false;
1049  nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
1050  } else {
1051  nval = cnval ? cnval : opr0;
1052  }
1053  if (needlog) {
1054  FunctionCallee LogExpr =
1055  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1056  if (!LogExpr)
1057  return false;
1058  nval = CreateCallEx(B,LogExpr, nval, "__log2");
1059  }
1060 
1061  if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
1062  // convert int(32) to fp(f32 or f64)
1063  opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1064  }
     // exp2(y * log2(|x|))
1065  nval = B.CreateFMul(opr1, nval, "__ylogx");
1066  nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
1067 
1068  if (needcopysign) {
     // Sign fix-up: shift the low bit of integer y into the sign-bit position,
     // AND it with the bits of x, and OR the outcome into the result.
1069  Value *opr_n;
1070  Type* rTy = opr0->getType();
1071  Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
1072  Type *nTy = nTyS;
1073  if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
1074  nTy = FixedVectorType::get(nTyS, vTy);
1075  unsigned size = nTy->getScalarSizeInBits();
1076  opr_n = CI->getArgOperand(1);
1077  if (opr_n->getType()->isIntegerTy())
1078  opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
1079  else
1080  opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1081 
1082  Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1083  sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1084  nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1085  nval = B.CreateBitCast(nval, opr0->getType());
1086  }
1087 
1088  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1089  << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1090  replaceCall(nval);
1091 
1092  return true;
1093 }
1094 
1095 bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
1096  const FuncInfo &FInfo) {
1097  Value *opr0 = CI->getArgOperand(0);
1098  Value *opr1 = CI->getArgOperand(1);
1099 
1100  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
1101  if (!CINT) {
1102  return false;
1103  }
1104  int ci_opr1 = (int)CINT->getSExtValue();
1105  if (ci_opr1 == 1) { // rootn(x, 1) = x
1106  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
1107  replaceCall(opr0);
1108  return true;
1109  }
1110  if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
1111  Module *M = CI->getModule();
1112  if (FunctionCallee FPExpr =
1113  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1114  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
1115  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
1116  replaceCall(nval);
1117  return true;
1118  }
1119  } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1120  Module *M = CI->getModule();
1121  if (FunctionCallee FPExpr =
1122  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1123  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
1124  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1125  replaceCall(nval);
1126  return true;
1127  }
1128  } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1129  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
1130  Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1131  opr0,
1132  "__rootn2div");
1133  replaceCall(nval);
1134  return true;
1135  } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
1136  Module *M = CI->getModule();
1137  if (FunctionCallee FPExpr =
1138  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
1139  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
1140  << ")\n");
1141  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
1142  replaceCall(nval);
1143  return true;
1144  }
1145  }
1146  return false;
1147 }
1148 
1149 bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
1150  const FuncInfo &FInfo) {
1151  Value *opr0 = CI->getArgOperand(0);
1152  Value *opr1 = CI->getArgOperand(1);
1153  Value *opr2 = CI->getArgOperand(2);
1154 
1155  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
1156  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
1157  if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
1158  // fma/mad(a, b, c) = c if a=0 || b=0
1159  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
1160  replaceCall(opr2);
1161  return true;
1162  }
1163  if (CF0 && CF0->isExactlyValue(1.0f)) {
1164  // fma/mad(a, b, c) = b+c if a=1
1165  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
1166  << "\n");
1167  Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
1168  replaceCall(nval);
1169  return true;
1170  }
1171  if (CF1 && CF1->isExactlyValue(1.0f)) {
1172  // fma/mad(a, b, c) = a+c if b=1
1173  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
1174  << "\n");
1175  Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
1176  replaceCall(nval);
1177  return true;
1178  }
1179  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
1180  if (CF->isZero()) {
1181  // fma/mad(a, b, c) = a*b if c=0
1182  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
1183  << *opr1 << "\n");
1184  Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
1185  replaceCall(nval);
1186  return true;
1187  }
1188  }
1189 
1190  return false;
1191 }
1192 
1193 // Get a scalar native builtin single argument FP function
1194 FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1195  const FuncInfo &FInfo) {
1196  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1197  return nullptr;
1198  FuncInfo nf = FInfo;
1200  return getFunction(M, nf);
1201 }
1202 
1203 // fold sqrt -> native_sqrt (x)
1204 bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
1205  const FuncInfo &FInfo) {
1206  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
1207  (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
1208  if (FunctionCallee FPExpr = getNativeFunction(
1210  Value *opr0 = CI->getArgOperand(0);
1211  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1212  << "sqrt(" << *opr0 << ")\n");
1213  Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
1214  replaceCall(nval);
1215  return true;
1216  }
1217  }
1218  return false;
1219 }
1220 
1221 // fold sin, cos -> sincos.
1222 bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
1223  AliasAnalysis *AA) {
1224  AMDGPULibFunc fInfo;
1225  if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
1226  return false;
1227 
1228  assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1229  fInfo.getId() == AMDGPULibFunc::EI_COS);
1230  bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1231 
1232  Value *CArgVal = CI->getArgOperand(0);
1233  BasicBlock * const CBB = CI->getParent();
1234 
1235  int const MaxScan = 30;
1236  bool Changed = false;
1237 
1238  { // fold in load value.
1239  LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
1240  if (LI && LI->getParent() == CBB) {
1241  BasicBlock::iterator BBI = LI->getIterator();
1242  Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
1243  if (AvailableVal) {
1244  Changed = true;
1245  CArgVal->replaceAllUsesWith(AvailableVal);
1246  if (CArgVal->getNumUses() == 0)
1247  LI->eraseFromParent();
1248  CArgVal = CI->getArgOperand(0);
1249  }
1250  }
1251  }
1252 
1253  Module *M = CI->getModule();
1255  std::string const PairName = fInfo.mangle();
1256 
1257  CallInst *UI = nullptr;
1258  for (User* U : CArgVal->users()) {
1259  CallInst *XI = dyn_cast_or_null<CallInst>(U);
1260  if (!XI || XI == CI || XI->getParent() != CBB)
1261  continue;
1262 
1263  Function *UCallee = XI->getCalledFunction();
1264  if (!UCallee || !UCallee->getName().equals(PairName))
1265  continue;
1266 
1268  if (BBI == CI->getParent()->begin())
1269  break;
1270  --BBI;
1271  for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
1272  if (cast<Instruction>(BBI) == XI) {
1273  UI = XI;
1274  break;
1275  }
1276  }
1277  if (UI) break;
1278  }
1279 
1280  if (!UI)
1281  return Changed;
1282 
1283  // Merge the sin and cos.
1284 
1285  // for OpenCL 2.0 we have only generic implementation of sincos
1286  // function.
1289  FunctionCallee Fsincos = getFunction(M, nf);
1290  if (!Fsincos)
1291  return Changed;
1292 
1293  BasicBlock::iterator ItOld = B.GetInsertPoint();
1294  AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
1295  B.SetInsertPoint(UI);
1296 
1297  Value *P = Alloc;
1298  Type *PTy = Fsincos.getFunctionType()->getParamType(1);
1299  // The allocaInst allocates the memory in private address space. This need
1300  // to be bitcasted to point to the address space of cos pointer type.
1301  // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1303  P = B.CreateAddrSpaceCast(Alloc, PTy);
1304  CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
1305 
1306  LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
1307  << *Call << "\n");
1308 
1309  if (!isSin) { // CI->cos, UI->sin
1310  B.SetInsertPoint(&*ItOld);
1311  UI->replaceAllUsesWith(&*Call);
1312  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1313  CI->replaceAllUsesWith(Reload);
1314  UI->eraseFromParent();
1315  CI->eraseFromParent();
1316  } else { // CI->sin, UI->cos
1317  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1318  UI->replaceAllUsesWith(Reload);
1319  CI->replaceAllUsesWith(Call);
1320  UI->eraseFromParent();
1321  CI->eraseFromParent();
1322  }
1323  return true;
1324 }
1325 
1326 bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1327  if (!TM)
1328  return false;
1329 
1330  StringRef CPU = TM->getTargetCPU();
1331  StringRef Features = TM->getTargetFeatureString();
1332  if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
1333  (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
1334  return false;
1335 
1336  Function *F = CI->getParent()->getParent();
1337  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1338  unsigned N = ST.getWavefrontSize();
1339 
1340  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1341  << N << "\n");
1342 
1343  CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1344  CI->eraseFromParent();
1345  return true;
1346 }
1347 
1348 // Get insertion point at entry.
1349 BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
1350  Function * Func = UI->getParent()->getParent();
1351  BasicBlock * BB = &Func->getEntryBlock();
1352  assert(BB && "Entry block not found!");
1353  BasicBlock::iterator ItNew = BB->begin();
1354  return ItNew;
1355 }
1356 
1357 // Insert a AllocsInst at the beginning of function entry block.
1358 AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
1359  const char *prefix) {
1360  BasicBlock::iterator ItNew = getEntryIns(UI);
1361  Function *UCallee = UI->getCalledFunction();
1362  Type *RetType = UCallee->getReturnType();
1363  B.SetInsertPoint(&*ItNew);
1364  AllocaInst *Alloc =
1365  B.CreateAlloca(RetType, nullptr, std::string(prefix) + UI->getName());
1366  Alloc->setAlignment(
1367  Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
1368  return Alloc;
1369 }
1370 
1371 bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
1372  double& Res0, double& Res1,
1373  Constant *copr0, Constant *copr1,
1374  Constant *copr2) {
1375  // By default, opr0/opr1/opr3 holds values of float/double type.
1376  // If they are not float/double, each function has to its
1377  // operand separately.
1378  double opr0=0.0, opr1=0.0, opr2=0.0;
1379  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1380  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1381  ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
1382  if (fpopr0) {
1383  opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1384  ? fpopr0->getValueAPF().convertToDouble()
1385  : (double)fpopr0->getValueAPF().convertToFloat();
1386  }
1387 
1388  if (fpopr1) {
1389  opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1390  ? fpopr1->getValueAPF().convertToDouble()
1391  : (double)fpopr1->getValueAPF().convertToFloat();
1392  }
1393 
1394  if (fpopr2) {
1395  opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1396  ? fpopr2->getValueAPF().convertToDouble()
1397  : (double)fpopr2->getValueAPF().convertToFloat();
1398  }
1399 
1400  switch (FInfo.getId()) {
1401  default : return false;
1402 
1404  Res0 = acos(opr0);
1405  return true;
1406 
1408  // acosh(x) == log(x + sqrt(x*x - 1))
1409  Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1410  return true;
1411 
1413  Res0 = acos(opr0) / MATH_PI;
1414  return true;
1415 
1417  Res0 = asin(opr0);
1418  return true;
1419 
1421  // asinh(x) == log(x + sqrt(x*x + 1))
1422  Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1423  return true;
1424 
1426  Res0 = asin(opr0) / MATH_PI;
1427  return true;
1428 
1430  Res0 = atan(opr0);
1431  return true;
1432 
1434  // atanh(x) == (log(x+1) - log(x-1))/2;
1435  Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1436  return true;
1437 
1439  Res0 = atan(opr0) / MATH_PI;
1440  return true;
1441 
1443  Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1444  return true;
1445 
1446  case AMDGPULibFunc::EI_COS:
1447  Res0 = cos(opr0);
1448  return true;
1449 
1451  Res0 = cosh(opr0);
1452  return true;
1453 
1455  Res0 = cos(MATH_PI * opr0);
1456  return true;
1457 
1458  case AMDGPULibFunc::EI_EXP:
1459  Res0 = exp(opr0);
1460  return true;
1461 
1463  Res0 = pow(2.0, opr0);
1464  return true;
1465 
1467  Res0 = pow(10.0, opr0);
1468  return true;
1469 
1471  Res0 = exp(opr0) - 1.0;
1472  return true;
1473 
1474  case AMDGPULibFunc::EI_LOG:
1475  Res0 = log(opr0);
1476  return true;
1477 
1479  Res0 = log(opr0) / log(2.0);
1480  return true;
1481 
1483  Res0 = log(opr0) / log(10.0);
1484  return true;
1485 
1487  Res0 = 1.0 / sqrt(opr0);
1488  return true;
1489 
1490  case AMDGPULibFunc::EI_SIN:
1491  Res0 = sin(opr0);
1492  return true;
1493 
1495  Res0 = sinh(opr0);
1496  return true;
1497 
1499  Res0 = sin(MATH_PI * opr0);
1500  return true;
1501 
1503  Res0 = sqrt(opr0);
1504  return true;
1505 
1506  case AMDGPULibFunc::EI_TAN:
1507  Res0 = tan(opr0);
1508  return true;
1509 
1511  Res0 = tanh(opr0);
1512  return true;
1513 
1515  Res0 = tan(MATH_PI * opr0);
1516  return true;
1517 
1519  Res0 = 1.0 / opr0;
1520  return true;
1521 
1522  // two-arg functions
1524  Res0 = opr0 / opr1;
1525  return true;
1526 
1527  case AMDGPULibFunc::EI_POW:
1529  Res0 = pow(opr0, opr1);
1530  return true;
1531 
1532  case AMDGPULibFunc::EI_POWN: {
1533  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1534  double val = (double)iopr1->getSExtValue();
1535  Res0 = pow(opr0, val);
1536  return true;
1537  }
1538  return false;
1539  }
1540 
1541  case AMDGPULibFunc::EI_ROOTN: {
1542  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1543  double val = (double)iopr1->getSExtValue();
1544  Res0 = pow(opr0, 1.0 / val);
1545  return true;
1546  }
1547  return false;
1548  }
1549 
1550  // with ptr arg
1552  Res0 = sin(opr0);
1553  Res1 = cos(opr0);
1554  return true;
1555 
1556  // three-arg functions
1557  case AMDGPULibFunc::EI_FMA:
1558  case AMDGPULibFunc::EI_MAD:
1559  Res0 = opr0 * opr1 + opr2;
1560  return true;
1561  }
1562 
1563  return false;
1564 }
1565 
1566 bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1567  int numArgs = (int)aCI->arg_size();
1568  if (numArgs > 3)
1569  return false;
1570 
1571  Constant *copr0 = nullptr;
1572  Constant *copr1 = nullptr;
1573  Constant *copr2 = nullptr;
1574  if (numArgs > 0) {
1575  if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1576  return false;
1577  }
1578 
1579  if (numArgs > 1) {
1580  if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1581  if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1582  return false;
1583  }
1584  }
1585 
1586  if (numArgs > 2) {
1587  if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
1588  return false;
1589  }
1590 
1591  // At this point, all arguments to aCI are constants.
1592 
1593  // max vector size is 16, and sincos will generate two results.
1594  double DVal0[16], DVal1[16];
1595  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1596  if (getVecSize(FInfo) == 1) {
1597  if (!evaluateScalarMathFunc(FInfo, DVal0[0],
1598  DVal1[0], copr0, copr1, copr2)) {
1599  return false;
1600  }
1601  } else {
1602  ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1603  ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1604  ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
1605  for (int i=0; i < getVecSize(FInfo); ++i) {
1606  Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1607  Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1608  Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
1609  if (!evaluateScalarMathFunc(FInfo, DVal0[i],
1610  DVal1[i], celt0, celt1, celt2)) {
1611  return false;
1612  }
1613  }
1614  }
1615 
1616  LLVMContext &context = CI->getParent()->getParent()->getContext();
1617  Constant *nval0, *nval1;
1618  if (getVecSize(FInfo) == 1) {
1619  nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
1620  if (hasTwoResults)
1621  nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
1622  } else {
1623  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1624  SmallVector <float, 0> FVal0, FVal1;
1625  for (int i=0; i < getVecSize(FInfo); ++i)
1626  FVal0.push_back((float)DVal0[i]);
1627  ArrayRef<float> tmp0(FVal0);
1628  nval0 = ConstantDataVector::get(context, tmp0);
1629  if (hasTwoResults) {
1630  for (int i=0; i < getVecSize(FInfo); ++i)
1631  FVal1.push_back((float)DVal1[i]);
1632  ArrayRef<float> tmp1(FVal1);
1633  nval1 = ConstantDataVector::get(context, tmp1);
1634  }
1635  } else {
1636  ArrayRef<double> tmp0(DVal0);
1637  nval0 = ConstantDataVector::get(context, tmp0);
1638  if (hasTwoResults) {
1639  ArrayRef<double> tmp1(DVal1);
1640  nval1 = ConstantDataVector::get(context, tmp1);
1641  }
1642  }
1643  }
1644 
1645  if (hasTwoResults) {
1646  // sincos
1647  assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
1648  "math function with ptr arg not supported yet");
1649  new StoreInst(nval1, aCI->getArgOperand(1), aCI);
1650  }
1651 
1652  replaceCall(nval0);
1653  return true;
1654 }
1655 
1656 // Public interface to the Simplify LibCalls pass.
1658  return new AMDGPUSimplifyLibCalls(TM);
1659 }
1660 
1662  return new AMDGPUUseNativeCalls();
1663 }
1664 
1666  if (skipFunction(F))
1667  return false;
1668 
1669  bool Changed = false;
1670  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1671 
1672  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1673  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1674 
1675  for (auto &BB : F) {
1676  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1677  // Ignore non-calls.
1678  CallInst *CI = dyn_cast<CallInst>(I);
1679  ++I;
1680  // Ignore intrinsics that do not become real instructions.
1681  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1682  continue;
1683 
1684  // Ignore indirect calls.
1686  if (Callee == nullptr)
1687  continue;
1688 
1689  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1690  dbgs().flush());
1691  if(Simplifier.fold(CI, AA))
1692  Changed = true;
1693  }
1694  }
1695  return Changed;
1696 }
1697 
1700  AMDGPULibCalls Simplifier(&TM);
1701  Simplifier.initNativeFuncs();
1702 
1703  bool Changed = false;
1704  auto AA = &AM.getResult<AAManager>(F);
1705 
1706  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1707  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1708 
1709  for (auto &BB : F) {
1710  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1711  // Ignore non-calls.
1712  CallInst *CI = dyn_cast<CallInst>(I);
1713  ++I;
1714  // Ignore intrinsics that do not become real instructions.
1715  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1716  continue;
1717 
1718  // Ignore indirect calls.
1720  if (Callee == nullptr)
1721  continue;
1722 
1723  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1724  dbgs().flush());
1725  if (Simplifier.fold(CI, AA))
1726  Changed = true;
1727  }
1728  }
1729  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1730 }
1731 
1733  if (skipFunction(F) || UseNative.empty())
1734  return false;
1735 
1736  bool Changed = false;
1737  for (auto &BB : F) {
1738  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1739  // Ignore non-calls.
1740  CallInst *CI = dyn_cast<CallInst>(I);
1741  ++I;
1742  if (!CI) continue;
1743 
1744  // Ignore indirect calls.
1746  if (Callee == nullptr)
1747  continue;
1748 
1749  if (Simplifier.useNative(CI))
1750  Changed = true;
1751  }
1752  }
1753  return Changed;
1754 }
1755 
1758  if (UseNative.empty())
1759  return PreservedAnalyses::all();
1760 
1761  AMDGPULibCalls Simplifier;
1762  Simplifier.initNativeFuncs();
1763 
1764  bool Changed = false;
1765  for (auto &BB : F) {
1766  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1767  // Ignore non-calls.
1768  CallInst *CI = dyn_cast<CallInst>(I);
1769  ++I;
1770  if (!CI)
1771  continue;
1772 
1773  // Ignore indirect calls.
1775  if (Callee == nullptr)
1776  continue;
1777 
1778  if (Simplifier.useNative(CI))
1779  Changed = true;
1780  }
1781  }
1782  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1783 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::AMDGPULibFunc::isMangled
bool isMangled() const
Definition: AMDGPULibFunc.h:379
llvm::AMDGPULibFuncBase::EI_ASINH
@ EI_ASINH
Definition: AMDGPULibFunc.h:46
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1287
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:4908
llvm::AMDGPULibFuncBase::EI_NFMA
@ EI_NFMA
Definition: AMDGPULibFunc.h:224
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
llvm::AMDGPULibCalls::AMDGPULibCalls
AMDGPULibCalls(const TargetMachine *TM_=nullptr)
Definition: AMDGPULibCalls.cpp:121
getOptTable
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:388
tbl_asin
static const TableEntry tbl_asin[]
Definition: AMDGPULibCalls.cpp:226
tbl_sinpi
static const TableEntry tbl_sinpi[]
Definition: AMDGPULibCalls.cpp:327
TableRef::table
const TableEntry * table
Definition: AMDGPULibCalls.cpp:380
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:66
llvm::AMDGPULibFuncBase::EI_ACOSH
@ EI_ACOSH
Definition: AMDGPULibFunc.h:40
llvm::AMDGPULibFuncBase::F64
@ F64
Definition: AMDGPULibFunc.h:272
tbl_tan
static const TableEntry tbl_tan[]
Definition: AMDGPULibCalls.cpp:336
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
tbl_acos
static const TableEntry tbl_acos[]
Definition: AMDGPULibCalls.cpp:211
tbl_log2
static const TableEntry tbl_log2[]
Definition: AMDGPULibCalls.cpp:307
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
Loads.h
llvm::Function
Definition: Function.h:62
llvm::Attribute
Definition: Attributes.h:53
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::CallBase::setCalledFunction
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1434
TableRef::size
size_t size
Definition: AMDGPULibCalls.cpp:379
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:729
double
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in and only one load from a constant double
Definition: README-SSE.txt:85
llvm::AMDGPULibFuncBase::EI_NCOS
@ EI_NCOS
Definition: AMDGPULibFunc.h:222
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
tbl_atanh
static const TableEntry tbl_atanh[]
Definition: AMDGPULibCalls.cpp:248
llvm::AMDGPULibFuncBase::EI_ROOTN
@ EI_ROOTN
Definition: AMDGPULibFunc.h:162
llvm::AMDGPULibFuncBase::EI_READ_PIPE_2
@ EI_READ_PIPE_2
Definition: AMDGPULibFunc.h:238
llvm::User::dropAllReferences
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
MATH_PI
#define MATH_PI
Definition: AMDGPULibCalls.cpp:38
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:169
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
tbl_log
static const TableEntry tbl_log[]
Definition: AMDGPULibCalls.cpp:303
llvm::IRBuilder<>
llvm::cl::ValueOptional
@ ValueOptional
Definition: CommandLine.h:136
llvm::AMDGPULibFuncBase::EI_ATANPI
@ EI_ATANPI
Definition: AMDGPULibFunc.h:54
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:363
tbl_log10
static const TableEntry tbl_log10[]
Definition: AMDGPULibCalls.cpp:311
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::AMDGPULibFuncBase::EI_POWR
@ EI_POWR
Definition: AMDGPULibFunc.h:152
llvm::ConstantDataVector::get
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:3131
llvm::ConstantDataSequential::getElementAsFloat
float getElementAsFloat(unsigned i) const
If this is a sequential container of floats, return the specified element as a float.
Definition: Constants.cpp:3312
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::AMDGPULibFunc::parse
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
Definition: AMDGPULibFunc.cpp:681
llvm::ConstantFP::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:301
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AMDGPULibFuncBase::EI_ERFC
@ EI_ERFC
Definition: AMDGPULibFunc.h:84
llvm::AMDGPULibFuncBase::EI_TANPI
@ EI_TANPI
Definition: AMDGPULibFunc.h:195
llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1473
llvm::ConstantAggregateZero
All zero aggregate value.
Definition: Constants.h:336
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:297
llvm::AMDGPULibFuncBase::EI_COSH
@ EI_COSH
Definition: AMDGPULibFunc.h:75
tbl_cbrt
static const TableEntry tbl_cbrt[]
Definition: AMDGPULibCalls.cpp:258
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:894
llvm::AMDGPULibFuncBase::EI_CBRT
@ EI_CBRT
Definition: AMDGPULibFunc.h:67
llvm::AMDGPULibFuncBase::EI_SIN
@ EI_SIN
Definition: AMDGPULibFunc.h:171
llvm::AMDGPULibFuncBase::EI_LOG
@ EI_LOG
Definition: AMDGPULibFunc.h:129
tmp1
urem i32 %X, 255 ret i32 %tmp1 } Currently it compiles to:... movl $2155905153, %ecx movl 8(%esp), %esi movl %esi, %eax mull %ecx ... This could be "reassociated" into:movl $2155905153, %eax movl 8(%esp), %ecx mull %ecx to avoid the copy. In fact, the existing two-address stuff would do this except that mul isn 't a commutative 2-addr instruction. I guess this has to be done at isel time based on the #uses to mul? Make sure the instruction which starts a loop does not cross a cacheline boundary. This requires knowning the exact length of each machine instruction. That is somewhat complicated, but doable. Example 256.bzip2:In the new trace, the hot loop has an instruction which crosses a cacheline boundary. In addition to potential cache misses, this can 't help decoding as I imagine there has to be some kind of complicated decoder reset and realignment to grab the bytes from the next cacheline. 532 532 0x3cfc movb(1809(%esp, %esi), %bl<<<--- spans 2 64 byte lines 942 942 0x3d03 movl %dh,(1809(%esp, %esi) 937 937 0x3d0a incl %esi 3 3 0x3d0b cmpb %bl, %dl 27 27 0x3d0d jnz 0x000062db< main+11707 > In c99 mode, the preprocessor doesn 't like assembly comments like #TRUNCATE. This could be a single 16-bit load. int f(char *p) { if((p[0]==1) &(p[1]==2)) return 1 tmp1
Definition: README.txt:375
llvm::AMDGPULibFuncBase::EI_POW
@ EI_POW
Definition: AMDGPULibFunc.h:150
that
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing and or getMachineCombinerPatterns Find a clean way to fix the problem which leads to the Shrink Wrapping pass being run after the WebAssembly PEI pass When setting multiple variables to the same we currently get code like const It could be done with a smaller encoding like local tee $pop5 local $pop6 WebAssembly registers are implicitly initialized to zero Explicit zeroing is therefore often redundant and could be optimized away Small indices may use smaller encodings than large indices WebAssemblyRegColoring and or WebAssemblyRegRenumbering should sort registers according to their usage frequency to maximize the usage of smaller encodings Many cases of irreducible control flow could be transformed more optimally than via the transform in WebAssemblyFixIrreducibleControlFlow cpp It may also be worthwhile to do transforms before register particularly when duplicating to allow register coloring to be aware of the duplication WebAssemblyRegStackify could use AliasAnalysis to reorder loads and stores more aggressively WebAssemblyRegStackify is currently a greedy algorithm This means that
Definition: README.txt:130
tmp
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
Definition: README.txt:1347
llvm::AMDGPULibFuncBase::EI_RECIP
@ EI_RECIP
Definition: AMDGPULibFunc.h:155
llvm::AMDGPULibFuncBase::EI_NSIN
@ EI_NSIN
Definition: AMDGPULibFunc.h:228
llvm::Attribute::getValueAsBool
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:290
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
llvm::AMDGPULibCalls
Definition: AMDGPULibCalls.cpp:45
tbl_erf
static const TableEntry tbl_erf[]
Definition: AMDGPULibCalls.cpp:280
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AMDGPULibFuncBase::EI_NSQRT
@ EI_NSQRT
Definition: AMDGPULibFunc.h:229
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:207
Context
ManagedStatic< detail::RecordContext > Context
Definition: Record.cpp:96
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AMDGPULibCalls::replaceCall
void replaceCall(Value *With)
Definition: AMDGPULibCalls.cpp:115
llvm::AMDGPULibFunc::getLeads
Param * getLeads()
Get leading parameters for mangled lib functions.
Definition: AMDGPULibFunc.cpp:1050
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:185
llvm::AMDGPULibFuncBase::getEPtrKindFromAddrSpace
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Definition: AMDGPULibFunc.h:312
llvm::AMDGPULibFunc
Wrapper class for AMDGPULIbFuncImpl.
Definition: AMDGPULibFunc.h:357
tbl_acospi
static const TableEntry tbl_acospi[]
Definition: AMDGPULibCalls.cpp:220
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::AMDGPULibFuncBase::EI_TANH
@ EI_TANH
Definition: AMDGPULibFunc.h:194
llvm::AMDGPULibFuncBase::EI_FMA
@ EI_FMA
Definition: AMDGPULibFunc.h:95
DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
llvm::AMDGPULibFuncBase::EI_ATANH
@ EI_ATANH
Definition: AMDGPULibFunc.h:53
llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1661
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AMDGPULibFuncBase::EType
EType
Definition: AMDGPULibFunc.h:252
llvm::AAResults
Definition: AliasAnalysis.h:507
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
tbl_exp
static const TableEntry tbl_exp[]
Definition: AMDGPULibCalls.cpp:284
llvm::AMDGPULibFuncBase::EI_FABS
@ EI_FABS
Definition: AMDGPULibFunc.h:89
llvm::User
Definition: User.h:44
llvm::AMDGPULibCalls::useNative
bool useNative(CallInst *CI)
Definition: AMDGPULibCalls.cpp:506
llvm::AMDGPULibFunc::getOrInsertFunction
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
Definition: AMDGPULibFunc.cpp:961
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1398
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1521
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
tbl_asinpi
static const TableEntry tbl_asinpi[]
Definition: AMDGPULibCalls.cpp:236
llvm::AMDGPULibFuncBase::EI_ACOSPI
@ EI_ACOSPI
Definition: AMDGPULibFunc.h:41
false
Definition: StackSlotColoring.cpp:142
llvm::AMDGPULibFuncBase::EI_NEXP2
@ EI_NEXP2
Definition: AMDGPULibFunc.h:223
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::APFloat::convertToDouble
double convertToDouble() const
Converts this APFloat to host double value.
Definition: APFloat.cpp:4895
llvm::Instruction
Definition: Instruction.h:45
llvm::ConstantDataVector::getSplatValue
Constant * getSplatValue() const
If this is a splat constant, meaning that all of the elements have the same value,...
Definition: Constants.cpp:3369
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::ConstantFP::isExactlyValue
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: Constants.cpp:1103
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:186
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:932
llvm::FunctionCallee::getFunctionType
FunctionType * getFunctionType()
Definition: DerivedTypes.h:182
getVecSize
static int getVecSize(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:432
llvm::AMDGPULibFuncBase::EI_ASIN
@ EI_ASIN
Definition: AMDGPULibFunc.h:45
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::AMDGPULibFuncBase::EI_TAN
@ EI_TAN
Definition: AMDGPULibFunc.h:193
tbl_expm1
static const TableEntry tbl_expm1[]
Definition: AMDGPULibCalls.cpp:299
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:686
input
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 input
Definition: README.txt:10
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
tbl_atan
static const TableEntry tbl_atan[]
Definition: AMDGPULibCalls.cpp:242
llvm::Instruction::isLifetimeStartOrEnd
bool isLifetimeStartOrEnd() const
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
Definition: Instruction.cpp:716
Name
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Definition: AMDGPULibCalls.cpp:186
llvm::AMDGPUSimplifyLibCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1698
HasNative
static bool HasNative(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:355
tbl_tanh
static const TableEntry tbl_tanh[]
Definition: AMDGPULibCalls.cpp:340
llvm::AMDGPULibFuncBase::NOPFX
@ NOPFX
Definition: AMDGPULibFunc.h:247
llvm::AMDGPULibFuncBase::EI_EXP10
@ EI_EXP10
Definition: AMDGPULibFunc.h:86
llvm::StringRef::equals
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:186
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::AMDGPULibFuncBase::EI_READ_PIPE_4
@ EI_READ_PIPE_4
Definition: AMDGPULibFunc.h:239
llvm::AMDGPULibFuncBase::EI_ACOS
@ EI_ACOS
Definition: AMDGPULibFunc.h:39
AMDGPULibFunc.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
TableRef::TableRef
TableRef()
Definition: AMDGPULibCalls.cpp:382
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::cl::opt< bool >
val
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 the input will be treated as an leaving the upper bits uninitialised For i64 store i32 val
Definition: README.txt:15
llvm::AMDGPULibFuncBase::HALF
@ HALF
Definition: AMDGPULibFunc.h:249
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:309
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::AMDGPULibCalls::isUnsafeMath
bool isUnsafeMath(const CallInst *CI) const
Definition: AMDGPULibCalls.cpp:453
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::StringRef::equals_insensitive
LLVM_NODISCARD bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:193
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:182
llvm::StringRef::empty
constexpr LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:152
llvm::AMDGPULibFuncBase::F32
@ F32
Definition: AMDGPULibFunc.h:271
tbl_cospi
static const TableEntry tbl_cospi[]
Definition: AMDGPULibCalls.cpp:272
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:578
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPULibFuncBase::EI_EXP
@ EI_EXP
Definition: AMDGPULibFunc.h:85
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::AMDGPULibFuncBase::EI_TGAMMA
@ EI_TGAMMA
Definition: AMDGPULibFunc.h:196
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_4
@ EI_WRITE_PIPE_4
Definition: AMDGPULibFunc.h:241
llvm::AMDGPULibFuncBase::EI_COSPI
@ EI_COSPI
Definition: AMDGPULibFunc.h:76
llvm::AMDGPULibFunc::mangle
std::string mangle() const
Definition: AMDGPULibFunc.h:387
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::ConstantDataVector
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:752
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
getArgType
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:436
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::AMDGPUUseNativeCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1756
llvm::AMDGPULibCalls::initNativeFuncs
void initNativeFuncs()
Definition: AMDGPULibCalls.cpp:466
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1714
tbl_asinh
static const TableEntry tbl_asinh[]
Definition: AMDGPULibCalls.cpp:232
false
amdgpu Simplify well known AMD library false
Definition: AMDGPULibCalls.cpp:178
llvm::AMDGPULibFunc::getNumArgs
unsigned getNumArgs() const
Definition: AMDGPULibFunc.h:372
IRBuilder.h
llvm::ConstantFP::isNegative
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:304
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::FPMathOperator
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:265
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
llvm::ConstantDataSequential::getElementAsConstant
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index's element.
Definition: Constants.cpp:3324
llvm::AMDGPULibFuncBase::EI_ASINPI
@ EI_ASINPI
Definition: AMDGPULibFunc.h:47
llvm::AMDGPULibFuncBase::EI_SINCOS
@ EI_SINCOS
Definition: AMDGPULibFunc.h:172
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_2
@ EI_WRITE_PIPE_2
Definition: AMDGPULibFunc.h:240
llvm::AMDGPULibFuncBase::EI_SINH
@ EI_SINH
Definition: AMDGPULibFunc.h:173
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
tbl_cos
static const TableEntry tbl_cos[]
Definition: AMDGPULibCalls.cpp:264
llvm::StringRef::contains_insensitive
LLVM_NODISCARD bool contains_insensitive(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:471
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1630
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::AMDGPULibFuncBase::Param::VectorSize
unsigned char VectorSize
Definition: AMDGPULibFunc.h:293
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
Simplify
assume Assume Simplify
Definition: AssumeBundleBuilder.cpp:603
llvm::AMDGPULibFuncBase::EI_DIVIDE
@ EI_DIVIDE
Definition: AMDGPULibFunc.h:81
tbl_erfc
static const TableEntry tbl_erfc[]
Definition: AMDGPULibCalls.cpp:276
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AMDGPULibFuncBase::EI_COS
@ EI_COS
Definition: AMDGPULibFunc.h:74
llvm::AMDGPULibFuncBase::EI_LOG2
@ EI_LOG2
Definition: AMDGPULibFunc.h:132
tbl_atanpi
static const TableEntry tbl_atanpi[]
Definition: AMDGPULibCalls.cpp:252
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
library
Itanium Name Demangler i e convert the string _Z1fv into but neither can depend on each other libcxxabi needs the demangler to implement which is part of the itanium ABI spec LLVM needs a copy for a bunch of but doesn t want to use the system s __cxa_demangle because it a might not be and b probably isn t that up to date on the latest language features The copy of the demangler in LLVM has some extra stuff that aren t needed in which depend on the shared generic components Despite these we want to keep the core generic demangling library identical between both copies to simplify development and testing If you re working on the generic library
Definition: README.txt:30
llvm::AMDGPULibFuncBase::EFuncId
EFuncId
Definition: AMDGPULibFunc.h:23
llvm::AMDGPULibFunc::setId
void setId(EFuncId Id)
Definition: AMDGPULibFunc.h:380
llvm::AMDGPULibFuncBase::EI_EXP2
@ EI_EXP2
Definition: AMDGPULibFunc.h:87
tbl_tanpi
static const TableEntry tbl_tanpi[]
Definition: AMDGPULibCalls.cpp:344
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:131
llvm::AMDGPULibFunc::setPrefix
void setPrefix(ENamePrefix PFX)
Definition: AMDGPULibFunc.h:390
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::AMDGPULibFuncBase::EI_NRSQRT
@ EI_NRSQRT
Definition: AMDGPULibFunc.h:227
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:180
UseNative
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
well
llvm ldr ldrb ldrh str strh strb strb gcc and possibly speed as well(we don 't have a good way to measure on ARM). *Consider this silly example
Definition: README.txt:138
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:185
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
EnablePreLink
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:242
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", "Simplify well-known AMD library calls", false, false) INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:245
llvm::AMDGPULibFuncBase::EI_ERF
@ EI_ERF
Definition: AMDGPULibFunc.h:83
llvm::ConstantDataSequential::getElementAsDouble
double getElementAsDouble(unsigned i) const
If this is an sequential container of doubles, return the specified element as a double.
Definition: Constants.cpp:3318
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::AMDGPULibFuncBase::EI_MAD
@ EI_MAD
Definition: AMDGPULibFunc.h:134
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1341
llvm::AMDGPULibFuncBase::Param::ArgType
unsigned char ArgType
Definition: AMDGPULibFunc.h:292
llvm::AMDGPULibFuncBase::EI_POWN
@ EI_POWN
Definition: AMDGPULibFunc.h:151
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:979
TableRef::TableRef
TableRef(const TableEntry(&tbl)[N])
Definition: AMDGPULibCalls.cpp:385
llvm::AMDGPULibFunc::getPrefix
ENamePrefix getPrefix() const
Definition: AMDGPULibFunc.h:374
llvm::FindAvailableLoadedValue
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:431
llvm::AMDGPULibFuncBase::EI_LOG10
@ EI_LOG10
Definition: AMDGPULibFunc.h:130
llvm::AMDGPULibFunc::getId
EFuncId getId() const
Definition: AMDGPULibFunc.h:373
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
y
into llvm powi allowing the code generator to produce balanced multiplication trees the intrinsic needs to be extended to support and second the code generator needs to be enhanced to lower these to multiplication trees Interesting testcase for add shift mul int y
Definition: README.txt:61
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:801
tbl_rsqrt
static const TableEntry tbl_rsqrt[]
Definition: AMDGPULibCalls.cpp:315
simplifylib
amdgpu simplifylib
Definition: AMDGPULibCalls.cpp:177
llvm::AMDGPULibFuncBase::EI_EXPM1
@ EI_EXPM1
Definition: AMDGPULibFunc.h:88
MATH_SQRT2
#define MATH_SQRT2
Definition: AMDGPULibCalls.cpp:40
llvm::numbers::ln2
constexpr double ln2
Definition: MathExtras.h:59
llvm::AMDGPULibCalls::CI
CallInst * CI
Definition: AMDGPULibCalls.cpp:111
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:130
tbl_exp10
static const TableEntry tbl_exp10[]
Definition: AMDGPULibCalls.cpp:294
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:167
llvm::AMDGPULibCalls::fold
bool fold(CallInst *CI, AliasAnalysis *AA=nullptr)
Definition: AMDGPULibCalls.cpp:600
tbl_sin
static const TableEntry tbl_sin[]
Definition: AMDGPULibCalls.cpp:319
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1343
llvm::ConstantDataVector::getSplat
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:3192
N
#define N
tbl_exp2
static const TableEntry tbl_exp2[]
Definition: AMDGPULibCalls.cpp:289
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1335
llvm::AMDGPULibFunc::getName
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
Definition: AMDGPULibFunc.h:371
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:87
llvm::AMDGPULibFuncBase::EI_SQRT
@ EI_SQRT
Definition: AMDGPULibFunc.h:176
MATH_E
#define MATH_E
Definition: AMDGPULibCalls.cpp:39
tbl_sinh
static const TableEntry tbl_sinh[]
Definition: AMDGPULibCalls.cpp:323
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
calls
amdgpu Simplify well known AMD library calls
Definition: AMDGPULibCalls.cpp:178
tbl_cosh
static const TableEntry tbl_cosh[]
Definition: AMDGPULibCalls.cpp:268
llvm::AMDGPULibFuncBase::EI_NLOG2
@ EI_NLOG2
Definition: AMDGPULibFunc.h:225
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1478
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
tbl_sqrt
static const TableEntry tbl_sqrt[]
Definition: AMDGPULibCalls.cpp:331
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
llvm::cl::desc
Definition: CommandLine.h:412
llvm::AMDGPULibFuncBase::EI_RSQRT
@ EI_RSQRT
Definition: AMDGPULibFunc.h:165
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
TableRef
Definition: AMDGPULibCalls.cpp:378
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AMDGPULibFuncBase::EI_ATAN
@ EI_ATAN
Definition: AMDGPULibFunc.h:50
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AMDGPULibFuncBase::EI_SINPI
@ EI_SINPI
Definition: AMDGPULibFunc.h:174
llvm::AMDGPULibFuncBase::NATIVE
@ NATIVE
Definition: AMDGPULibFunc.h:248
MATH_SQRT1_2
#define MATH_SQRT1_2
Definition: AMDGPULibCalls.cpp:41
tbl_acosh
static const TableEntry tbl_acosh[]
Definition: AMDGPULibCalls.cpp:217
tbl_tgamma
static const TableEntry tbl_tgamma[]
Definition: AMDGPULibCalls.cpp:348
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1657
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
INITIALIZE_PASS
TargetPassConfig.
Definition: TargetPassConfig.cpp:359
llvm::DataLayout::getTypeAllocSize
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:506
llvm::ConstantDataSequential::getNumElements
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2982
llvm::cl::list
Definition: CommandLine.h:1641