LLVM  14.0.0git
AMDGPULibCalls.cpp
Go to the documentation of this file.
1 //===- AMDGPULibCalls.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file does AMD library function optimizations.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULibFunc.h"
16 #include "GCNSubtarget.h"
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/IR/IntrinsicsAMDGPU.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/InitializePasses.h"
23 
24 #define DEBUG_TYPE "amdgpu-simplifylib"
25 
26 using namespace llvm;
27 
28 static cl::opt<bool> EnablePreLink("amdgpu-prelink",
29  cl::desc("Enable pre-link mode optimizations"),
30  cl::init(false),
31  cl::Hidden);
32 
33 static cl::list<std::string> UseNative("amdgpu-use-native",
34  cl::desc("Comma separated list of functions to replace with native, or all"),
36  cl::Hidden);
37 
38 #define MATH_PI numbers::pi
39 #define MATH_E numbers::e
40 #define MATH_SQRT2 numbers::sqrt2
41 #define MATH_SQRT1_2 numbers::inv_sqrt2
42 
43 namespace llvm {
44 
46 private:
47 
49 
50  const TargetMachine *TM;
51 
52  // -fuse-native.
53  bool AllNative = false;
54 
55  bool useNativeFunc(const StringRef F) const;
56 
57  // Return a pointer (pointer expr) to the function if function definition with
58  // "FuncName" exists. It may create a new function prototype in pre-link mode.
59  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
60 
61  // Replace a normal function with its native version.
62  bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
63 
64  bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
65 
66  bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
67 
68  /* Specialized optimizations */
69 
70  // recip (half or native)
71  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
72 
73  // divide (half or native)
74  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
75 
76  // pow/powr/pown
77  bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
78 
79  // rootn
80  bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
81 
82  // fma/mad
83  bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
84 
85  // -fuse-native for sincos
86  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
87 
88  // evaluate calls if calls' arguments are constants.
89  bool evaluateScalarMathFunc(const FuncInfo &FInfo, double& Res0,
90  double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
91  bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
92 
93  // exp
94  bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
95 
96  // exp2
97  bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
98 
99  // exp10
100  bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
101 
102  // log
103  bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
104 
105  // log2
106  bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
107 
108  // log10
109  bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
110 
111  // sqrt
112  bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
113 
114  // sin/cos
115  bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
116 
117  // __read_pipe/__write_pipe
118  bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
119  const FuncInfo &FInfo);
120 
121  // llvm.amdgcn.wavefrontsize
122  bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
123 
124  // Get insertion point at entry.
125  BasicBlock::iterator getEntryIns(CallInst * UI);
126  // Insert an Alloc instruction.
127  AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
128  // Get a scalar native builtin signle argument FP function
129  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
130 
131 protected:
133 
134  bool isUnsafeMath(const CallInst *CI) const;
135 
136  void replaceCall(Value *With) {
137  CI->replaceAllUsesWith(With);
138  CI->eraseFromParent();
139  }
140 
141 public:
142  AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
143 
144  bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
145 
146  void initNativeFuncs();
147 
148  // Replace a normal math function call with that native version
149  bool useNative(CallInst *CI);
150 };
151 
152 } // end llvm namespace
153 
// Legacy FunctionPass wrappers around AMDGPULibCalls. Each pass owns one
// AMDGPULibCalls instance named Simplifier.
154 namespace {
155 
// Pass registered under "amdgpu-simplifylib"; forwards the TargetMachine it is
// constructed with to its AMDGPULibCalls member.
156  class AMDGPUSimplifyLibCalls : public FunctionPass {
157 
158  AMDGPULibCalls Simplifier;
159 
160  public:
161  static char ID; // Pass identification
162 
// NOTE(review): the listing's embedded numbering skips line 165, so a
// statement may be missing from this constructor body - confirm upstream.
163  AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
164  : FunctionPass(ID), Simplifier(TM) {
166  }
167 
// NOTE(review): embedded line 169 is missing as well; as shown, no analysis
// is requested here - confirm upstream.
168  void getAnalysisUsage(AnalysisUsage &AU) const override {
170  }
171 
// The parameter is a Function even though it is named M.
172  bool runOnFunction(Function &M) override;
173  };
174 
// Pass registered under "amdgpu-usenative"; its constructor calls
// initNativeFuncs() on the simplifier before any function is visited.
175  class AMDGPUUseNativeCalls : public FunctionPass {
176 
177  AMDGPULibCalls Simplifier;
178 
179  public:
180  static char ID; // Pass identification
181 
// NOTE(review): embedded line 183 is missing from this constructor body.
182  AMDGPUUseNativeCalls() : FunctionPass(ID) {
184  Simplifier.initNativeFuncs();
185  }
186 
187  bool runOnFunction(Function &F) override;
188  };
189 
190 } // end anonymous namespace.
191 
193 char AMDGPUUseNativeCalls::ID = 0;
194 
195 INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
196  "Simplify well-known AMD library calls", false, false)
198 INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
200 
201 INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
202  "Replace builtin math calls with that native versions.",
203  false, false)
204 
205 template <typename IRB>
206 static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
207  const Twine &Name = "") {
208  CallInst *R = B.CreateCall(Callee, Arg, Name);
209  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
210  R->setCallingConv(F->getCallingConv());
211  return R;
212 }
213 
214 template <typename IRB>
215 static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
216  Value *Arg2, const Twine &Name = "") {
217  CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
218  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
219  R->setCallingConv(F->getCallingConv());
220  return R;
221 }
222 
223 // Data structures for table-driven optimizations.
224 // FuncTbl works for both f32 and f64 functions with 1 input argument
225 
// One row of a folding table: a call whose constant argument equals `input`
// is replaced by the constant `result`.
struct TableEntry {
  double result; // value substituted for the call
  double input;  // argument value that selects this row
};
230 
231 /* Each table below is a list of {result, input} pairs, i.e. f(input) == result. */
232 static const TableEntry tbl_acos[] = {
233  {MATH_PI / 2.0, 0.0},
234  {MATH_PI / 2.0, -0.0},
235  {0.0, 1.0},
236  {MATH_PI, -1.0}
237 };
238 static const TableEntry tbl_acosh[] = {
239  {0.0, 1.0}
240 };
241 static const TableEntry tbl_acospi[] = {
242  {0.5, 0.0},
243  {0.5, -0.0},
244  {0.0, 1.0},
245  {1.0, -1.0}
246 };
247 static const TableEntry tbl_asin[] = {
248  {0.0, 0.0},
249  {-0.0, -0.0},
250  {MATH_PI / 2.0, 1.0},
251  {-MATH_PI / 2.0, -1.0}
252 };
253 static const TableEntry tbl_asinh[] = {
254  {0.0, 0.0},
255  {-0.0, -0.0}
256 };
257 static const TableEntry tbl_asinpi[] = {
258  {0.0, 0.0},
259  {-0.0, -0.0},
260  {0.5, 1.0},
261  {-0.5, -1.0}
262 };
263 static const TableEntry tbl_atan[] = {
264  {0.0, 0.0},
265  {-0.0, -0.0},
266  {MATH_PI / 4.0, 1.0},
267  {-MATH_PI / 4.0, -1.0}
268 };
269 static const TableEntry tbl_atanh[] = {
270  {0.0, 0.0},
271  {-0.0, -0.0}
272 };
273 static const TableEntry tbl_atanpi[] = {
274  {0.0, 0.0},
275  {-0.0, -0.0},
276  {0.25, 1.0},
277  {-0.25, -1.0}
278 };
279 static const TableEntry tbl_cbrt[] = {
280  {0.0, 0.0},
281  {-0.0, -0.0},
282  {1.0, 1.0},
283  {-1.0, -1.0},
284 };
285 static const TableEntry tbl_cos[] = {
286  {1.0, 0.0},
287  {1.0, -0.0}
288 };
289 static const TableEntry tbl_cosh[] = {
290  {1.0, 0.0},
291  {1.0, -0.0}
292 };
293 static const TableEntry tbl_cospi[] = {
294  {1.0, 0.0},
295  {1.0, -0.0}
296 };
297 static const TableEntry tbl_erfc[] = {
298  {1.0, 0.0},
299  {1.0, -0.0}
300 };
301 static const TableEntry tbl_erf[] = {
302  {0.0, 0.0},
303  {-0.0, -0.0}
304 };
305 static const TableEntry tbl_exp[] = {
306  {1.0, 0.0},
307  {1.0, -0.0},
308  {MATH_E, 1.0}
309 };
310 static const TableEntry tbl_exp2[] = {
311  {1.0, 0.0},
312  {1.0, -0.0},
313  {2.0, 1.0}
314 };
315 static const TableEntry tbl_exp10[] = {
316  {1.0, 0.0},
317  {1.0, -0.0},
318  {10.0, 1.0}
319 };
320 static const TableEntry tbl_expm1[] = {
321  {0.0, 0.0},
322  {-0.0, -0.0}
323 };
324 static const TableEntry tbl_log[] = {
325  {0.0, 1.0},
326  {1.0, MATH_E}
327 };
328 static const TableEntry tbl_log2[] = {
329  {0.0, 1.0},
330  {1.0, 2.0}
331 };
332 static const TableEntry tbl_log10[] = {
333  {0.0, 1.0},
334  {1.0, 10.0}
335 };
336 static const TableEntry tbl_rsqrt[] = {
337  {1.0, 1.0},
338  {MATH_SQRT1_2, 2.0}
339 };
340 static const TableEntry tbl_sin[] = {
341  {0.0, 0.0},
342  {-0.0, -0.0}
343 };
344 static const TableEntry tbl_sinh[] = {
345  {0.0, 0.0},
346  {-0.0, -0.0}
347 };
348 static const TableEntry tbl_sinpi[] = {
349  {0.0, 0.0},
350  {-0.0, -0.0}
351 };
352 static const TableEntry tbl_sqrt[] = {
353  {0.0, 0.0},
354  {1.0, 1.0},
355  {MATH_SQRT2, 2.0}
356 };
357 static const TableEntry tbl_tan[] = {
358  {0.0, 0.0},
359  {-0.0, -0.0}
360 };
361 static const TableEntry tbl_tanh[] = {
362  {0.0, 0.0},
363  {-0.0, -0.0}
364 };
365 static const TableEntry tbl_tanpi[] = {
366  {0.0, 0.0},
367  {-0.0, -0.0}
368 };
369 static const TableEntry tbl_tgamma[] = {
370  {1.0, 1.0},
371  {1.0, 2.0},
372  {2.0, 3.0},
373  {6.0, 4.0}
374 };
375 
// NOTE(review): the signature (embedded line 376) and the case labels
// (embedded lines 378-392) are missing from this listing. What remains
// returns true for the listed ids and false for every other id. Callers in
// this file use the result to decide whether a native variant exists.
377  switch(id) {
393  return true;
394  default:;
395  }
396  return false;
397 }
398 
399 struct TableRef {
400  size_t size;
401  const TableEntry *table; // variable size: from 0 to (size - 1)
402 
403  TableRef() : size(0), table(nullptr) {}
404 
405  template <size_t N>
406  TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
407 };
408 
// NOTE(review): the signature (embedded line 409) and most case labels are
// missing from this listing. The visible cases map a library-function id to
// the TableRef of its tbl_* array; ids without a case yield the empty
// TableRef, which TDOFold treats as "nothing to fold".
410  switch(id) {
422  case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos);
426  case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf);
427  case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp);
432  case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log);
439  case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin);
444  case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan);
448  default:;
449  }
450  return TableRef();
451 }
452 
453 static inline int getVecSize(const AMDGPULibFunc& FInfo) {
454  return FInfo.getLeads()[0].VectorSize;
455 }
456 
457 static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
458  return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
459 }
460 
461 FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
462  // If we are doing PreLinkOpt, the function is external. So it is safe to
463  // use getOrInsertFunction() at this stage.
464 
466  : AMDGPULibFunc::getFunction(M, fInfo);
467 }
468 
469 bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
470  FuncInfo &FInfo) {
471  return AMDGPULibFunc::parse(FMangledName, FInfo);
472 }
473 
474 bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
475  if (auto Op = dyn_cast<FPMathOperator>(CI))
476  if (Op->isFast())
477  return true;
478  const Function *F = CI->getParent()->getParent();
479  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
480  return Attr.getValueAsBool();
481 }
482 
483 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
484  return AllNative || llvm::is_contained(UseNative, F);
485 }
486 
488  AllNative = useNativeFunc("all") ||
489  (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
490  UseNative.begin()->empty());
491 }
492 
493 bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
494  bool native_sin = useNativeFunc("sin");
495  bool native_cos = useNativeFunc("cos");
496 
497  if (native_sin && native_cos) {
498  Module *M = aCI->getModule();
499  Value *opr0 = aCI->getArgOperand(0);
500 
501  AMDGPULibFunc nf;
502  nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
503  nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
504 
507  FunctionCallee sinExpr = getFunction(M, nf);
508 
511  FunctionCallee cosExpr = getFunction(M, nf);
512  if (sinExpr && cosExpr) {
513  Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
514  Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
515  new StoreInst(cosval, aCI->getArgOperand(1), aCI);
516 
517  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
518  << " with native version of sin/cos");
519 
520  replaceCall(sinval);
521  return true;
522  }
523  }
524  return false;
525 }
526 
528  CI = aCI;
530 
531  FuncInfo FInfo;
532  if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
533  FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
534  getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
535  !(AllNative || useNativeFunc(FInfo.getName()))) {
536  return false;
537  }
538 
539  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
540  return sincosUseNative(aCI, FInfo);
541 
543  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
544  if (!F)
545  return false;
546 
547  aCI->setCalledFunction(F);
548  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
549  << " with native version");
550  return true;
551 }
552 
553 // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
554 // builtin, with appended type size and alignment arguments, where 2 or 4
555 // indicates the original number of arguments. The library has optimized version
556 // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
557 // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
558 // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
559 // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
560 bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
561  const FuncInfo &FInfo) {
562  auto *Callee = CI->getCalledFunction();
563  if (!Callee->isDeclaration())
564  return false;
565 
566  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
567  auto *M = Callee->getParent();
568  auto &Ctx = M->getContext();
569  std::string Name = std::string(Callee->getName());
570  auto NumArg = CI->arg_size();
571  if (NumArg != 4 && NumArg != 6)
572  return false;
573  auto *PacketSize = CI->getArgOperand(NumArg - 2);
574  auto *PacketAlign = CI->getArgOperand(NumArg - 1);
575  if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
576  return false;
577  unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
578  Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
579  if (Alignment != Size)
580  return false;
581 
582  Type *PtrElemTy;
583  if (Size <= 8)
584  PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
585  else
586  PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
587  unsigned PtrArgLoc = CI->arg_size() - 3;
588  auto PtrArg = CI->getArgOperand(PtrArgLoc);
589  unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
590  auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
591 
593  for (unsigned I = 0; I != PtrArgLoc; ++I)
594  ArgTys.push_back(CI->getArgOperand(I)->getType());
595  ArgTys.push_back(PtrTy);
596 
597  Name = Name + "_" + std::to_string(Size);
598  auto *FTy = FunctionType::get(Callee->getReturnType(),
599  ArrayRef<Type *>(ArgTys), false);
600  AMDGPULibFunc NewLibFunc(Name, FTy);
602  if (!F)
603  return false;
604 
605  auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
607  for (unsigned I = 0; I != PtrArgLoc; ++I)
608  Args.push_back(CI->getArgOperand(I));
609  Args.push_back(BCast);
610 
611  auto *NCI = B.CreateCall(F, Args);
612  NCI->setAttributes(CI->getAttributes());
613  CI->replaceAllUsesWith(NCI);
615  CI->eraseFromParent();
616 
617  return true;
618 }
619 
// NOTE(review): this listing has lost several lines of the function: the
// signature (declared in the class as fold(CallInst *CI, AliasAnalysis *AA)),
// the definitions of Callee and the IRBuilder B, and every case label of the
// second switch. The comments below describe only what is still visible.
620 // This function returns false if no change; return true otherwise.
// Record the call so that replaceCall() operates on it.
622  this->CI = CI;
624 
625  // Ignore indirect calls.
626  if (Callee == 0) return false;
627 
628  BasicBlock *BB = CI->getParent();
631 
632  // Insert new instructions at the call's own position, i.e. ahead of it.
633  B.SetInsertPoint(BB, CI->getIterator());
634 
635  // Copy fast flags from the original call.
636  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
637  B.setFastMathFlags(FPOp->getFastMathFlags());
638 
// Intrinsics are handled first; the wavefrontsize fold is skipped in
// pre-link mode.
639  switch (Callee->getIntrinsicID()) {
640  default:
641  break;
642  case Intrinsic::amdgcn_wavefrontsize:
643  return !EnablePreLink && fold_wavefrontsize(CI, B);
644  }
645 
// Everything below applies only to names AMDGPULibFunc can parse.
646  FuncInfo FInfo;
647  if (!parseFunctionName(Callee->getName(), FInfo))
648  return false;
649 
650  // Further check the number of arguments to see if they match.
651  if (CI->arg_size() != FInfo.getNumArgs())
652  return false;
653 
// Table-driven folding is tried before any other simplification.
654  if (TDOFold(CI, FInfo))
655  return true;
656 
657  // Under unsafe-math, evaluate calls if possible.
658  // According to Brian Sumner, we can do this for all f32 function calls
659  // using host's double function calls.
660  if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
661  return true;
662 
663  // Specialized optimizations for each function call
// NOTE(review): the case labels that select each group below are missing.
664  switch (FInfo.getId()) {
666  // skip vector function
667  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
668  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
669  "recip must be an either native or half function");
670  return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
671 
673  // skip vector function
674  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
675  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
676  "divide must be an either native or half function");
677  return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
678 
682  return fold_pow(CI, B, FInfo);
683 
685  // skip vector function
686  return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
687 
691  // skip vector function
692  return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
693 
// The sqrt fold is attempted only under unsafe math.
695  return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
// The sincos fold is limited to unprefixed f32/f64 calls.
698  if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
699  getArgType(FInfo) == AMDGPULibFunc::F64)
700  && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
701  return fold_sincos(CI, B, AA);
702 
703  break;
708  return fold_read_write_pipe(CI, B, FInfo);
709 
710  default:
711  break;
712  }
713 
714  return false;
715 }
716 
717 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
718  // Table-Driven optimization
719  const TableRef tr = getOptTable(FInfo.getId());
720  if (tr.size==0)
721  return false;
722 
723  int const sz = (int)tr.size;
724  const TableEntry * const ftbl = tr.table;
725  Value *opr0 = CI->getArgOperand(0);
726 
727  if (getVecSize(FInfo) > 1) {
728  if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
730  for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
731  ConstantFP *eltval = dyn_cast<ConstantFP>(
732  CV->getElementAsConstant((unsigned)eltNo));
733  assert(eltval && "Non-FP arguments in math function!");
734  bool found = false;
735  for (int i=0; i < sz; ++i) {
736  if (eltval->isExactlyValue(ftbl[i].input)) {
737  DVal.push_back(ftbl[i].result);
738  found = true;
739  break;
740  }
741  }
742  if (!found) {
743  // This vector constants not handled yet.
744  return false;
745  }
746  }
747  LLVMContext &context = CI->getParent()->getParent()->getContext();
748  Constant *nval;
749  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
751  for (unsigned i = 0; i < DVal.size(); ++i) {
752  FVal.push_back((float)DVal[i]);
753  }
754  ArrayRef<float> tmp(FVal);
755  nval = ConstantDataVector::get(context, tmp);
756  } else { // F64
757  ArrayRef<double> tmp(DVal);
758  nval = ConstantDataVector::get(context, tmp);
759  }
760  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
761  replaceCall(nval);
762  return true;
763  }
764  } else {
765  // Scalar version
766  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
767  for (int i = 0; i < sz; ++i) {
768  if (CF->isExactlyValue(ftbl[i].input)) {
769  Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
770  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
771  replaceCall(nval);
772  return true;
773  }
774  }
775  }
776  }
777 
778  return false;
779 }
780 
781 bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
782  Module *M = CI->getModule();
783  if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
784  FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
785  !HasNative(FInfo.getId()))
786  return false;
787 
788  AMDGPULibFunc nf = FInfo;
790  if (FunctionCallee FPExpr = getFunction(M, nf)) {
791  LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
792 
793  CI->setCalledFunction(FPExpr);
794 
795  LLVM_DEBUG(dbgs() << *CI << '\n');
796 
797  return true;
798  }
799  return false;
800 }
801 
802 // [native_]half_recip(c) ==> 1.0/c
803 bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
804  const FuncInfo &FInfo) {
805  Value *opr0 = CI->getArgOperand(0);
806  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
807  // Just create a normal div. Later, InstCombine will be able
808  // to compute the divide into a constant (avoid check float infinity
809  // or subnormal at this point).
810  Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
811  opr0,
812  "recip2div");
813  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
814  replaceCall(nval);
815  return true;
816  }
817  return false;
818 }
819 
820 // [native_]half_divide(x, c) ==> x/c
821 bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
822  const FuncInfo &FInfo) {
823  Value *opr0 = CI->getArgOperand(0);
824  Value *opr1 = CI->getArgOperand(1);
825  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
826  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
827 
828  if ((CF0 && CF1) || // both are constants
829  (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
830  // CF1 is constant && f32 divide
831  {
832  Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
833  opr1, "__div2recip");
834  Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
835  replaceCall(nval);
836  return true;
837  }
838  return false;
839 }
840 
namespace llvm {
// Base-2 logarithm of \p V. The C library's log2 is used when the feature
// test macros advertise it; otherwise the value is computed as ln(V) / ln(2).
static double log2(double V) {
#if !(_XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L)
  return log(V) / numbers::ln2;
#else
  return ::log2(V);
#endif
}
}
850 
851 bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
852  const FuncInfo &FInfo) {
853  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
854  FInfo.getId() == AMDGPULibFunc::EI_POWR ||
855  FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
856  "fold_pow: encounter a wrong function call");
857 
858  Value *opr0, *opr1;
859  ConstantFP *CF;
860  ConstantInt *CINT;
861  ConstantAggregateZero *CZero;
862  Type *eltType;
863 
864  opr0 = CI->getArgOperand(0);
865  opr1 = CI->getArgOperand(1);
866  CZero = dyn_cast<ConstantAggregateZero>(opr1);
867  if (getVecSize(FInfo) == 1) {
868  eltType = opr0->getType();
869  CF = dyn_cast<ConstantFP>(opr1);
870  CINT = dyn_cast<ConstantInt>(opr1);
871  } else {
872  VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
873  assert(VTy && "Oprand of vector function should be of vectortype");
874  eltType = VTy->getElementType();
875  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
876 
877  // Now, only Handle vector const whose elements have the same value.
878  CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
879  CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
880  }
881 
882  // No unsafe math , no constant argument, do nothing
883  if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
884  return false;
885 
886  // 0x1111111 means that we don't do anything for this call.
887  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
888 
889  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
890  // pow/powr/pown(x, 0) == 1
891  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
892  Constant *cnval = ConstantFP::get(eltType, 1.0);
893  if (getVecSize(FInfo) > 1) {
894  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
895  }
896  replaceCall(cnval);
897  return true;
898  }
899  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
900  // pow/powr/pown(x, 1.0) = x
901  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
902  replaceCall(opr0);
903  return true;
904  }
905  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
906  // pow/powr/pown(x, 2.0) = x*x
907  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
908  << "\n");
909  Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
910  replaceCall(nval);
911  return true;
912  }
913  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
914  // pow/powr/pown(x, -1.0) = 1.0/x
915  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
916  Constant *cnval = ConstantFP::get(eltType, 1.0);
917  if (getVecSize(FInfo) > 1) {
918  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
919  }
920  Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
921  replaceCall(nval);
922  return true;
923  }
924 
925  Module *M = CI->getModule();
926  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
927  // pow[r](x, [-]0.5) = sqrt(x)
928  bool issqrt = CF->isExactlyValue(0.5);
929  if (FunctionCallee FPExpr =
930  getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
932  FInfo))) {
933  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
934  << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
935  Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
936  : "__pow2rsqrt");
937  replaceCall(nval);
938  return true;
939  }
940  }
941 
942  if (!isUnsafeMath(CI))
943  return false;
944 
945  // Unsafe Math optimization
946 
947  // Remember that ci_opr1 is set if opr1 is integral
948  if (CF) {
949  double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
950  ? (double)CF->getValueAPF().convertToFloat()
951  : CF->getValueAPF().convertToDouble();
952  int ival = (int)dval;
953  if ((double)ival == dval) {
954  ci_opr1 = ival;
955  } else
956  ci_opr1 = 0x11111111;
957  }
958 
959  // pow/powr/pown(x, c) = [1/](x*x*..x); where
960  // trunc(c) == c && the number of x == c && |c| <= 12
961  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
962  if (abs_opr1 <= 12) {
963  Constant *cnval;
964  Value *nval;
965  if (abs_opr1 == 0) {
966  cnval = ConstantFP::get(eltType, 1.0);
967  if (getVecSize(FInfo) > 1) {
968  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
969  }
970  nval = cnval;
971  } else {
972  Value *valx2 = nullptr;
973  nval = nullptr;
974  while (abs_opr1 > 0) {
975  valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
976  if (abs_opr1 & 1) {
977  nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
978  }
979  abs_opr1 >>= 1;
980  }
981  }
982 
983  if (ci_opr1 < 0) {
984  cnval = ConstantFP::get(eltType, 1.0);
985  if (getVecSize(FInfo) > 1) {
986  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
987  }
988  nval = B.CreateFDiv(cnval, nval, "__1powprod");
989  }
990  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
991  << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
992  << ")\n");
993  replaceCall(nval);
994  return true;
995  }
996 
997  // powr ---> exp2(y * log2(x))
998  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
999  FunctionCallee ExpExpr =
1000  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
1001  if (!ExpExpr)
1002  return false;
1003 
1004  bool needlog = false;
1005  bool needabs = false;
1006  bool needcopysign = false;
1007  Constant *cnval = nullptr;
1008  if (getVecSize(FInfo) == 1) {
1009  CF = dyn_cast<ConstantFP>(opr0);
1010 
1011  if (CF) {
1012  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1013  ? (double)CF->getValueAPF().convertToFloat()
1014  : CF->getValueAPF().convertToDouble();
1015 
1016  V = log2(std::abs(V));
1017  cnval = ConstantFP::get(eltType, V);
1018  needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
1019  CF->isNegative();
1020  } else {
1021  needlog = true;
1022  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1023  (!CF || CF->isNegative());
1024  }
1025  } else {
1026  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
1027 
1028  if (!CDV) {
1029  needlog = true;
1030  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
1031  } else {
1032  assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
1033  "Wrong vector size detected");
1034 
1036  for (int i=0; i < getVecSize(FInfo); ++i) {
1037  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1038  ? (double)CDV->getElementAsFloat(i)
1039  : CDV->getElementAsDouble(i);
1040  if (V < 0.0) needcopysign = true;
1041  V = log2(std::abs(V));
1042  DVal.push_back(V);
1043  }
1044  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1045  SmallVector<float, 0> FVal;
1046  for (unsigned i=0; i < DVal.size(); ++i) {
1047  FVal.push_back((float)DVal[i]);
1048  }
1049  ArrayRef<float> tmp(FVal);
1050  cnval = ConstantDataVector::get(M->getContext(), tmp);
1051  } else {
1052  ArrayRef<double> tmp(DVal);
1053  cnval = ConstantDataVector::get(M->getContext(), tmp);
1054  }
1055  }
1056  }
1057 
1058  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
1059  // We cannot handle corner cases for a general pow() function, give up
1060  // unless y is a constant integral value. Then proceed as if it were pown.
1061  if (getVecSize(FInfo) == 1) {
1062  if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
1063  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1064  ? (double)CF->getValueAPF().convertToFloat()
1065  : CF->getValueAPF().convertToDouble();
1066  if (y != (double)(int64_t)y)
1067  return false;
1068  } else
1069  return false;
1070  } else {
1071  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
1072  for (int i=0; i < getVecSize(FInfo); ++i) {
1073  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1074  ? (double)CDV->getElementAsFloat(i)
1075  : CDV->getElementAsDouble(i);
1076  if (y != (double)(int64_t)y)
1077  return false;
1078  }
1079  } else
1080  return false;
1081  }
1082  }
1083 
1084  Value *nval;
1085  if (needabs) {
1086  FunctionCallee AbsExpr =
1087  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
1088  if (!AbsExpr)
1089  return false;
1090  nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
1091  } else {
1092  nval = cnval ? cnval : opr0;
1093  }
1094  if (needlog) {
1095  FunctionCallee LogExpr =
1096  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1097  if (!LogExpr)
1098  return false;
1099  nval = CreateCallEx(B,LogExpr, nval, "__log2");
1100  }
1101 
1102  if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
1103  // convert int(32) to fp(f32 or f64)
1104  opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1105  }
1106  nval = B.CreateFMul(opr1, nval, "__ylogx");
1107  nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
1108 
1109  if (needcopysign) {
1110  Value *opr_n;
1111  Type* rTy = opr0->getType();
1112  Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
1113  Type *nTy = nTyS;
1114  if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
1115  nTy = FixedVectorType::get(nTyS, vTy);
1116  unsigned size = nTy->getScalarSizeInBits();
1117  opr_n = CI->getArgOperand(1);
1118  if (opr_n->getType()->isIntegerTy())
1119  opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
1120  else
1121  opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1122 
1123  Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1124  sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1125  nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1126  nval = B.CreateBitCast(nval, opr0->getType());
1127  }
1128 
1129  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1130  << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1131  replaceCall(nval);
1132 
1133  return true;
1134 }
1135 
1136 bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
1137  const FuncInfo &FInfo) {
1138  Value *opr0 = CI->getArgOperand(0);
1139  Value *opr1 = CI->getArgOperand(1);
1140 
1141  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
1142  if (!CINT) {
1143  return false;
1144  }
1145  int ci_opr1 = (int)CINT->getSExtValue();
1146  if (ci_opr1 == 1) { // rootn(x, 1) = x
1147  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
1148  replaceCall(opr0);
1149  return true;
1150  }
1151  if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
1152  Module *M = CI->getModule();
1153  if (FunctionCallee FPExpr =
1154  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1155  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
1156  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
1157  replaceCall(nval);
1158  return true;
1159  }
1160  } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1161  Module *M = CI->getModule();
1162  if (FunctionCallee FPExpr =
1163  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1164  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
1165  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1166  replaceCall(nval);
1167  return true;
1168  }
1169  } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1170  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
1171  Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1172  opr0,
1173  "__rootn2div");
1174  replaceCall(nval);
1175  return true;
1176  } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
1177  Module *M = CI->getModule();
1178  if (FunctionCallee FPExpr =
1179  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
1180  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
1181  << ")\n");
1182  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
1183  replaceCall(nval);
1184  return true;
1185  }
1186  }
1187  return false;
1188 }
1189 
1190 bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
1191  const FuncInfo &FInfo) {
1192  Value *opr0 = CI->getArgOperand(0);
1193  Value *opr1 = CI->getArgOperand(1);
1194  Value *opr2 = CI->getArgOperand(2);
1195 
1196  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
1197  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
1198  if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
1199  // fma/mad(a, b, c) = c if a=0 || b=0
1200  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
1201  replaceCall(opr2);
1202  return true;
1203  }
1204  if (CF0 && CF0->isExactlyValue(1.0f)) {
1205  // fma/mad(a, b, c) = b+c if a=1
1206  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
1207  << "\n");
1208  Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
1209  replaceCall(nval);
1210  return true;
1211  }
1212  if (CF1 && CF1->isExactlyValue(1.0f)) {
1213  // fma/mad(a, b, c) = a+c if b=1
1214  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
1215  << "\n");
1216  Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
1217  replaceCall(nval);
1218  return true;
1219  }
1220  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
1221  if (CF->isZero()) {
1222  // fma/mad(a, b, c) = a*b if c=0
1223  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
1224  << *opr1 << "\n");
1225  Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
1226  replaceCall(nval);
1227  return true;
1228  }
1229  }
1230 
1231  return false;
1232 }
1233 
// Look up, in module M, the function described by a copy of FInfo.
// Returns a null callee when the argument type is F64 or when HasNative()
// reports nothing for this function id; otherwise returns whatever
// getFunction() yields for the copied descriptor `nf`.
1234 // Get a scalar native builtin single argument FP function
1235 FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1236  const FuncInfo &FInfo) {
1237  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1238  return nullptr;
1239  FuncInfo nf = FInfo;
// NOTE(review): listing line 1240 is absent from this extract; presumably it
// switches `nf` to the native prefix before the lookup -- confirm against the
// upstream file.
1241  return getFunction(M, nf);
1242 }
1243 
// Replace a scalar F32 sqrt call that does not already carry the NATIVE prefix
// with a call to the function returned by getNativeFunction().
// Returns true if the call was replaced; false when the type/prefix guard
// fails or no native callee is available.
1244 // fold sqrt -> native_sqrt (x)
1245 bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
1246  const FuncInfo &FInfo) {
// Only scalar (vector size 1) F32 calls that are not already native qualify.
1247  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
1248  (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
// NOTE(review): listing line 1250 (the argument list of getNativeFunction and
// the end of this `if` condition) is absent from this extract.
1249  if (FunctionCallee FPExpr = getNativeFunction(
1251  Value *opr0 = CI->getArgOperand(0);
1252  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1253  << "sqrt(" << *opr0 << ")\n");
1254  Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
1255  replaceCall(nval);
1256  return true;
1257  }
1258  }
1259  return false;
1260 }
1261 
// Merge a sin call and a cos call on the same argument, located in the same
// basic block, into a single sincos call.
//
// CI is the sin or cos call being visited (asserted below). The function
//   1. optionally forwards an already-available value for a loaded argument,
//   2. searches the users of the argument for the partner call UI,
//   3. emits sincos at UI's position, passing a pointer derived from a fresh
//      entry-block alloca as the second argument, and
//   4. rewires the uses of CI and UI to the sincos result / a reload of the
//      alloca, then erases both calls.
// Returns true when the pair was merged. Otherwise returns `Changed`, which is
// true only if step 1 rewrote the argument.
1262 // fold sin, cos -> sincos.
1263 bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
1264  AliasAnalysis *AA) {
1265  AMDGPULibFunc fInfo;
1266  if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
1267  return false;
1268 
1269  assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1270  fInfo.getId() == AMDGPULibFunc::EI_COS);
1271  bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1272 
1273  Value *CArgVal = CI->getArgOperand(0);
1274  BasicBlock * const CBB = CI->getParent();
1275 
// Upper bound on how many instructions are scanned, both by
// FindAvailableLoadedValue() and by the backward search for the partner call.
1276  int const MaxScan = 30;
1277  bool Changed = false;
1278 
1279  { // fold in load value.
1280  LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
1281  if (LI && LI->getParent() == CBB) {
1282  BasicBlock::iterator BBI = LI->getIterator();
1283  Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
1284  if (AvailableVal) {
1285  Changed = true;
1286  CArgVal->replaceAllUsesWith(AvailableVal);
1287  if (CArgVal->getNumUses() == 0)
1288  LI->eraseFromParent();
// Re-read the operand: after the replacement CI refers to AvailableVal.
1289  CArgVal = CI->getArgOperand(0);
1290  }
1291  }
1292  }
1293 
1294  Module *M = CI->getModule();
// NOTE(review): listing line 1295 is absent from this extract; presumably it
// retargets fInfo to the partner function (cos for sin, sin for cos) so that
// mangle() below yields the partner's name -- confirm.
1296  std::string const PairName = fInfo.mangle();
1297 
// Look for a direct call to PairName, other than CI, that uses the same
// argument and lives in CI's block.
1298  CallInst *UI = nullptr;
1299  for (User* U : CArgVal->users()) {
1300  CallInst *XI = dyn_cast_or_null<CallInst>(U);
1301  if (!XI || XI == CI || XI->getParent() != CBB)
1302  continue;
1303 
1304  Function *UCallee = XI->getCalledFunction();
1305  if (!UCallee || !UCallee->getName().equals(PairName))
1306  continue;
1307 
// NOTE(review): listing line 1308, which declares and initializes BBI, is
// absent from this extract. The loop below walks BBI backwards for at most
// MaxScan steps and accepts XI only if it is reached that way. Because the
// loop condition stops at CBB->begin(), the first instruction of the block is
// never compared against XI -- confirm whether that is intended.
1309  if (BBI == CI->getParent()->begin())
1310  break;
1311  --BBI;
1312  for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
1313  if (cast<Instruction>(BBI) == XI) {
1314  UI = XI;
1315  break;
1316  }
1317  }
1318  if (UI) break;
1319  }
1320 
1321  if (!UI)
1322  return Changed;
1323 
1324  // Merge the sin and cos.
1325 
1326  // for OpenCL 2.0 we have only generic implementation of sincos
1327  // function.
// NOTE(review): listing lines 1328-1329, which build the descriptor `nf` for
// the sincos function, are absent from this extract.
1330  FunctionCallee Fsincos = getFunction(M, nf);
1331  if (!Fsincos)
1332  return Changed;
1333 
// insertAlloca() repositions the builder, so the current insertion point is
// saved first; the builder is then placed at UI, where sincos is emitted.
1334  BasicBlock::iterator ItOld = B.GetInsertPoint();
1335  AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
1336  B.SetInsertPoint(UI);
1337 
1338  Value *P = Alloc;
1339  Type *PTy = Fsincos.getFunctionType()->getParamType(1);
1340  // The allocaInst allocates the memory in private address space. This needs
1341  // to be cast to the address space of the cos pointer type.
1342  // In OpenCL 2.0 this is generic, while in 1.2 that is private.
// NOTE(review): listing line 1343, presumably the condition guarding the
// address-space cast, is absent from this extract.
1344  P = B.CreateAddrSpaceCast(Alloc, PTy);
1345  CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
1346 
1347  LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
1348  << *Call << "\n");
1349 
// The value returned by the sincos call replaces the sin call; the value
// reloaded from the alloca replaces the cos call. The two branches differ only
// in which of CI/UI plays which role and in where the reload is inserted.
1350  if (!isSin) { // CI->cos, UI->sin
1351  B.SetInsertPoint(&*ItOld);
1352  UI->replaceAllUsesWith(&*Call);
1353  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1354  CI->replaceAllUsesWith(Reload);
1355  UI->eraseFromParent();
1356  CI->eraseFromParent();
1357  } else { // CI->sin, UI->cos
1358  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1359  UI->replaceAllUsesWith(Reload);
1360  CI->replaceAllUsesWith(Call);
1361  UI->eraseFromParent();
1362  CI->eraseFromParent();
1363  }
1364  return true;
1365 }
1366 
1367 bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1368  if (!TM)
1369  return false;
1370 
1371  StringRef CPU = TM->getTargetCPU();
1372  StringRef Features = TM->getTargetFeatureString();
1373  if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
1374  (Features.empty() || !Features.contains_insensitive("wavefrontsize")))
1375  return false;
1376 
1377  Function *F = CI->getParent()->getParent();
1378  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1379  unsigned N = ST.getWavefrontSize();
1380 
1381  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1382  << N << "\n");
1383 
1384  CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1385  CI->eraseFromParent();
1386  return true;
1387 }
1388 
1389 // Get insertion point at entry.
1390 BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
1391  Function * Func = UI->getParent()->getParent();
1392  BasicBlock * BB = &Func->getEntryBlock();
1393  assert(BB && "Entry block not found!");
1394  BasicBlock::iterator ItNew = BB->begin();
1395  return ItNew;
1396 }
1397 
1398 // Insert a AllocsInst at the beginning of function entry block.
1399 AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
1400  const char *prefix) {
1401  BasicBlock::iterator ItNew = getEntryIns(UI);
1402  Function *UCallee = UI->getCalledFunction();
1403  Type *RetType = UCallee->getReturnType();
1404  B.SetInsertPoint(&*ItNew);
1405  AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
1406  std::string(prefix) + UI->getName());
1407  Alloc->setAlignment(
1408  Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
1409  return Alloc;
1410 }
1411 
// Evaluate one scalar lane of the library math function identified by
// FInfo.getId() on the host, using the C math functions on double.
//
// copr0/copr1/copr2 are the (possibly null) constant operands. Operands that
// are ConstantFP are converted to double: via convertToDouble() when the
// argument type is F64, otherwise via convertToFloat() widened to double.
// Operands that are null or not ConstantFP leave the corresponding local at
// 0.0.
//
// Res0 receives the result. Res1 is written only by the branch that computes
// both sin and cos. Returns true if the function id was handled, false for
// unknown ids and for POWN/ROOTN whose second operand is not a ConstantInt.
//
// NOTE(review): most `case` labels of the switch below are absent from this
// extract (the embedded listing numbers skip wherever one was dropped); the
// surviving bodies still show which formula each case evaluates.
1412 bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
1413  double& Res0, double& Res1,
1414  Constant *copr0, Constant *copr1,
1415  Constant *copr2) {
1416  // By default, opr0/opr1/opr2 hold the operand values converted from
1417  // float/double constants. If an operand is not float/double, the case that
1418  // needs it has to decode that operand separately.
1419  double opr0=0.0, opr1=0.0, opr2=0.0;
1420  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1421  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1422  ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
1423  if (fpopr0) {
1424  opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1425  ? fpopr0->getValueAPF().convertToDouble()
1426  : (double)fpopr0->getValueAPF().convertToFloat();
1427  }
1428 
1429  if (fpopr1) {
1430  opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1431  ? fpopr1->getValueAPF().convertToDouble()
1432  : (double)fpopr1->getValueAPF().convertToFloat();
1433  }
1434 
1435  if (fpopr2) {
1436  opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1437  ? fpopr2->getValueAPF().convertToDouble()
1438  : (double)fpopr2->getValueAPF().convertToFloat();
1439  }
1440 
1441  switch (FInfo.getId()) {
1442  default : return false;
1443 
1445  Res0 = acos(opr0);
1446  return true;
1447 
1449  // acosh(x) == log(x + sqrt(x*x - 1))
1450  Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1451  return true;
1452 
1454  Res0 = acos(opr0) / MATH_PI;
1455  return true;
1456 
1458  Res0 = asin(opr0);
1459  return true;
1460 
1462  // asinh(x) == log(x + sqrt(x*x + 1))
1463  Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1464  return true;
1465 
1467  Res0 = asin(opr0) / MATH_PI;
1468  return true;
1469 
1471  Res0 = atan(opr0);
1472  return true;
1473 
// NOTE(review): the usual identity is atanh(x) = (log(1+x) - log(1-x))/2.
// As written, log(opr0 - 1.0) takes the log of a negative number for every
// |opr0| < 1, which yields NaN -- confirm and fix upstream.
1475  // atanh(x) == (log(x+1) - log(x-1))/2;
1476  Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1477  return true;
1478 
1480  Res0 = atan(opr0) / MATH_PI;
1481  return true;
1482 
// Cube root: pow() is applied to the magnitude and the sign is restored,
// because pow() with a fractional exponent is not defined for negative bases.
1484  Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1485  return true;
1486 
1487  case AMDGPULibFunc::EI_COS:
1488  Res0 = cos(opr0);
1489  return true;
1490 
1492  Res0 = cosh(opr0);
1493  return true;
1494 
1496  Res0 = cos(MATH_PI * opr0);
1497  return true;
1498 
1499  case AMDGPULibFunc::EI_EXP:
1500  Res0 = exp(opr0);
1501  return true;
1502 
1504  Res0 = pow(2.0, opr0);
1505  return true;
1506 
1508  Res0 = pow(10.0, opr0);
1509  return true;
1510 
1512  Res0 = exp(opr0) - 1.0;
1513  return true;
1514 
1515  case AMDGPULibFunc::EI_LOG:
1516  Res0 = log(opr0);
1517  return true;
1518 
1520  Res0 = log(opr0) / log(2.0);
1521  return true;
1522 
1524  Res0 = log(opr0) / log(10.0);
1525  return true;
1526 
1528  Res0 = 1.0 / sqrt(opr0);
1529  return true;
1530 
1531  case AMDGPULibFunc::EI_SIN:
1532  Res0 = sin(opr0);
1533  return true;
1534 
1536  Res0 = sinh(opr0);
1537  return true;
1538 
1540  Res0 = sin(MATH_PI * opr0);
1541  return true;
1542 
1544  Res0 = sqrt(opr0);
1545  return true;
1546 
1547  case AMDGPULibFunc::EI_TAN:
1548  Res0 = tan(opr0);
1549  return true;
1550 
1552  Res0 = tanh(opr0);
1553  return true;
1554 
1556  Res0 = tan(MATH_PI * opr0);
1557  return true;
1558 
1560  Res0 = 1.0 / opr0;
1561  return true;
1562 
1563  // two-arg functions
1565  Res0 = opr0 / opr1;
1566  return true;
1567 
1568  case AMDGPULibFunc::EI_POW:
1570  Res0 = pow(opr0, opr1);
1571  return true;
1572 
// The exponent of pown is an integer constant, so it is read from copr1
// directly instead of from the FP-converted opr1.
1573  case AMDGPULibFunc::EI_POWN: {
1574  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1575  double val = (double)iopr1->getSExtValue();
1576  Res0 = pow(opr0, val);
1577  return true;
1578  }
1579  return false;
1580  }
1581 
// NOTE(review): pow() with the fractional exponent 1.0/val returns NaN for
// negative opr0, even when val is odd -- confirm this matches the intended
// rootn semantics.
1582  case AMDGPULibFunc::EI_ROOTN: {
1583  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1584  double val = (double)iopr1->getSExtValue();
1585  Res0 = pow(opr0, 1.0 / val);
1586  return true;
1587  }
1588  return false;
1589  }
1590 
1591  // with ptr arg
// The only branch that produces two results: sin in Res0, cos in Res1.
1593  Res0 = sin(opr0);
1594  Res1 = cos(opr0);
1595  return true;
1596 
1597  // three-arg functions
1598  case AMDGPULibFunc::EI_FMA:
1599  case AMDGPULibFunc::EI_MAD:
1600  Res0 = opr0 * opr1 + opr2;
1601  return true;
1602  }
1603 
1604  return false;
1605 }
1606 
1607 bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1608  int numArgs = (int)aCI->arg_size();
1609  if (numArgs > 3)
1610  return false;
1611 
1612  Constant *copr0 = nullptr;
1613  Constant *copr1 = nullptr;
1614  Constant *copr2 = nullptr;
1615  if (numArgs > 0) {
1616  if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1617  return false;
1618  }
1619 
1620  if (numArgs > 1) {
1621  if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1622  if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1623  return false;
1624  }
1625  }
1626 
1627  if (numArgs > 2) {
1628  if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
1629  return false;
1630  }
1631 
1632  // At this point, all arguments to aCI are constants.
1633 
1634  // max vector size is 16, and sincos will generate two results.
1635  double DVal0[16], DVal1[16];
1636  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1637  if (getVecSize(FInfo) == 1) {
1638  if (!evaluateScalarMathFunc(FInfo, DVal0[0],
1639  DVal1[0], copr0, copr1, copr2)) {
1640  return false;
1641  }
1642  } else {
1643  ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1644  ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1645  ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
1646  for (int i=0; i < getVecSize(FInfo); ++i) {
1647  Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1648  Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1649  Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
1650  if (!evaluateScalarMathFunc(FInfo, DVal0[i],
1651  DVal1[i], celt0, celt1, celt2)) {
1652  return false;
1653  }
1654  }
1655  }
1656 
1657  LLVMContext &context = CI->getParent()->getParent()->getContext();
1658  Constant *nval0, *nval1;
1659  if (getVecSize(FInfo) == 1) {
1660  nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
1661  if (hasTwoResults)
1662  nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
1663  } else {
1664  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1665  SmallVector <float, 0> FVal0, FVal1;
1666  for (int i=0; i < getVecSize(FInfo); ++i)
1667  FVal0.push_back((float)DVal0[i]);
1668  ArrayRef<float> tmp0(FVal0);
1669  nval0 = ConstantDataVector::get(context, tmp0);
1670  if (hasTwoResults) {
1671  for (int i=0; i < getVecSize(FInfo); ++i)
1672  FVal1.push_back((float)DVal1[i]);
1673  ArrayRef<float> tmp1(FVal1);
1674  nval1 = ConstantDataVector::get(context, tmp1);
1675  }
1676  } else {
1677  ArrayRef<double> tmp0(DVal0);
1678  nval0 = ConstantDataVector::get(context, tmp0);
1679  if (hasTwoResults) {
1680  ArrayRef<double> tmp1(DVal1);
1681  nval1 = ConstantDataVector::get(context, tmp1);
1682  }
1683  }
1684  }
1685 
1686  if (hasTwoResults) {
1687  // sincos
1688  assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
1689  "math function with ptr arg not supported yet");
1690  new StoreInst(nval1, aCI->getArgOperand(1), aCI);
1691  }
1692 
1693  replaceCall(nval0);
1694  return true;
1695 }
1696 
// Factory: returns a newly allocated AMDGPUSimplifyLibCalls pass constructed
// from TM.
// NOTE(review): the signature line (listing line 1698) is absent from this
// extract.
1697 // Public interface to the Simplify LibCalls pass.
1699  return new AMDGPUSimplifyLibCalls(TM);
1700 }
1701 
// Factory: returns a newly allocated AMDGPUUseNativeCalls pass.
// NOTE(review): the signature line (listing line 1702) is absent from this
// extract.
1703  return new AMDGPUUseNativeCalls();
1704 }
1705 
// Body of a function-pass entry point that runs Simplifier.fold() on every
// eligible call instruction of F and returns whether anything changed.
// Alias analysis is obtained from AAResultsWrapperPass.
// NOTE(review): the signature line (listing line 1706) is absent from this
// extract.
1707  if (skipFunction(F))
1708  return false;
1709 
1710  bool Changed = false;
1711  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1712 
1713  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1714  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1715 
1716  for (auto &BB : F) {
1717  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1718  // Ignore non-calls.
1719  CallInst *CI = dyn_cast<CallInst>(I);
// The iterator is advanced before CI is processed; presumably so that a fold
// which erases CI does not invalidate I.
1720  ++I;
1721  // Ignore intrinsics that do not become real instructions.
1722  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1723  continue;
1724 
1725  // Ignore indirect calls.
// NOTE(review): listing line 1726, which declares `Callee`, is absent from
// this extract.
1727  if (Callee == 0) continue;
1728 
1729  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1730  dbgs().flush());
1731  if(Simplifier.fold(CI, AA))
1732  Changed = true;
1733  }
1734  }
1735  return Changed;
1736 }
1737 
// Body of a pass entry point that builds a local AMDGPULibCalls simplifier for
// TM, runs fold() on every eligible call instruction of F, and reports the
// result as PreservedAnalyses: none() if anything changed, all() otherwise.
// Alias analysis is obtained from the analysis manager AM.
// NOTE(review): the signature lines (listing lines 1738-1739) are absent from
// this extract.
1740  AMDGPULibCalls Simplifier(&TM);
1741  Simplifier.initNativeFuncs();
1742 
1743  bool Changed = false;
1744  auto AA = &AM.getResult<AAManager>(F);
1745 
1746  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1747  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1748 
1749  for (auto &BB : F) {
1750  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1751  // Ignore non-calls.
1752  CallInst *CI = dyn_cast<CallInst>(I);
// The iterator is advanced before CI is processed; presumably so that a fold
// which erases CI does not invalidate I.
1753  ++I;
1754  // Ignore intrinsics that do not become real instructions.
1755  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1756  continue;
1757 
1758  // Ignore indirect calls.
// NOTE(review): listing line 1759, which declares `Callee`, is absent from
// this extract.
1760  if (Callee == 0)
1761  continue;
1762 
1763  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1764  dbgs().flush());
1765  if (Simplifier.fold(CI, AA))
1766  Changed = true;
1767  }
1768  }
1769  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1770 }
1771 
// Body of a function-pass entry point that runs Simplifier.useNative() on
// every direct call of F and returns whether anything changed. It does nothing
// when the function is skipped or the UseNative option list is empty.
// NOTE(review): the signature line (listing line 1772) is absent from this
// extract.
1773  if (skipFunction(F) || UseNative.empty())
1774  return false;
1775 
1776  bool Changed = false;
1777  for (auto &BB : F) {
1778  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1779  // Ignore non-calls.
1780  CallInst *CI = dyn_cast<CallInst>(I);
// The iterator is advanced before CI is processed; presumably so that a
// replacement which erases CI does not invalidate I.
1781  ++I;
1782  if (!CI) continue;
1783 
1784  // Ignore indirect calls.
// NOTE(review): listing line 1785, which declares `Callee`, is absent from
// this extract.
1786  if (Callee == 0) continue;
1787 
1788  if(Simplifier.useNative(CI))
1789  Changed = true;
1790  }
1791  }
1792  return Changed;
1793 }
1794 
// Body of a pass entry point that builds a local AMDGPULibCalls simplifier,
// runs useNative() on every direct call of F, and reports the result as
// PreservedAnalyses: none() if anything changed, all() otherwise. It returns
// all() immediately when the UseNative option list is empty.
// NOTE(review): the signature lines (listing lines 1795-1796) are absent from
// this extract.
1797  if (UseNative.empty())
1798  return PreservedAnalyses::all();
1799 
1800  AMDGPULibCalls Simplifier;
1801  Simplifier.initNativeFuncs();
1802 
1803  bool Changed = false;
1804  for (auto &BB : F) {
1805  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1806  // Ignore non-calls.
1807  CallInst *CI = dyn_cast<CallInst>(I);
// The iterator is advanced before CI is processed; presumably so that a
// replacement which erases CI does not invalidate I.
1808  ++I;
1809  if (!CI)
1810  continue;
1811 
1812  // Ignore indirect calls.
// NOTE(review): listing line 1813, which declares `Callee`, is absent from
// this extract.
1814  if (Callee == 0)
1815  continue;
1816 
1817  if (Simplifier.useNative(CI))
1818  Changed = true;
1819  }
1820  }
1821  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1822 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::AMDGPULibFunc::isMangled
bool isMangled() const
Definition: AMDGPULibFunc.h:379
llvm::AMDGPULibFuncBase::EI_ASINH
@ EI_ASINH
Definition: AMDGPULibFunc.h:46
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1288
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:4908
llvm::AMDGPULibFuncBase::EI_NFMA
@ EI_NFMA
Definition: AMDGPULibFunc.h:224
llvm::AMDGPULibCalls::AMDGPULibCalls
AMDGPULibCalls(const TargetMachine *TM_=nullptr)
Definition: AMDGPULibCalls.cpp:142
getOptTable
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:409
tbl_asin
static const TableEntry tbl_asin[]
Definition: AMDGPULibCalls.cpp:247
tbl_sinpi
static const TableEntry tbl_sinpi[]
Definition: AMDGPULibCalls.cpp:348
TableRef::table
const TableEntry * table
Definition: AMDGPULibCalls.cpp:401
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:66
llvm::AMDGPULibFuncBase::EI_ACOSH
@ EI_ACOSH
Definition: AMDGPULibFunc.h:40
llvm::StringRef::empty
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::AMDGPULibFuncBase::F64
@ F64
Definition: AMDGPULibFunc.h:272
tbl_tan
static const TableEntry tbl_tan[]
Definition: AMDGPULibCalls.cpp:357
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
tbl_acos
static const TableEntry tbl_acos[]
Definition: AMDGPULibCalls.cpp:232
tbl_log2
static const TableEntry tbl_log2[]
Definition: AMDGPULibCalls.cpp:328
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:783
Loads.h
llvm::Function
Definition: Function.h:62
llvm::Attribute
Definition: Attributes.h:52
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::CallBase::setCalledFunction
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1419
TableRef::size
size_t size
Definition: AMDGPULibCalls.cpp:400
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:729
double
into xmm2 addss xmm2 xmm1 xmm3 addss xmm3 movaps xmm0 unpcklps xmm0 ret seems silly when it could just be one addps Expand libm rounding functions main should enable SSE DAZ mode and other fast SSE modes Think about doing i64 math in SSE regs on x86 This testcase should have no SSE instructions in and only one load from a constant double
Definition: README-SSE.txt:85
llvm::AMDGPULibFuncBase::EI_NCOS
@ EI_NCOS
Definition: AMDGPULibFunc.h:222
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
tbl_atanh
static const TableEntry tbl_atanh[]
Definition: AMDGPULibCalls.cpp:269
llvm::AMDGPULibFuncBase::EI_ROOTN
@ EI_ROOTN
Definition: AMDGPULibFunc.h:162
llvm::AMDGPULibFuncBase::EI_READ_PIPE_2
@ EI_READ_PIPE_2
Definition: AMDGPULibFunc.h:238
llvm::User::dropAllReferences
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
MATH_PI
#define MATH_PI
Definition: AMDGPULibCalls.cpp:38
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:169
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
tbl_log
static const TableEntry tbl_log[]
Definition: AMDGPULibCalls.cpp:324
llvm::IRBuilder<>
llvm::cl::ValueOptional
@ ValueOptional
Definition: CommandLine.h:136
llvm::AMDGPULibFuncBase::EI_ATANPI
@ EI_ATANPI
Definition: AMDGPULibFunc.h:54
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:363
tbl_log10
static const TableEntry tbl_log10[]
Definition: AMDGPULibCalls.cpp:332
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::AMDGPULibFuncBase::EI_POWR
@ EI_POWR
Definition: AMDGPULibFunc.h:152
llvm::ConstantDataVector::get
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:3082
llvm::ConstantDataSequential::getElementAsFloat
float getElementAsFloat(unsigned i) const
If this is a sequential container of floats, return the specified element as a float.
Definition: Constants.cpp:3263
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::AMDGPULibFunc::parse
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
Definition: AMDGPULibFunc.cpp:680
llvm::ConstantFP::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:301
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AMDGPULibFuncBase::EI_ERFC
@ EI_ERFC
Definition: AMDGPULibFunc.h:84
llvm::AMDGPULibFuncBase::EI_TANPI
@ EI_TANPI
Definition: AMDGPULibFunc.h:195
llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1458
llvm::ConstantAggregateZero
All zero aggregate value.
Definition: Constants.h:336
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:297
llvm::AMDGPULibFuncBase::EI_COSH
@ EI_COSH
Definition: AMDGPULibFunc.h:75
tbl_cbrt
static const TableEntry tbl_cbrt[]
Definition: AMDGPULibCalls.cpp:279
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:893
llvm::AMDGPULibFuncBase::EI_CBRT
@ EI_CBRT
Definition: AMDGPULibFunc.h:67
llvm::AMDGPULibFuncBase::EI_SIN
@ EI_SIN
Definition: AMDGPULibFunc.h:171
llvm::AMDGPULibFuncBase::EI_LOG
@ EI_LOG
Definition: AMDGPULibFunc.h:129
tmp1
urem i32 %X, 255 ret i32 %tmp1 } Currently it compiles to:... movl $2155905153, %ecx movl 8(%esp), %esi movl %esi, %eax mull %ecx ... This could be "reassociated" into:movl $2155905153, %eax movl 8(%esp), %ecx mull %ecx to avoid the copy. In fact, the existing two-address stuff would do this except that mul isn 't a commutative 2-addr instruction. I guess this has to be done at isel time based on the #uses to mul? Make sure the instruction which starts a loop does not cross a cacheline boundary. This requires knowning the exact length of each machine instruction. That is somewhat complicated, but doable. Example 256.bzip2:In the new trace, the hot loop has an instruction which crosses a cacheline boundary. In addition to potential cache misses, this can 't help decoding as I imagine there has to be some kind of complicated decoder reset and realignment to grab the bytes from the next cacheline. 532 532 0x3cfc movb(1809(%esp, %esi), %bl<<<--- spans 2 64 byte lines 942 942 0x3d03 movl %dh,(1809(%esp, %esi) 937 937 0x3d0a incl %esi 3 3 0x3d0b cmpb %bl, %dl 27 27 0x3d0d jnz 0x000062db< main+11707 > In c99 mode, the preprocessor doesn 't like assembly comments like #TRUNCATE. This could be a single 16-bit load. int f(char *p) { if((p[0]==1) &(p[1]==2)) return 1 tmp1
Definition: README.txt:375
llvm::AMDGPULibFuncBase::EI_POW
@ EI_POW
Definition: AMDGPULibFunc.h:150
that
we should consider alternate ways to model stack dependencies Lots of things could be done in WebAssemblyTargetTransformInfo cpp there are numerous optimization related hooks that can be overridden in WebAssemblyTargetLowering Instead of the OptimizeReturned which should consider preserving the returned attribute through to MachineInstrs and extending the MemIntrinsicResults pass to do this optimization on calls too That would also let the WebAssemblyPeephole pass clean up dead defs for such as it does for stores Consider implementing and or getMachineCombinerPatterns Find a clean way to fix the problem which leads to the Shrink Wrapping pass being run after the WebAssembly PEI pass When setting multiple variables to the same we currently get code like const It could be done with a smaller encoding like local tee $pop5 local $pop6 WebAssembly registers are implicitly initialized to zero Explicit zeroing is therefore often redundant and could be optimized away Small indices may use smaller encodings than large indices WebAssemblyRegColoring and or WebAssemblyRegRenumbering should sort registers according to their usage frequency to maximize the usage of smaller encodings Many cases of irreducible control flow could be transformed more optimally than via the transform in WebAssemblyFixIrreducibleControlFlow cpp It may also be worthwhile to do transforms before register particularly when duplicating to allow register coloring to be aware of the duplication WebAssemblyRegStackify could use AliasAnalysis to reorder loads and stores more aggressively WebAssemblyRegStackify is currently a greedy algorithm This means that
Definition: README.txt:130
tmp
alloca< 16 x float >, align 16 %tmp2=alloca< 16 x float >, align 16 store< 16 x float > %A,< 16 x float > *%tmp %s=bitcast< 16 x float > *%tmp to i8 *%s2=bitcast< 16 x float > *%tmp2 to i8 *call void @llvm.memcpy.i64(i8 *%s, i8 *%s2, i64 64, i32 16) %R=load< 16 x float > *%tmp2 ret< 16 x float > %R } declare void @llvm.memcpy.i64(i8 *nocapture, i8 *nocapture, i64, i32) nounwind which compiles to:_foo:subl $140, %esp movaps %xmm3, 112(%esp) movaps %xmm2, 96(%esp) movaps %xmm1, 80(%esp) movaps %xmm0, 64(%esp) movl 60(%esp), %eax movl %eax, 124(%esp) movl 56(%esp), %eax movl %eax, 120(%esp) movl 52(%esp), %eax< many many more 32-bit copies > movaps(%esp), %xmm0 movaps 16(%esp), %xmm1 movaps 32(%esp), %xmm2 movaps 48(%esp), %xmm3 addl $140, %esp ret On Nehalem, it may even be cheaper to just use movups when unaligned than to fall back to lower-granularity chunks. Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations:1. ix86_pad_returns inserts a noop before ret instructions if immediately preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. The first one is done for all AMDs, Core2, and "Generic" The second one is done for:Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" Testcase:int x(int a) { return(a &0xf0)> >4 tmp
Definition: README.txt:1347
llvm::AMDGPULibFuncBase::EI_RECIP
@ EI_RECIP
Definition: AMDGPULibFunc.h:155
llvm::AMDGPULibFuncBase::EI_NSIN
@ EI_NSIN
Definition: AMDGPULibFunc.h:228
llvm::Attribute::getValueAsBool
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:287
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
llvm::AMDGPULibCalls
Definition: AMDGPULibCalls.cpp:45
tbl_erf
static const TableEntry tbl_erf[]
Definition: AMDGPULibCalls.cpp:301
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::AMDGPULibFuncBase::EI_NSQRT
@ EI_NSQRT
Definition: AMDGPULibFunc.h:229
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:198
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AMDGPULibCalls::replaceCall
void replaceCall(Value *With)
Definition: AMDGPULibCalls.cpp:136
llvm::AMDGPULibFunc::getLeads
Param * getLeads()
Get leading parameters for mangled lib functions.
Definition: AMDGPULibFunc.cpp:1048
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::AMDGPULibFuncBase::getEPtrKindFromAddrSpace
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Definition: AMDGPULibFunc.h:312
llvm::AMDGPULibFunc
Wrapper class for AMDGPULIbFuncImpl.
Definition: AMDGPULibFunc.h:357
tbl_acospi
static const TableEntry tbl_acospi[]
Definition: AMDGPULibCalls.cpp:241
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::AMDGPULibFuncBase::EI_TANH
@ EI_TANH
Definition: AMDGPULibFunc.h:194
llvm::AMDGPULibFuncBase::EI_FMA
@ EI_FMA
Definition: AMDGPULibFunc.h:95
DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
llvm::AMDGPULibFuncBase::EI_ATANH
@ EI_ATANH
Definition: AMDGPULibFunc.h:53
llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1702
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AMDGPULibFuncBase::EType
EType
Definition: AMDGPULibFunc.h:252
llvm::AAResults
Definition: AliasAnalysis.h:508
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
tbl_exp
static const TableEntry tbl_exp[]
Definition: AMDGPULibCalls.cpp:305
llvm::AMDGPULibFuncBase::EI_FABS
@ EI_FABS
Definition: AMDGPULibFunc.h:89
llvm::User
Definition: User.h:44
llvm::AMDGPULibCalls::useNative
bool useNative(CallInst *CI)
Definition: AMDGPULibCalls.cpp:527
llvm::AMDGPULibFunc::getOrInsertFunction
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
Definition: AMDGPULibFunc.cpp:959
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1383
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1518
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
tbl_asinpi
static const TableEntry tbl_asinpi[]
Definition: AMDGPULibCalls.cpp:257
llvm::AMDGPULibFuncBase::EI_ACOSPI
@ EI_ACOSPI
Definition: AMDGPULibFunc.h:41
false
Definition: StackSlotColoring.cpp:142
llvm::AMDGPULibFuncBase::EI_NEXP2
@ EI_NEXP2
Definition: AMDGPULibFunc.h:223
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::APFloat::convertToDouble
double convertToDouble() const
Converts this APFloat to host double value.
Definition: APFloat.cpp:4895
llvm::Instruction
Definition: Instruction.h:45
llvm::ConstantDataVector::getSplatValue
Constant * getSplatValue() const
If this is a splat constant, meaning that all of the elements have the same value,...
Definition: Constants.cpp:3320
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::ConstantFP::isExactlyValue
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: Constants.cpp:1096
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:186
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
llvm::FunctionCallee::getFunctionType
FunctionType * getFunctionType()
Definition: DerivedTypes.h:182
getVecSize
static int getVecSize(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:453
llvm::AMDGPULibFuncBase::EI_ASIN
@ EI_ASIN
Definition: AMDGPULibFunc.h:45
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::AMDGPULibFuncBase::EI_TAN
@ EI_TAN
Definition: AMDGPULibFunc.h:193
tbl_expm1
static const TableEntry tbl_expm1[]
Definition: AMDGPULibCalls.cpp:320
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:686
input
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 input
Definition: README.txt:10
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
tbl_atan
static const TableEntry tbl_atan[]
Definition: AMDGPULibCalls.cpp:263
llvm::Instruction::isLifetimeStartOrEnd
bool isLifetimeStartOrEnd() const
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
Definition: Instruction.cpp:706
Name
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Definition: AMDGPULibCalls.cpp:207
llvm::AMDGPUSimplifyLibCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1738
HasNative
static bool HasNative(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:376
tbl_tanh
static const TableEntry tbl_tanh[]
Definition: AMDGPULibCalls.cpp:361
llvm::AMDGPULibFuncBase::NOPFX
@ NOPFX
Definition: AMDGPULibFunc.h:247
llvm::AMDGPULibFuncBase::EI_EXP10
@ EI_EXP10
Definition: AMDGPULibFunc.h:86
llvm::StringRef::equals
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:187
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::AMDGPULibFuncBase::EI_READ_PIPE_4
@ EI_READ_PIPE_4
Definition: AMDGPULibFunc.h:239
llvm::AMDGPULibFuncBase::EI_ACOS
@ EI_ACOS
Definition: AMDGPULibFunc.h:39
AMDGPULibFunc.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
TableRef::TableRef
TableRef()
Definition: AMDGPULibCalls.cpp:403
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::cl::opt< bool >
val
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 the input will be treated as an leaving the upper bits uninitialised For i64 store i32 val
Definition: README.txt:15
llvm::AMDGPULibFuncBase::HALF
@ HALF
Definition: AMDGPULibFunc.h:249
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::AMDGPULibCalls::isUnsafeMath
bool isUnsafeMath(const CallInst *CI) const
Definition: AMDGPULibCalls.cpp:474
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::StringRef::equals_insensitive
LLVM_NODISCARD bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:194
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:182
llvm::AMDGPULibFuncBase::F32
@ F32
Definition: AMDGPULibFunc.h:271
tbl_cospi
static const TableEntry tbl_cospi[]
Definition: AMDGPULibCalls.cpp:293
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:578
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPULibFuncBase::EI_EXP
@ EI_EXP
Definition: AMDGPULibFunc.h:85
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AMDGPULibFuncBase::EI_TGAMMA
@ EI_TGAMMA
Definition: AMDGPULibFunc.h:196
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_4
@ EI_WRITE_PIPE_4
Definition: AMDGPULibFunc.h:241
llvm::AMDGPULibFuncBase::EI_COSPI
@ EI_COSPI
Definition: AMDGPULibFunc.h:76
llvm::AMDGPULibFunc::mangle
std::string mangle() const
Definition: AMDGPULibFunc.h:387
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::ConstantDataVector
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:752
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
getArgType
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:457
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::AMDGPUUseNativeCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1795
llvm::AMDGPULibCalls::initNativeFuncs
void initNativeFuncs()
Definition: AMDGPULibCalls.cpp:487
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1616
tbl_asinh
static const TableEntry tbl_asinh[]
Definition: AMDGPULibCalls.cpp:253
false
amdgpu Simplify well known AMD library false
Definition: AMDGPULibCalls.cpp:199
llvm::AMDGPULibFunc::getNumArgs
unsigned getNumArgs() const
Definition: AMDGPULibFunc.h:372
IRBuilder.h
llvm::ConstantFP::isNegative
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:304
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::FPMathOperator
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:250
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::ConstantDataSequential::getElementAsConstant
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index's element.
Definition: Constants.cpp:3275
llvm::AMDGPULibFuncBase::EI_ASINPI
@ EI_ASINPI
Definition: AMDGPULibFunc.h:47
llvm::AMDGPULibFuncBase::EI_SINCOS
@ EI_SINCOS
Definition: AMDGPULibFunc.h:172
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_2
@ EI_WRITE_PIPE_2
Definition: AMDGPULibFunc.h:240
llvm::AMDGPULibFuncBase::EI_SINH
@ EI_SINH
Definition: AMDGPULibFunc.h:173
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:358
tbl_cos
static const TableEntry tbl_cos[]
Definition: AMDGPULibCalls.cpp:285
llvm::StringRef::contains_insensitive
LLVM_NODISCARD bool contains_insensitive(StringRef Other) const
Return true if the given string is a substring of *this, and false otherwise.
Definition: StringRef.h:472
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1532
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::AMDGPULibFuncBase::Param::VectorSize
unsigned char VectorSize
Definition: AMDGPULibFunc.h:293
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
Simplify
assume Assume Simplify
Definition: AssumeBundleBuilder.cpp:603
llvm::AMDGPULibFuncBase::EI_DIVIDE
@ EI_DIVIDE
Definition: AMDGPULibFunc.h:81
tbl_erfc
static const TableEntry tbl_erfc[]
Definition: AMDGPULibCalls.cpp:297
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::AMDGPULibFuncBase::EI_COS
@ EI_COS
Definition: AMDGPULibFunc.h:74
llvm::AMDGPULibFuncBase::EI_LOG2
@ EI_LOG2
Definition: AMDGPULibFunc.h:132
tbl_atanpi
static const TableEntry tbl_atanpi[]
Definition: AMDGPULibCalls.cpp:273
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
library
Itanium Name Demangler i e convert the string _Z1fv into but neither can depend on each other libcxxabi needs the demangler to implement which is part of the itanium ABI spec LLVM needs a copy for a bunch of but doesn t want to use the system s __cxa_demangle because it a might not be and b probably isn t that up to date on the latest language features The copy of the demangler in LLVM has some extra stuff that aren t needed in which depend on the shared generic components Despite these we want to keep the core generic demangling library identical between both copies to simplify development and testing If you re working on the generic library
Definition: README.txt:30
llvm::AMDGPULibFuncBase::EFuncId
EFuncId
Definition: AMDGPULibFunc.h:23
llvm::AMDGPULibFunc::setId
void setId(EFuncId Id)
Definition: AMDGPULibFunc.h:380
llvm::AMDGPULibFuncBase::EI_EXP2
@ EI_EXP2
Definition: AMDGPULibFunc.h:87
tbl_tanpi
static const TableEntry tbl_tanpi[]
Definition: AMDGPULibCalls.cpp:365
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:130
llvm::AMDGPULibFunc::setPrefix
void setPrefix(ENamePrefix PFX)
Definition: AMDGPULibFunc.h:390
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::AMDGPULibFuncBase::EI_NRSQRT
@ EI_NRSQRT
Definition: AMDGPULibFunc.h:227
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
UseNative
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
well
llvm ldr ldrb ldrh str strh strb strb gcc and possibly speed as well(we don 't have a good way to measure on ARM). *Consider this silly example
Definition: README.txt:138
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:184
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:36
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
EnablePreLink
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:242
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", "Simplify well-known AMD library calls", false, false) INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:245
llvm::AMDGPULibFuncBase::EI_ERF
@ EI_ERF
Definition: AMDGPULibFunc.h:83
llvm::ConstantDataSequential::getElementAsDouble
double getElementAsDouble(unsigned i) const
If this is an sequential container of doubles, return the specified element as a double.
Definition: Constants.cpp:3269
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::AMDGPULibFuncBase::EI_MAD
@ EI_MAD
Definition: AMDGPULibFunc.h:134
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::CallBase::arg_size
unsigned arg_size() const
Definition: InstrTypes.h:1326
llvm::AMDGPULibFuncBase::Param::ArgType
unsigned char ArgType
Definition: AMDGPULibFunc.h:292
llvm::AMDGPULibFuncBase::EI_POWN
@ EI_POWN
Definition: AMDGPULibFunc.h:151
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:972
TableRef::TableRef
TableRef(const TableEntry(&tbl)[N])
Definition: AMDGPULibCalls.cpp:406
llvm::AMDGPULibFunc::getPrefix
ENamePrefix getPrefix() const
Definition: AMDGPULibFunc.h:374
llvm::FindAvailableLoadedValue
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:431
llvm::AMDGPULibFuncBase::EI_LOG10
@ EI_LOG10
Definition: AMDGPULibFunc.h:130
llvm::AMDGPULibFunc::getId
EFuncId getId() const
Definition: AMDGPULibFunc.h:373
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
y
into llvm powi allowing the code generator to produce balanced multiplication trees the intrinsic needs to be extended to support and second the code generator needs to be enhanced to lower these to multiplication trees Interesting testcase for add shift mul int y
Definition: README.txt:61
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:842
tbl_rsqrt
static const TableEntry tbl_rsqrt[]
Definition: AMDGPULibCalls.cpp:336
simplifylib
amdgpu simplifylib
Definition: AMDGPULibCalls.cpp:198
llvm::AMDGPULibFuncBase::EI_EXPM1
@ EI_EXPM1
Definition: AMDGPULibFunc.h:88
MATH_SQRT2
#define MATH_SQRT2
Definition: AMDGPULibCalls.cpp:40
llvm::numbers::ln2
constexpr double ln2
Definition: MathExtras.h:59
llvm::AMDGPULibCalls::CI
CallInst * CI
Definition: AMDGPULibCalls.cpp:132
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:129
tbl_exp10
static const TableEntry tbl_exp10[]
Definition: AMDGPULibCalls.cpp:315
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:166
llvm::AMDGPULibCalls::fold
bool fold(CallInst *CI, AliasAnalysis *AA=nullptr)
Definition: AMDGPULibCalls.cpp:621
tbl_sin
static const TableEntry tbl_sin[]
Definition: AMDGPULibCalls.cpp:340
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1328
llvm::ConstantDataVector::getSplat
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:3143
N
#define N
tbl_exp2
static const TableEntry tbl_exp2[]
Definition: AMDGPULibCalls.cpp:310
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1336
llvm::AMDGPULibFunc::getName
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
Definition: AMDGPULibFunc.h:371
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:63
llvm::AMDGPULibFuncBase::EI_SQRT
@ EI_SQRT
Definition: AMDGPULibFunc.h:176
MATH_E
#define MATH_E
Definition: AMDGPULibCalls.cpp:39
tbl_sinh
static const TableEntry tbl_sinh[]
Definition: AMDGPULibCalls.cpp:344
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
calls
amdgpu Simplify well known AMD library calls
Definition: AMDGPULibCalls.cpp:199
tbl_cosh
static const TableEntry tbl_cosh[]
Definition: AMDGPULibCalls.cpp:289
llvm::AMDGPULibFuncBase::EI_NLOG2
@ EI_NLOG2
Definition: AMDGPULibFunc.h:225
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
tbl_sqrt
static const TableEntry tbl_sqrt[]
Definition: AMDGPULibCalls.cpp:352
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AllocaInst
an instruction to allocate memory on the stack
Definition: Instructions.h:62
llvm::cl::desc
Definition: CommandLine.h:412
llvm::AMDGPULibFuncBase::EI_RSQRT
@ EI_RSQRT
Definition: AMDGPULibFunc.h:165
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
TableRef
Definition: AMDGPULibCalls.cpp:399
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AMDGPULibFuncBase::EI_ATAN
@ EI_ATAN
Definition: AMDGPULibFunc.h:50
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AMDGPULibFuncBase::EI_SINPI
@ EI_SINPI
Definition: AMDGPULibFunc.h:174
llvm::AMDGPULibFuncBase::NATIVE
@ NATIVE
Definition: AMDGPULibFunc.h:248
MATH_SQRT1_2
#define MATH_SQRT1_2
Definition: AMDGPULibCalls.cpp:41
tbl_acosh
static const TableEntry tbl_acosh[]
Definition: AMDGPULibCalls.cpp:238
tbl_tgamma
static const TableEntry tbl_tgamma[]
Definition: AMDGPULibCalls.cpp:369
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1698
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
INITIALIZE_PASS
TargetPassConfig.
Definition: TargetPassConfig.cpp:359
llvm::DataLayout::getTypeAllocSize
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:503
llvm::ConstantDataSequential::getNumElements
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2933
llvm::cl::list
Definition: CommandLine.h:1640