LLVM 14.0.0git
AMDGPULibCalls.cpp
1 //===- AMDGPULibCalls.cpp -------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file does AMD library function optimizations.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULibFunc.h"
16 #include "GCNSubtarget.h"
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/IR/IntrinsicsAMDGPU.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/InitializePasses.h"
23 
24 #define DEBUG_TYPE "amdgpu-simplifylib"
25 
26 using namespace llvm;
27 
28 static cl::opt<bool> EnablePreLink("amdgpu-prelink",
29  cl::desc("Enable pre-link mode optimizations"),
30  cl::init(false),
31  cl::Hidden);
32 
33 static cl::list<std::string> UseNative("amdgpu-use-native",
34  cl::desc("Comma separated list of functions to replace with native, or all"),
36  cl::Hidden);
37 
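For reference, the option above accepts the following spellings (an illustrative sketch of typical usage; only the option itself is defined in this file, and as initNativeFuncs() below shows, a bare occurrence or the value "all" enables native replacement for every supported function):

    -amdgpu-use-native=sin,cos   // replace only sin and cos with their native_ variants
    -amdgpu-use-native=all       // replace every supported call
    -amdgpu-use-native           // value omitted: also treated as "all"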
38 #define MATH_PI numbers::pi
39 #define MATH_E numbers::e
40 #define MATH_SQRT2 numbers::sqrt2
41 #define MATH_SQRT1_2 numbers::inv_sqrt2
42 
43 namespace llvm {
44 
45 class AMDGPULibCalls {
46 private:
47 
48  typedef llvm::AMDGPULibFunc FuncInfo;
49 
50  const TargetMachine *TM;
51 
52  // -fuse-native.
53  bool AllNative = false;
54 
55  bool useNativeFunc(const StringRef F) const;
56 
57  // Return a pointer (pointer expr) to the function if a function definition with
58  // "FuncName" exists. It may create a new function prototype in pre-link mode.
59  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
60 
61  // Replace a normal function with its native version.
62  bool replaceWithNative(CallInst *CI, const FuncInfo &FInfo);
63 
64  bool parseFunctionName(const StringRef& FMangledName,
65  FuncInfo *FInfo=nullptr /*out*/);
66 
67  bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
68 
69  /* Specialized optimizations */
70 
71  // recip (half or native)
72  bool fold_recip(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
73 
74  // divide (half or native)
75  bool fold_divide(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
76 
77  // pow/powr/pown
78  bool fold_pow(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
79 
80  // rootn
81  bool fold_rootn(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
82 
83  // fma/mad
84  bool fold_fma_mad(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
85 
86  // -fuse-native for sincos
87  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
88 
89  // Evaluate calls whose arguments are constants.
90  bool evaluateScalarMathFunc(FuncInfo &FInfo, double& Res0,
91  double& Res1, Constant *copr0, Constant *copr1, Constant *copr2);
92  bool evaluateCall(CallInst *aCI, FuncInfo &FInfo);
93 
94  // exp
95  bool fold_exp(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
96 
97  // exp2
98  bool fold_exp2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
99 
100  // exp10
101  bool fold_exp10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
102 
103  // log
104  bool fold_log(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
105 
106  // log2
107  bool fold_log2(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
108 
109  // log10
110  bool fold_log10(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
111 
112  // sqrt
113  bool fold_sqrt(CallInst *CI, IRBuilder<> &B, const FuncInfo &FInfo);
114 
115  // sin/cos
116  bool fold_sincos(CallInst * CI, IRBuilder<> &B, AliasAnalysis * AA);
117 
118  // __read_pipe/__write_pipe
119  bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, FuncInfo &FInfo);
120 
121  // llvm.amdgcn.wavefrontsize
122  bool fold_wavefrontsize(CallInst *CI, IRBuilder<> &B);
123 
124  // Get insertion point at entry.
125  BasicBlock::iterator getEntryIns(CallInst * UI);
126  // Insert an alloca instruction.
127  AllocaInst* insertAlloca(CallInst * UI, IRBuilder<> &B, const char *prefix);
128  // Get a scalar native builtin single-argument FP function
129  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
130 
131 protected:
132  CallInst *CI;
133 
134  bool isUnsafeMath(const CallInst *CI) const;
135 
136  void replaceCall(Value *With) {
137  CI->replaceAllUsesWith(With);
138  CI->eraseFromParent();
139  }
140 
141 public:
142  AMDGPULibCalls(const TargetMachine *TM_ = nullptr) : TM(TM_) {}
143 
144  bool fold(CallInst *CI, AliasAnalysis *AA = nullptr);
145 
146  void initNativeFuncs();
147 
148  // Replace a normal math function call with its native version
149  bool useNative(CallInst *CI);
150 };
151 
152 } // end llvm namespace
153 
154 namespace {
155 
156  class AMDGPUSimplifyLibCalls : public FunctionPass {
157 
158  AMDGPULibCalls Simplifier;
159 
160  public:
161  static char ID; // Pass identification
162 
163  AMDGPUSimplifyLibCalls(const TargetMachine *TM = nullptr)
164  : FunctionPass(ID), Simplifier(TM) {
166  }
167 
168  void getAnalysisUsage(AnalysisUsage &AU) const override {
170  }
171 
172  bool runOnFunction(Function &M) override;
173  };
174 
175  class AMDGPUUseNativeCalls : public FunctionPass {
176 
177  AMDGPULibCalls Simplifier;
178 
179  public:
180  static char ID; // Pass identification
181 
182  AMDGPUUseNativeCalls() : FunctionPass(ID) {
184  Simplifier.initNativeFuncs();
185  }
186 
187  bool runOnFunction(Function &F) override;
188  };
189 
190 } // end anonymous namespace.
191 
192 char AMDGPUSimplifyLibCalls::ID = 0;
193 char AMDGPUUseNativeCalls::ID = 0;
194 
195 INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
196  "Simplify well-known AMD library calls", false, false)
198 INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib",
199  "Simplify well-known AMD library calls", false, false)
200 
201 INITIALIZE_PASS(AMDGPUUseNativeCalls, "amdgpu-usenative",
202  "Replace builtin math calls with their native versions.",
203  false, false)
204 
205 template <typename IRB>
206 static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
207  const Twine &Name = "") {
208  CallInst *R = B.CreateCall(Callee, Arg, Name);
209  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
210  R->setCallingConv(F->getCallingConv());
211  return R;
212 }
213 
214 template <typename IRB>
215 static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
216  Value *Arg2, const Twine &Name = "") {
217  CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
218  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
219  R->setCallingConv(F->getCallingConv());
220  return R;
221 }
222 
223 // Data structures for table-driven optimizations.
224 // FuncTbl works for both f32 and f64 functions with 1 input argument
225 
226 struct TableEntry {
227  double result;
228  double input;
229 };
230 
231 /* a list of {result, input} */
232 static const TableEntry tbl_acos[] = {
233  {MATH_PI / 2.0, 0.0},
234  {MATH_PI / 2.0, -0.0},
235  {0.0, 1.0},
236  {MATH_PI, -1.0}
237 };
238 static const TableEntry tbl_acosh[] = {
239  {0.0, 1.0}
240 };
241 static const TableEntry tbl_acospi[] = {
242  {0.5, 0.0},
243  {0.5, -0.0},
244  {0.0, 1.0},
245  {1.0, -1.0}
246 };
247 static const TableEntry tbl_asin[] = {
248  {0.0, 0.0},
249  {-0.0, -0.0},
250  {MATH_PI / 2.0, 1.0},
251  {-MATH_PI / 2.0, -1.0}
252 };
253 static const TableEntry tbl_asinh[] = {
254  {0.0, 0.0},
255  {-0.0, -0.0}
256 };
257 static const TableEntry tbl_asinpi[] = {
258  {0.0, 0.0},
259  {-0.0, -0.0},
260  {0.5, 1.0},
261  {-0.5, -1.0}
262 };
263 static const TableEntry tbl_atan[] = {
264  {0.0, 0.0},
265  {-0.0, -0.0},
266  {MATH_PI / 4.0, 1.0},
267  {-MATH_PI / 4.0, -1.0}
268 };
269 static const TableEntry tbl_atanh[] = {
270  {0.0, 0.0},
271  {-0.0, -0.0}
272 };
273 static const TableEntry tbl_atanpi[] = {
274  {0.0, 0.0},
275  {-0.0, -0.0},
276  {0.25, 1.0},
277  {-0.25, -1.0}
278 };
279 static const TableEntry tbl_cbrt[] = {
280  {0.0, 0.0},
281  {-0.0, -0.0},
282  {1.0, 1.0},
283  {-1.0, -1.0},
284 };
285 static const TableEntry tbl_cos[] = {
286  {1.0, 0.0},
287  {1.0, -0.0}
288 };
289 static const TableEntry tbl_cosh[] = {
290  {1.0, 0.0},
291  {1.0, -0.0}
292 };
293 static const TableEntry tbl_cospi[] = {
294  {1.0, 0.0},
295  {1.0, -0.0}
296 };
297 static const TableEntry tbl_erfc[] = {
298  {1.0, 0.0},
299  {1.0, -0.0}
300 };
301 static const TableEntry tbl_erf[] = {
302  {0.0, 0.0},
303  {-0.0, -0.0}
304 };
305 static const TableEntry tbl_exp[] = {
306  {1.0, 0.0},
307  {1.0, -0.0},
308  {MATH_E, 1.0}
309 };
310 static const TableEntry tbl_exp2[] = {
311  {1.0, 0.0},
312  {1.0, -0.0},
313  {2.0, 1.0}
314 };
315 static const TableEntry tbl_exp10[] = {
316  {1.0, 0.0},
317  {1.0, -0.0},
318  {10.0, 1.0}
319 };
320 static const TableEntry tbl_expm1[] = {
321  {0.0, 0.0},
322  {-0.0, -0.0}
323 };
324 static const TableEntry tbl_log[] = {
325  {0.0, 1.0},
326  {1.0, MATH_E}
327 };
328 static const TableEntry tbl_log2[] = {
329  {0.0, 1.0},
330  {1.0, 2.0}
331 };
332 static const TableEntry tbl_log10[] = {
333  {0.0, 1.0},
334  {1.0, 10.0}
335 };
336 static const TableEntry tbl_rsqrt[] = {
337  {1.0, 1.0},
338  {MATH_SQRT1_2, 2.0}
339 };
340 static const TableEntry tbl_sin[] = {
341  {0.0, 0.0},
342  {-0.0, -0.0}
343 };
344 static const TableEntry tbl_sinh[] = {
345  {0.0, 0.0},
346  {-0.0, -0.0}
347 };
348 static const TableEntry tbl_sinpi[] = {
349  {0.0, 0.0},
350  {-0.0, -0.0}
351 };
352 static const TableEntry tbl_sqrt[] = {
353  {0.0, 0.0},
354  {1.0, 1.0},
355  {MATH_SQRT2, 2.0}
356 };
357 static const TableEntry tbl_tan[] = {
358  {0.0, 0.0},
359  {-0.0, -0.0}
360 };
361 static const TableEntry tbl_tanh[] = {
362  {0.0, 0.0},
363  {-0.0, -0.0}
364 };
365 static const TableEntry tbl_tanpi[] = {
366  {0.0, 0.0},
367  {-0.0, -0.0}
368 };
369 static const TableEntry tbl_tgamma[] = {
370  {1.0, 1.0},
371  {1.0, 2.0},
372  {2.0, 3.0},
373  {6.0, 4.0}
374 };
375 
377  switch(id) {
393  return true;
394  default:;
395  }
396  return false;
397 }
398 
399 struct TableRef {
400  size_t size;
401  const TableEntry *table; // variable size: from 0 to (size - 1)
402 
403  TableRef() : size(0), table(nullptr) {}
404 
405  template <size_t N>
406  TableRef(const TableEntry (&tbl)[N]) : size(N), table(&tbl[0]) {}
407 };
408 
410  switch(id) {
422  case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos);
426  case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf);
427  case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp);
432  case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log);
439  case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin);
444  case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan);
448  default:;
449  }
450  return TableRef();
451 }
452 
453 static inline int getVecSize(const AMDGPULibFunc& FInfo) {
454  return FInfo.getLeads()[0].VectorSize;
455 }
456 
457 static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
458  return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
459 }
460 
461 FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
462  // If we are doing PreLinkOpt, the function is external. So it is safe to
463  // use getOrInsertFunction() at this stage.
464 
466  : AMDGPULibFunc::getFunction(M, fInfo);
467 }
468 
469 bool AMDGPULibCalls::parseFunctionName(const StringRef& FMangledName,
470  FuncInfo *FInfo) {
471  return AMDGPULibFunc::parse(FMangledName, *FInfo);
472 }
473 
474 bool AMDGPULibCalls::isUnsafeMath(const CallInst *CI) const {
475  if (auto Op = dyn_cast<FPMathOperator>(CI))
476  if (Op->isFast())
477  return true;
478  const Function *F = CI->getParent()->getParent();
479  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
480  return Attr.getValueAsBool();
481 }
482 
483 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
484  return AllNative || llvm::is_contained(UseNative, F);
485 }
486 
488  AllNative = useNativeFunc("all") ||
489  (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
490  UseNative.begin()->empty());
491 }
492 
493 bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
494  bool native_sin = useNativeFunc("sin");
495  bool native_cos = useNativeFunc("cos");
496 
497  if (native_sin && native_cos) {
498  Module *M = aCI->getModule();
499  Value *opr0 = aCI->getArgOperand(0);
500 
501  AMDGPULibFunc nf;
502  nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
503  nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
504 
507  FunctionCallee sinExpr = getFunction(M, nf);
508 
511  FunctionCallee cosExpr = getFunction(M, nf);
512  if (sinExpr && cosExpr) {
513  Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
514  Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
515  new StoreInst(cosval, aCI->getArgOperand(1), aCI);
516 
517  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
518  << " with native version of sin/cos");
519 
520  replaceCall(sinval);
521  return true;
522  }
523  }
524  return false;
525 }
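Roughly, the split performed by sincosUseNative corresponds to this source-level rewrite (a sketch only; the pass rewrites the mangled IR calls directly, and the OpenCL spelling here is illustrative):

    // before, when both "sin" and "cos" are requested as native:
    float s = sincos(x, &c);

    // after: the call is split into its native halves, and the cos result is
    // stored through the original pointer argument ("splitsin"/"splitcos"):
    float s = native_sin(x);
    c = native_cos(x);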
526 
528  CI = aCI;
530 
531  FuncInfo FInfo;
532  if (!parseFunctionName(Callee->getName(), &FInfo) || !FInfo.isMangled() ||
533  FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
534  getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
535  !(AllNative || useNativeFunc(FInfo.getName()))) {
536  return false;
537  }
538 
539  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
540  return sincosUseNative(aCI, FInfo);
541 
543  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
544  if (!F)
545  return false;
546 
547  aCI->setCalledFunction(F);
548  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
549  << " with native version");
550  return true;
551 }
552 
553 // Clang emits a call to __read_pipe_2 or __read_pipe_4 for the OpenCL read_pipe
554 // builtin, with the type size and alignment appended as arguments, where 2 or 4
555 // indicates the original number of arguments. The library has an optimized
556 // version of __read_pipe_2/__read_pipe_4 for the case where the type size and
557 // alignment are the same power-of-2 value. This function transforms __read_pipe_2
558 // into __read_pipe_2_N for such cases, where N is the size in bytes of the type
559 // (N = 1, 2, 4, 8, ..., 128). The same applies to __read_pipe_4, __write_pipe_2, and __write_pipe_4.
560 bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
561  FuncInfo &FInfo) {
562  auto *Callee = CI->getCalledFunction();
563  if (!Callee->isDeclaration())
564  return false;
565 
566  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
567  auto *M = Callee->getParent();
568  auto &Ctx = M->getContext();
569  std::string Name = std::string(Callee->getName());
570  auto NumArg = CI->getNumArgOperands();
571  if (NumArg != 4 && NumArg != 6)
572  return false;
573  auto *PacketSize = CI->getArgOperand(NumArg - 2);
574  auto *PacketAlign = CI->getArgOperand(NumArg - 1);
575  if (!isa<ConstantInt>(PacketSize) || !isa<ConstantInt>(PacketAlign))
576  return false;
577  unsigned Size = cast<ConstantInt>(PacketSize)->getZExtValue();
578  Align Alignment = cast<ConstantInt>(PacketAlign)->getAlignValue();
579  if (Alignment != Size)
580  return false;
581 
582  Type *PtrElemTy;
583  if (Size <= 8)
584  PtrElemTy = Type::getIntNTy(Ctx, Size * 8);
585  else
586  PtrElemTy = FixedVectorType::get(Type::getInt64Ty(Ctx), Size / 8);
587  unsigned PtrArgLoc = CI->getNumArgOperands() - 3;
588  auto PtrArg = CI->getArgOperand(PtrArgLoc);
589  unsigned PtrArgAS = PtrArg->getType()->getPointerAddressSpace();
590  auto *PtrTy = llvm::PointerType::get(PtrElemTy, PtrArgAS);
591 
593  for (unsigned I = 0; I != PtrArgLoc; ++I)
594  ArgTys.push_back(CI->getArgOperand(I)->getType());
595  ArgTys.push_back(PtrTy);
596 
597  Name = Name + "_" + std::to_string(Size);
598  auto *FTy = FunctionType::get(Callee->getReturnType(),
599  ArrayRef<Type *>(ArgTys), false);
600  AMDGPULibFunc NewLibFunc(Name, FTy);
602  if (!F)
603  return false;
604 
605  auto *BCast = B.CreatePointerCast(PtrArg, PtrTy);
607  for (unsigned I = 0; I != PtrArgLoc; ++I)
608  Args.push_back(CI->getArgOperand(I));
609  Args.push_back(BCast);
610 
611  auto *NCI = B.CreateCall(F, Args);
612  NCI->setAttributes(CI->getAttributes());
613  CI->replaceAllUsesWith(NCI);
615  CI->eraseFromParent();
616 
617  return true;
618 }
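As a concrete instance of the comment above fold_read_write_pipe, in simplified pseudo-IR (types and address spaces elided; the packet type chosen here is an assumption for illustration): for a pipe whose packet is a 4-byte type with 4-byte alignment, the generic call

    %r = call i32 @__read_pipe_2(%pipe, i8* %ptr, i32 4, i32 4)

is rewritten to the size-specialized library entry point, with the trailing size/alignment arguments dropped and the pointer operand cast to a pointer to a 4-byte integer:

    %bc = bitcast i8* %ptr to i32*
    %r  = call i32 @__read_pipe_2_4(%pipe, i32* %bc)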
619 
620 // This function returns false if there is no change, and true otherwise.
622  this->CI = CI;
624 
625  // Ignore indirect calls.
626  if (Callee == 0) return false;
627 
628  BasicBlock *BB = CI->getParent();
631 
632  // Set the builder to the instruction after the call.
633  B.SetInsertPoint(BB, CI->getIterator());
634 
635  // Copy fast flags from the original call.
636  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(CI))
637  B.setFastMathFlags(FPOp->getFastMathFlags());
638 
639  switch (Callee->getIntrinsicID()) {
640  default:
641  break;
642  case Intrinsic::amdgcn_wavefrontsize:
643  return !EnablePreLink && fold_wavefrontsize(CI, B);
644  }
645 
646  FuncInfo FInfo;
647  if (!parseFunctionName(Callee->getName(), &FInfo))
648  return false;
649 
650  // Further check the number of arguments to see if they match.
651  if (CI->getNumArgOperands() != FInfo.getNumArgs())
652  return false;
653 
654  if (TDOFold(CI, FInfo))
655  return true;
656 
657  // Under unsafe-math, evaluate calls if possible.
658  // According to Brian Sumner, we can do this for all f32 function calls
659  // using host's double function calls.
660  if (isUnsafeMath(CI) && evaluateCall(CI, FInfo))
661  return true;
662 
663  // Specialized optimizations for each function call
664  switch (FInfo.getId()) {
666  // skip vector function
667  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
668  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
669  "recip must be either a native or half function");
670  return (getVecSize(FInfo) != 1) ? false : fold_recip(CI, B, FInfo);
671 
673  // skip vector function
674  assert ((FInfo.getPrefix() == AMDGPULibFunc::NATIVE ||
675  FInfo.getPrefix() == AMDGPULibFunc::HALF) &&
676  "divide must be either a native or half function");
677  return (getVecSize(FInfo) != 1) ? false : fold_divide(CI, B, FInfo);
678 
682  return fold_pow(CI, B, FInfo);
683 
685  // skip vector function
686  return (getVecSize(FInfo) != 1) ? false : fold_rootn(CI, B, FInfo);
687 
691  // skip vector function
692  return (getVecSize(FInfo) != 1) ? false : fold_fma_mad(CI, B, FInfo);
693 
695  return isUnsafeMath(CI) && fold_sqrt(CI, B, FInfo);
698  if ((getArgType(FInfo) == AMDGPULibFunc::F32 ||
699  getArgType(FInfo) == AMDGPULibFunc::F64)
700  && (FInfo.getPrefix() == AMDGPULibFunc::NOPFX))
701  return fold_sincos(CI, B, AA);
702 
703  break;
708  return fold_read_write_pipe(CI, B, FInfo);
709 
710  default:
711  break;
712  }
713 
714  return false;
715 }
716 
717 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
718  // Table-Driven optimization
719  const TableRef tr = getOptTable(FInfo.getId());
720  if (tr.size==0)
721  return false;
722 
723  int const sz = (int)tr.size;
724  const TableEntry * const ftbl = tr.table;
725  Value *opr0 = CI->getArgOperand(0);
726 
727  if (getVecSize(FInfo) > 1) {
728  if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
730  for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
731  ConstantFP *eltval = dyn_cast<ConstantFP>(
732  CV->getElementAsConstant((unsigned)eltNo));
733  assert(eltval && "Non-FP arguments in math function!");
734  bool found = false;
735  for (int i=0; i < sz; ++i) {
736  if (eltval->isExactlyValue(ftbl[i].input)) {
737  DVal.push_back(ftbl[i].result);
738  found = true;
739  break;
740  }
741  }
742  if (!found) {
743  // These vector constants are not handled yet.
744  return false;
745  }
746  }
747  LLVMContext &context = CI->getParent()->getParent()->getContext();
748  Constant *nval;
749  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
751  for (unsigned i = 0; i < DVal.size(); ++i) {
752  FVal.push_back((float)DVal[i]);
753  }
754  ArrayRef<float> tmp(FVal);
755  nval = ConstantDataVector::get(context, tmp);
756  } else { // F64
757  ArrayRef<double> tmp(DVal);
758  nval = ConstantDataVector::get(context, tmp);
759  }
760  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
761  replaceCall(nval);
762  return true;
763  }
764  } else {
765  // Scalar version
766  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
767  for (int i = 0; i < sz; ++i) {
768  if (CF->isExactlyValue(ftbl[i].input)) {
769  Value *nval = ConstantFP::get(CF->getType(), ftbl[i].result);
770  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
771  replaceCall(nval);
772  return true;
773  }
774  }
775  }
776  }
777 
778  return false;
779 }
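The scalar path of TDOFold reduces to an exact-match lookup in one of the tables above. A minimal host-side sketch of that idea on plain doubles, instead of ConstantFP/ConstantDataVector (illustrative only; tdoFoldScalar is not part of this file):

    #include <cstddef>
    #include <optional>

    static std::optional<double> tdoFoldScalar(const TableEntry *Tbl, size_t N,
                                               double Arg) {
      for (size_t I = 0; I != N; ++I)
        if (Arg == Tbl[I].input)   // in the same spirit as isExactlyValue()
          return Tbl[I].result;    // fold the call to the precomputed constant
      return std::nullopt;         // no table hit: leave the call untouched
    }

    // For example, tdoFoldScalar(tbl_acos, 4, 1.0) yields 0.0, matching the
    // {0.0, 1.0} entry, so acos(1.0) folds to the constant 0.0.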
780 
781 bool AMDGPULibCalls::replaceWithNative(CallInst *CI, const FuncInfo &FInfo) {
782  Module *M = CI->getModule();
783  if (getArgType(FInfo) != AMDGPULibFunc::F32 ||
784  FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
785  !HasNative(FInfo.getId()))
786  return false;
787 
788  AMDGPULibFunc nf = FInfo;
790  if (FunctionCallee FPExpr = getFunction(M, nf)) {
791  LLVM_DEBUG(dbgs() << "AMDIC: " << *CI << " ---> ");
792 
793  CI->setCalledFunction(FPExpr);
794 
795  LLVM_DEBUG(dbgs() << *CI << '\n');
796 
797  return true;
798  }
799  return false;
800 }
801 
802 // [native_]half_recip(c) ==> 1.0/c
803 bool AMDGPULibCalls::fold_recip(CallInst *CI, IRBuilder<> &B,
804  const FuncInfo &FInfo) {
805  Value *opr0 = CI->getArgOperand(0);
806  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
807  // Just create a normal div. Later, InstCombine will be able
808  // to fold the divide into a constant (avoiding checks for infinity
809  // or subnormal values at this point).
810  Value *nval = B.CreateFDiv(ConstantFP::get(CF->getType(), 1.0),
811  opr0,
812  "recip2div");
813  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
814  replaceCall(nval);
815  return true;
816  }
817  return false;
818 }
819 
820 // [native_]half_divide(x, c) ==> x/c
821 bool AMDGPULibCalls::fold_divide(CallInst *CI, IRBuilder<> &B,
822  const FuncInfo &FInfo) {
823  Value *opr0 = CI->getArgOperand(0);
824  Value *opr1 = CI->getArgOperand(1);
825  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
826  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
827 
828  if ((CF0 && CF1) || // both are constants
829  (CF1 && (getArgType(FInfo) == AMDGPULibFunc::F32)))
830  // CF1 is constant && f32 divide
831  {
832  Value *nval1 = B.CreateFDiv(ConstantFP::get(opr1->getType(), 1.0),
833  opr1, "__div2recip");
834  Value *nval = B.CreateFMul(opr0, nval1, "__div2mul");
835  replaceCall(nval);
836  return true;
837  }
838  return false;
839 }
840 
841 namespace llvm {
842 static double log2(double V) {
843 #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
844  return ::log2(V);
845 #else
846  return log(V) / numbers::ln2;
847 #endif
848 }
849 }
850 
851 bool AMDGPULibCalls::fold_pow(CallInst *CI, IRBuilder<> &B,
852  const FuncInfo &FInfo) {
853  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
854  FInfo.getId() == AMDGPULibFunc::EI_POWR ||
855  FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
856  "fold_pow: encounter a wrong function call");
857 
858  Value *opr0, *opr1;
859  ConstantFP *CF;
860  ConstantInt *CINT;
861  ConstantAggregateZero *CZero;
862  Type *eltType;
863 
864  opr0 = CI->getArgOperand(0);
865  opr1 = CI->getArgOperand(1);
866  CZero = dyn_cast<ConstantAggregateZero>(opr1);
867  if (getVecSize(FInfo) == 1) {
868  eltType = opr0->getType();
869  CF = dyn_cast<ConstantFP>(opr1);
870  CINT = dyn_cast<ConstantInt>(opr1);
871  } else {
872  VectorType *VTy = dyn_cast<VectorType>(opr0->getType());
873  assert(VTy && "Operand of a vector function should be of vector type");
874  eltType = VTy->getElementType();
875  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1);
876 
877  // For now, only handle vector constants whose elements have the same value.
878  CF = CDV ? dyn_cast_or_null<ConstantFP>(CDV->getSplatValue()) : nullptr;
879  CINT = CDV ? dyn_cast_or_null<ConstantInt>(CDV->getSplatValue()) : nullptr;
880  }
881 
882  // No unsafe math and no constant argument: do nothing.
883  if (!isUnsafeMath(CI) && !CF && !CINT && !CZero)
884  return false;
885 
886  // 0x1111111 means that we don't do anything for this call.
887  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
888 
889  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0) || CZero) {
890  // pow/powr/pown(x, 0) == 1
891  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1\n");
892  Constant *cnval = ConstantFP::get(eltType, 1.0);
893  if (getVecSize(FInfo) > 1) {
894  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
895  }
896  replaceCall(cnval);
897  return true;
898  }
899  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
900  // pow/powr/pown(x, 1.0) = x
901  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
902  replaceCall(opr0);
903  return true;
904  }
905  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
906  // pow/powr/pown(x, 2.0) = x*x
907  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * " << *opr0
908  << "\n");
909  Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
910  replaceCall(nval);
911  return true;
912  }
913  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
914  // pow/powr/pown(x, -1.0) = 1.0/x
915  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1 / " << *opr0 << "\n");
916  Constant *cnval = ConstantFP::get(eltType, 1.0);
917  if (getVecSize(FInfo) > 1) {
918  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
919  }
920  Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
921  replaceCall(nval);
922  return true;
923  }
924 
925  Module *M = CI->getModule();
926  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
927  // pow[r](x, [-]0.5) = sqrt(x)
928  bool issqrt = CF->isExactlyValue(0.5);
929  if (FunctionCallee FPExpr =
930  getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
932  FInfo))) {
933  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
934  << FInfo.getName().c_str() << "(" << *opr0 << ")\n");
935  Value *nval = CreateCallEx(B, FPExpr, opr0,
936                             issqrt ? "__pow2sqrt" : "__pow2rsqrt");
937  replaceCall(nval);
938  return true;
939  }
940  }
941 
942  if (!isUnsafeMath(CI))
943  return false;
944 
945  // Unsafe Math optimization
946 
947  // Remember that ci_opr1 is set if opr1 is integral
948  if (CF) {
949  double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
950  ? (double)CF->getValueAPF().convertToFloat()
951  : CF->getValueAPF().convertToDouble();
952  int ival = (int)dval;
953  if ((double)ival == dval) {
954  ci_opr1 = ival;
955  } else
956  ci_opr1 = 0x11111111;
957  }
958 
959  // pow/powr/pown(x, c) = [1/](x*x*...*x); where
960  // trunc(c) == c && the number of x factors == |c| && |c| <= 12
961  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
962  if (abs_opr1 <= 12) {
963  Constant *cnval;
964  Value *nval;
965  if (abs_opr1 == 0) {
966  cnval = ConstantFP::get(eltType, 1.0);
967  if (getVecSize(FInfo) > 1) {
968  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
969  }
970  nval = cnval;
971  } else {
972  Value *valx2 = nullptr;
973  nval = nullptr;
974  while (abs_opr1 > 0) {
975  valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
976  if (abs_opr1 & 1) {
977  nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
978  }
979  abs_opr1 >>= 1;
980  }
981  }
982 
983  if (ci_opr1 < 0) {
984  cnval = ConstantFP::get(eltType, 1.0);
985  if (getVecSize(FInfo) > 1) {
986  cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
987  }
988  nval = B.CreateFDiv(cnval, nval, "__1powprod");
989  }
990  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
991  << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
992  << ")\n");
993  replaceCall(nval);
994  return true;
995  }
996 
997  // powr ---> exp2(y * log2(x))
998  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
999  FunctionCallee ExpExpr =
1000  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
1001  if (!ExpExpr)
1002  return false;
1003 
1004  bool needlog = false;
1005  bool needabs = false;
1006  bool needcopysign = false;
1007  Constant *cnval = nullptr;
1008  if (getVecSize(FInfo) == 1) {
1009  CF = dyn_cast<ConstantFP>(opr0);
1010 
1011  if (CF) {
1012  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1013  ? (double)CF->getValueAPF().convertToFloat()
1014  : CF->getValueAPF().convertToDouble();
1015 
1016  V = log2(std::abs(V));
1017  cnval = ConstantFP::get(eltType, V);
1018  needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
1019  CF->isNegative();
1020  } else {
1021  needlog = true;
1022  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1023  (!CF || CF->isNegative());
1024  }
1025  } else {
1026  ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
1027 
1028  if (!CDV) {
1029  needlog = true;
1030  needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
1031  } else {
1032  assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
1033  "Wrong vector size detected");
1034 
1036  for (int i=0; i < getVecSize(FInfo); ++i) {
1037  double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1038  ? (double)CDV->getElementAsFloat(i)
1039  : CDV->getElementAsDouble(i);
1040  if (V < 0.0) needcopysign = true;
1041  V = log2(std::abs(V));
1042  DVal.push_back(V);
1043  }
1044  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1045  SmallVector<float, 0> FVal;
1046  for (unsigned i=0; i < DVal.size(); ++i) {
1047  FVal.push_back((float)DVal[i]);
1048  }
1049  ArrayRef<float> tmp(FVal);
1050  cnval = ConstantDataVector::get(M->getContext(), tmp);
1051  } else {
1052  ArrayRef<double> tmp(DVal);
1053  cnval = ConstantDataVector::get(M->getContext(), tmp);
1054  }
1055  }
1056  }
1057 
1058  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
1059  // We cannot handle corner cases for a general pow() function, give up
1060  // unless y is a constant integral value. Then proceed as if it were pown.
1061  if (getVecSize(FInfo) == 1) {
1062  if (const ConstantFP *CF = dyn_cast<ConstantFP>(opr1)) {
1063  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1064  ? (double)CF->getValueAPF().convertToFloat()
1065  : CF->getValueAPF().convertToDouble();
1066  if (y != (double)(int64_t)y)
1067  return false;
1068  } else
1069  return false;
1070  } else {
1071  if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr1)) {
1072  for (int i=0; i < getVecSize(FInfo); ++i) {
1073  double y = (getArgType(FInfo) == AMDGPULibFunc::F32)
1074  ? (double)CDV->getElementAsFloat(i)
1075  : CDV->getElementAsDouble(i);
1076  if (y != (double)(int64_t)y)
1077  return false;
1078  }
1079  } else
1080  return false;
1081  }
1082  }
1083 
1084  Value *nval;
1085  if (needabs) {
1086  FunctionCallee AbsExpr =
1087  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_FABS, FInfo));
1088  if (!AbsExpr)
1089  return false;
1090  nval = CreateCallEx(B, AbsExpr, opr0, "__fabs");
1091  } else {
1092  nval = cnval ? cnval : opr0;
1093  }
1094  if (needlog) {
1095  FunctionCallee LogExpr =
1096  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1097  if (!LogExpr)
1098  return false;
1099  nval = CreateCallEx(B,LogExpr, nval, "__log2");
1100  }
1101 
1102  if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
1103  // convert int(32) to fp(f32 or f64)
1104  opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1105  }
1106  nval = B.CreateFMul(opr1, nval, "__ylogx");
1107  nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
1108 
1109  if (needcopysign) {
1110  Value *opr_n;
1111  Type* rTy = opr0->getType();
1112  Type* nTyS = eltType->isDoubleTy() ? B.getInt64Ty() : B.getInt32Ty();
1113  Type *nTy = nTyS;
1114  if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
1115  nTy = FixedVectorType::get(nTyS, vTy);
1116  unsigned size = nTy->getScalarSizeInBits();
1117  opr_n = CI->getArgOperand(1);
1118  if (opr_n->getType()->isIntegerTy())
1119  opr_n = B.CreateZExtOrBitCast(opr_n, nTy, "__ytou");
1120  else
1121  opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1122 
1123  Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1124  sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1125  nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
1126  nval = B.CreateBitCast(nval, opr0->getType());
1127  }
1128 
1129  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1130  << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1131  replaceCall(nval);
1132 
1133  return true;
1134 }
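The |c| <= 12 case above is exponentiation by squaring, carried out symbolically by emitting a chain of fmul instructions. The same control flow on host doubles, as a compact reference (an illustrative sketch; powBySquaring is not part of the pass):

    // Mirrors the valx2/nval loop in fold_pow for pow/pown(x, n) with |n| <= 12.
    static double powBySquaring(double X, int N) {
      unsigned Abs = (N < 0) ? -N : N;
      double X2 = 0.0, Prod = 0.0;
      bool HaveX2 = false, HaveProd = false;
      while (Abs > 0) {
        X2 = HaveX2 ? X2 * X2 : X;            // valx2 = valx2 * valx2 (or x first)
        HaveX2 = true;
        if (Abs & 1) {
          Prod = HaveProd ? Prod * X2 : X2;   // nval = nval * valx2 (or valx2)
          HaveProd = true;
        }
        Abs >>= 1;
      }
      if (!HaveProd)                          // n == 0: pow(x, 0) == 1.0
        Prod = 1.0;
      return (N < 0) ? 1.0 / Prod : Prod;     // negative exponent: take the reciprocal
    }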
1135 
1136 bool AMDGPULibCalls::fold_rootn(CallInst *CI, IRBuilder<> &B,
1137  const FuncInfo &FInfo) {
1138  Value *opr0 = CI->getArgOperand(0);
1139  Value *opr1 = CI->getArgOperand(1);
1140 
1141  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
1142  if (!CINT) {
1143  return false;
1144  }
1145  int ci_opr1 = (int)CINT->getSExtValue();
1146  if (ci_opr1 == 1) { // rootn(x, 1) = x
1147  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << "\n");
1148  replaceCall(opr0);
1149  return true;
1150  }
1151  if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
1152  Module *M = CI->getModule();
1153  if (FunctionCallee FPExpr =
1154  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
1155  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> sqrt(" << *opr0 << ")\n");
1156  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
1157  replaceCall(nval);
1158  return true;
1159  }
1160  } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1161  Module *M = CI->getModule();
1162  if (FunctionCallee FPExpr =
1163  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1164  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> cbrt(" << *opr0 << ")\n");
1165  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1166  replaceCall(nval);
1167  return true;
1168  }
1169  } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1170  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> 1.0 / " << *opr0 << "\n");
1171  Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1172  opr0,
1173  "__rootn2div");
1174  replaceCall(nval);
1175  return true;
1176  } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
1177  Module *M = CI->getModule();
1178  if (FunctionCallee FPExpr =
1179  getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
1180  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> rsqrt(" << *opr0
1181  << ")\n");
1182  Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
1183  replaceCall(nval);
1184  return true;
1185  }
1186  }
1187  return false;
1188 }
1189 
1190 bool AMDGPULibCalls::fold_fma_mad(CallInst *CI, IRBuilder<> &B,
1191  const FuncInfo &FInfo) {
1192  Value *opr0 = CI->getArgOperand(0);
1193  Value *opr1 = CI->getArgOperand(1);
1194  Value *opr2 = CI->getArgOperand(2);
1195 
1196  ConstantFP *CF0 = dyn_cast<ConstantFP>(opr0);
1197  ConstantFP *CF1 = dyn_cast<ConstantFP>(opr1);
1198  if ((CF0 && CF0->isZero()) || (CF1 && CF1->isZero())) {
1199  // fma/mad(a, b, c) = c if a=0 || b=0
1200  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr2 << "\n");
1201  replaceCall(opr2);
1202  return true;
1203  }
1204  if (CF0 && CF0->isExactlyValue(1.0f)) {
1205  // fma/mad(a, b, c) = b+c if a=1
1206  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr1 << " + " << *opr2
1207  << "\n");
1208  Value *nval = B.CreateFAdd(opr1, opr2, "fmaadd");
1209  replaceCall(nval);
1210  return true;
1211  }
1212  if (CF1 && CF1->isExactlyValue(1.0f)) {
1213  // fma/mad(a, b, c) = a+c if b=1
1214  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " + " << *opr2
1215  << "\n");
1216  Value *nval = B.CreateFAdd(opr0, opr2, "fmaadd");
1217  replaceCall(nval);
1218  return true;
1219  }
1220  if (ConstantFP *CF = dyn_cast<ConstantFP>(opr2)) {
1221  if (CF->isZero()) {
1222  // fma/mad(a, b, c) = a*b if c=0
1223  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *opr0 << " * "
1224  << *opr1 << "\n");
1225  Value *nval = B.CreateFMul(opr0, opr1, "fmamul");
1226  replaceCall(nval);
1227  return true;
1228  }
1229  }
1230 
1231  return false;
1232 }
1233 
1234 // Get a scalar native builtin single-argument FP function
1235 FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1236  const FuncInfo &FInfo) {
1237  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1238  return nullptr;
1239  FuncInfo nf = FInfo;
1241  return getFunction(M, nf);
1242 }
1243 
1244 // fold sqrt -> native_sqrt (x)
1245 bool AMDGPULibCalls::fold_sqrt(CallInst *CI, IRBuilder<> &B,
1246  const FuncInfo &FInfo) {
1247  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
1248  (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
1249  if (FunctionCallee FPExpr = getNativeFunction(
1251  Value *opr0 = CI->getArgOperand(0);
1252  LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> "
1253  << "sqrt(" << *opr0 << ")\n");
1254  Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
1255  replaceCall(nval);
1256  return true;
1257  }
1258  }
1259  return false;
1260 }
1261 
1262 // fold sin, cos -> sincos.
1263 bool AMDGPULibCalls::fold_sincos(CallInst *CI, IRBuilder<> &B,
1264  AliasAnalysis *AA) {
1265  AMDGPULibFunc fInfo;
1266  if (!AMDGPULibFunc::parse(CI->getCalledFunction()->getName(), fInfo))
1267  return false;
1268 
1269  assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1270  fInfo.getId() == AMDGPULibFunc::EI_COS);
1271  bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1272 
1273  Value *CArgVal = CI->getArgOperand(0);
1274  BasicBlock * const CBB = CI->getParent();
1275 
1276  int const MaxScan = 30;
1277  bool Changed = false;
1278 
1279  { // fold in load value.
1280  LoadInst *LI = dyn_cast<LoadInst>(CArgVal);
1281  if (LI && LI->getParent() == CBB) {
1282  BasicBlock::iterator BBI = LI->getIterator();
1283  Value *AvailableVal = FindAvailableLoadedValue(LI, CBB, BBI, MaxScan, AA);
1284  if (AvailableVal) {
1285  Changed = true;
1286  CArgVal->replaceAllUsesWith(AvailableVal);
1287  if (CArgVal->getNumUses() == 0)
1288  LI->eraseFromParent();
1289  CArgVal = CI->getArgOperand(0);
1290  }
1291  }
1292  }
1293 
1294  Module *M = CI->getModule();
1296  std::string const PairName = fInfo.mangle();
1297 
1298  CallInst *UI = nullptr;
1299  for (User* U : CArgVal->users()) {
1300  CallInst *XI = dyn_cast_or_null<CallInst>(U);
1301  if (!XI || XI == CI || XI->getParent() != CBB)
1302  continue;
1303 
1304  Function *UCallee = XI->getCalledFunction();
1305  if (!UCallee || !UCallee->getName().equals(PairName))
1306  continue;
1307 
1309  if (BBI == CI->getParent()->begin())
1310  break;
1311  --BBI;
1312  for (int I = MaxScan; I > 0 && BBI != CBB->begin(); --BBI, --I) {
1313  if (cast<Instruction>(BBI) == XI) {
1314  UI = XI;
1315  break;
1316  }
1317  }
1318  if (UI) break;
1319  }
1320 
1321  if (!UI)
1322  return Changed;
1323 
1324  // Merge the sin and cos.
1325 
1326  // For OpenCL 2.0 we have only a generic implementation of the sincos
1327  // function.
1330  FunctionCallee Fsincos = getFunction(M, nf);
1331  if (!Fsincos)
1332  return Changed;
1333 
1334  BasicBlock::iterator ItOld = B.GetInsertPoint();
1335  AllocaInst *Alloc = insertAlloca(UI, B, "__sincos_");
1336  B.SetInsertPoint(UI);
1337 
1338  Value *P = Alloc;
1339  Type *PTy = Fsincos.getFunctionType()->getParamType(1);
1340  // The alloca instruction allocates memory in the private address space. This
1341  // needs to be cast to point to the address space of the cos pointer type.
1342  // In OpenCL 2.0 this is generic, while in 1.2 it is private.
1344  P = B.CreateAddrSpaceCast(Alloc, PTy);
1345  CallInst *Call = CreateCallEx2(B, Fsincos, UI->getArgOperand(0), P);
1346 
1347  LLVM_DEBUG(errs() << "AMDIC: fold_sincos (" << *CI << ", " << *UI << ") with "
1348  << *Call << "\n");
1349 
1350  if (!isSin) { // CI->cos, UI->sin
1351  B.SetInsertPoint(&*ItOld);
1352  UI->replaceAllUsesWith(&*Call);
1353  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1354  CI->replaceAllUsesWith(Reload);
1355  UI->eraseFromParent();
1356  CI->eraseFromParent();
1357  } else { // CI->sin, UI->cos
1358  Instruction *Reload = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
1359  UI->replaceAllUsesWith(Reload);
1360  CI->replaceAllUsesWith(Call);
1361  UI->eraseFromParent();
1362  CI->eraseFromParent();
1363  }
1364  return true;
1365 }
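At the source level, the merge performed by fold_sincos corresponds to the following (a sketch; the real transformation rewires IR, allocates the result slot with insertAlloca in the entry block, and may insert an address-space cast for the pointer argument):

    // before: sin and cos of the same value in the same basic block
    float s = sin(x);
    float c = cos(x);

    // after: one combined library call plus a reload of the stored cosine
    float c_slot;                  // the "__sincos_" alloca
    float s = sincos(x, &c_slot);
    float c = c_slot;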
1366 
1367 bool AMDGPULibCalls::fold_wavefrontsize(CallInst *CI, IRBuilder<> &B) {
1368  if (!TM)
1369  return false;
1370 
1371  StringRef CPU = TM->getTargetCPU();
1372  StringRef Features = TM->getTargetFeatureString();
1373  if ((CPU.empty() || CPU.equals_insensitive("generic")) &&
1374  (Features.empty() ||
1375  Features.find_insensitive("wavefrontsize") == StringRef::npos))
1376  return false;
1377 
1378  Function *F = CI->getParent()->getParent();
1379  const GCNSubtarget &ST = TM->getSubtarget<GCNSubtarget>(*F);
1380  unsigned N = ST.getWavefrontSize();
1381 
1382  LLVM_DEBUG(errs() << "AMDIC: fold_wavefrontsize (" << *CI << ") with "
1383  << N << "\n");
1384 
1385  CI->replaceAllUsesWith(ConstantInt::get(B.getInt32Ty(), N));
1386  CI->eraseFromParent();
1387  return true;
1388 }
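For example (the subtargets named here are an assumption for illustration): on a wave64 subtarget the call

    %ws = call i32 @llvm.amdgcn.wavefrontsize()

is replaced by the constant i32 64, and on a wave32 subtarget by i32 32. The fold is skipped in pre-link mode (see the EnablePreLink check in fold()) and when neither the target CPU nor the feature string pins down the wavefront size.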
1389 
1390 // Get insertion point at entry.
1391 BasicBlock::iterator AMDGPULibCalls::getEntryIns(CallInst * UI) {
1392  Function * Func = UI->getParent()->getParent();
1393  BasicBlock * BB = &Func->getEntryBlock();
1394  assert(BB && "Entry block not found!");
1395  BasicBlock::iterator ItNew = BB->begin();
1396  return ItNew;
1397 }
1398 
1399 // Insert an AllocaInst at the beginning of the function entry block.
1400 AllocaInst* AMDGPULibCalls::insertAlloca(CallInst *UI, IRBuilder<> &B,
1401  const char *prefix) {
1402  BasicBlock::iterator ItNew = getEntryIns(UI);
1403  Function *UCallee = UI->getCalledFunction();
1404  Type *RetType = UCallee->getReturnType();
1405  B.SetInsertPoint(&*ItNew);
1406  AllocaInst *Alloc = B.CreateAlloca(RetType, 0,
1407  std::string(prefix) + UI->getName());
1408  Alloc->setAlignment(
1409  Align(UCallee->getParent()->getDataLayout().getTypeAllocSize(RetType)));
1410  return Alloc;
1411 }
1412 
1413 bool AMDGPULibCalls::evaluateScalarMathFunc(FuncInfo &FInfo,
1414  double& Res0, double& Res1,
1415  Constant *copr0, Constant *copr1,
1416  Constant *copr2) {
1417  // By default, opr0/opr1/opr2 hold values of float/double type.
1418  // If they are not float/double, each function has to handle its
1419  // operands separately.
1420  double opr0=0.0, opr1=0.0, opr2=0.0;
1421  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
1422  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
1423  ConstantFP *fpopr2 = dyn_cast_or_null<ConstantFP>(copr2);
1424  if (fpopr0) {
1425  opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1426  ? fpopr0->getValueAPF().convertToDouble()
1427  : (double)fpopr0->getValueAPF().convertToFloat();
1428  }
1429 
1430  if (fpopr1) {
1431  opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1432  ? fpopr1->getValueAPF().convertToDouble()
1433  : (double)fpopr1->getValueAPF().convertToFloat();
1434  }
1435 
1436  if (fpopr2) {
1437  opr2 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1438  ? fpopr2->getValueAPF().convertToDouble()
1439  : (double)fpopr2->getValueAPF().convertToFloat();
1440  }
1441 
1442  switch (FInfo.getId()) {
1443  default : return false;
1444 
1446  Res0 = acos(opr0);
1447  return true;
1448 
1450  // acosh(x) == log(x + sqrt(x*x - 1))
1451  Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
1452  return true;
1453 
1455  Res0 = acos(opr0) / MATH_PI;
1456  return true;
1457 
1459  Res0 = asin(opr0);
1460  return true;
1461 
1463  // asinh(x) == log(x + sqrt(x*x + 1))
1464  Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
1465  return true;
1466 
1468  Res0 = asin(opr0) / MATH_PI;
1469  return true;
1470 
1472  Res0 = atan(opr0);
1473  return true;
1474 
1476  // atanh(x) == (log(x+1) - log(x-1))/2;
1477  Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
1478  return true;
1479 
1481  Res0 = atan(opr0) / MATH_PI;
1482  return true;
1483 
1485  Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
1486  return true;
1487 
1488  case AMDGPULibFunc::EI_COS:
1489  Res0 = cos(opr0);
1490  return true;
1491 
1493  Res0 = cosh(opr0);
1494  return true;
1495 
1497  Res0 = cos(MATH_PI * opr0);
1498  return true;
1499 
1500  case AMDGPULibFunc::EI_EXP:
1501  Res0 = exp(opr0);
1502  return true;
1503 
1505  Res0 = pow(2.0, opr0);
1506  return true;
1507 
1509  Res0 = pow(10.0, opr0);
1510  return true;
1511 
1513  Res0 = exp(opr0) - 1.0;
1514  return true;
1515 
1516  case AMDGPULibFunc::EI_LOG:
1517  Res0 = log(opr0);
1518  return true;
1519 
1521  Res0 = log(opr0) / log(2.0);
1522  return true;
1523 
1525  Res0 = log(opr0) / log(10.0);
1526  return true;
1527 
1529  Res0 = 1.0 / sqrt(opr0);
1530  return true;
1531 
1532  case AMDGPULibFunc::EI_SIN:
1533  Res0 = sin(opr0);
1534  return true;
1535 
1537  Res0 = sinh(opr0);
1538  return true;
1539 
1541  Res0 = sin(MATH_PI * opr0);
1542  return true;
1543 
1545  Res0 = sqrt(opr0);
1546  return true;
1547 
1548  case AMDGPULibFunc::EI_TAN:
1549  Res0 = tan(opr0);
1550  return true;
1551 
1553  Res0 = tanh(opr0);
1554  return true;
1555 
1557  Res0 = tan(MATH_PI * opr0);
1558  return true;
1559 
1561  Res0 = 1.0 / opr0;
1562  return true;
1563 
1564  // two-arg functions
1566  Res0 = opr0 / opr1;
1567  return true;
1568 
1569  case AMDGPULibFunc::EI_POW:
1571  Res0 = pow(opr0, opr1);
1572  return true;
1573 
1574  case AMDGPULibFunc::EI_POWN: {
1575  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1576  double val = (double)iopr1->getSExtValue();
1577  Res0 = pow(opr0, val);
1578  return true;
1579  }
1580  return false;
1581  }
1582 
1583  case AMDGPULibFunc::EI_ROOTN: {
1584  if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1585  double val = (double)iopr1->getSExtValue();
1586  Res0 = pow(opr0, 1.0 / val);
1587  return true;
1588  }
1589  return false;
1590  }
1591 
1592  // with ptr arg
1594  Res0 = sin(opr0);
1595  Res1 = cos(opr0);
1596  return true;
1597 
1598  // three-arg functions
1599  case AMDGPULibFunc::EI_FMA:
1600  case AMDGPULibFunc::EI_MAD:
1601  Res0 = opr0 * opr1 + opr2;
1602  return true;
1603  }
1604 
1605  return false;
1606 }
1607 
1608 bool AMDGPULibCalls::evaluateCall(CallInst *aCI, FuncInfo &FInfo) {
1609  int numArgs = (int)aCI->getNumArgOperands();
1610  if (numArgs > 3)
1611  return false;
1612 
1613  Constant *copr0 = nullptr;
1614  Constant *copr1 = nullptr;
1615  Constant *copr2 = nullptr;
1616  if (numArgs > 0) {
1617  if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1618  return false;
1619  }
1620 
1621  if (numArgs > 1) {
1622  if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1623  if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1624  return false;
1625  }
1626  }
1627 
1628  if (numArgs > 2) {
1629  if ((copr2 = dyn_cast<Constant>(aCI->getArgOperand(2))) == nullptr)
1630  return false;
1631  }
1632 
1633  // At this point, all arguments to aCI are constants.
1634 
1635  // max vector size is 16, and sincos will generate two results.
1636  double DVal0[16], DVal1[16];
1637  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1638  if (getVecSize(FInfo) == 1) {
1639  if (!evaluateScalarMathFunc(FInfo, DVal0[0],
1640  DVal1[0], copr0, copr1, copr2)) {
1641  return false;
1642  }
1643  } else {
1644  ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1645  ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1646  ConstantDataVector *CDV2 = dyn_cast_or_null<ConstantDataVector>(copr2);
1647  for (int i=0; i < getVecSize(FInfo); ++i) {
1648  Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1649  Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1650  Constant *celt2 = CDV2 ? CDV2->getElementAsConstant(i) : nullptr;
1651  if (!evaluateScalarMathFunc(FInfo, DVal0[i],
1652  DVal1[i], celt0, celt1, celt2)) {
1653  return false;
1654  }
1655  }
1656  }
1657 
1658  LLVMContext &context = CI->getParent()->getParent()->getContext();
1659  Constant *nval0, *nval1;
1660  if (getVecSize(FInfo) == 1) {
1661  nval0 = ConstantFP::get(CI->getType(), DVal0[0]);
1662  if (hasTwoResults)
1663  nval1 = ConstantFP::get(CI->getType(), DVal1[0]);
1664  } else {
1665  if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1666  SmallVector <float, 0> FVal0, FVal1;
1667  for (int i=0; i < getVecSize(FInfo); ++i)
1668  FVal0.push_back((float)DVal0[i]);
1669  ArrayRef<float> tmp0(FVal0);
1670  nval0 = ConstantDataVector::get(context, tmp0);
1671  if (hasTwoResults) {
1672  for (int i=0; i < getVecSize(FInfo); ++i)
1673  FVal1.push_back((float)DVal1[i]);
1674  ArrayRef<float> tmp1(FVal1);
1675  nval1 = ConstantDataVector::get(context, tmp1);
1676  }
1677  } else {
1678  ArrayRef<double> tmp0(DVal0);
1679  nval0 = ConstantDataVector::get(context, tmp0);
1680  if (hasTwoResults) {
1681  ArrayRef<double> tmp1(DVal1);
1682  nval1 = ConstantDataVector::get(context, tmp1);
1683  }
1684  }
1685  }
1686 
1687  if (hasTwoResults) {
1688  // sincos
1689  assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
1690  "math function with ptr arg not supported yet");
1691  new StoreInst(nval1, aCI->getArgOperand(1), aCI);
1692  }
1693 
1694  replaceCall(nval0);
1695  return true;
1696 }
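evaluateScalarMathFunc computes each supported case with host double math, and the results are then materialized back as ConstantFP/ConstantDataVector constants. A self-contained check of two of the identities used above (an illustrative sketch, not part of the pass):

    #include <cassert>
    #include <cmath>

    int main() {
      double X = 2.0;
      // EI_ACOSH: acosh(x) == log(x + sqrt(x*x - 1))
      assert(std::abs(std::log(X + std::sqrt(X * X - 1.0)) - std::acosh(X)) < 1e-12);
      // EI_ROOTN: rootn(x, n) is evaluated as pow(x, 1.0/n); the cube root of 8 is 2
      assert(std::abs(std::pow(8.0, 1.0 / 3.0) - 2.0) < 1e-12);
      return 0;
    }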
1697 
1698 // Public interface to the Simplify LibCalls pass.
1700  return new AMDGPUSimplifyLibCalls(TM);
1701 }
1702 
1704  return new AMDGPUUseNativeCalls();
1705 }
1706 
1708  if (skipFunction(F))
1709  return false;
1710 
1711  bool Changed = false;
1712  auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1713 
1714  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1715  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1716 
1717  for (auto &BB : F) {
1718  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1719  // Ignore non-calls.
1720  CallInst *CI = dyn_cast<CallInst>(I);
1721  ++I;
1722  // Ignore intrinsics that do not become real instructions.
1723  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1724  continue;
1725 
1726  // Ignore indirect calls.
1728  if (Callee == 0) continue;
1729 
1730  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1731  dbgs().flush());
1732  if(Simplifier.fold(CI, AA))
1733  Changed = true;
1734  }
1735  }
1736  return Changed;
1737 }
1738 
1741  AMDGPULibCalls Simplifier(&TM);
1742  Simplifier.initNativeFuncs();
1743 
1744  bool Changed = false;
1745  auto AA = &AM.getResult<AAManager>(F);
1746 
1747  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1748  F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1749 
1750  for (auto &BB : F) {
1751  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1752  // Ignore non-calls.
1753  CallInst *CI = dyn_cast<CallInst>(I);
1754  ++I;
1755  // Ignore intrinsics that do not become real instructions.
1756  if (!CI || isa<DbgInfoIntrinsic>(CI) || CI->isLifetimeStartOrEnd())
1757  continue;
1758 
1759  // Ignore indirect calls.
1761  if (Callee == 0)
1762  continue;
1763 
1764  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << "\n";
1765  dbgs().flush());
1766  if (Simplifier.fold(CI, AA))
1767  Changed = true;
1768  }
1769  }
1770  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1771 }
1772 
1774  if (skipFunction(F) || UseNative.empty())
1775  return false;
1776 
1777  bool Changed = false;
1778  for (auto &BB : F) {
1779  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ) {
1780  // Ignore non-calls.
1781  CallInst *CI = dyn_cast<CallInst>(I);
1782  ++I;
1783  if (!CI) continue;
1784 
1785  // Ignore indirect calls.
1787  if (Callee == 0) continue;
1788 
1789  if(Simplifier.useNative(CI))
1790  Changed = true;
1791  }
1792  }
1793  return Changed;
1794 }
1795 
1798  if (UseNative.empty())
1799  return PreservedAnalyses::all();
1800 
1801  AMDGPULibCalls Simplifier;
1802  Simplifier.initNativeFuncs();
1803 
1804  bool Changed = false;
1805  for (auto &BB : F) {
1806  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1807  // Ignore non-calls.
1808  CallInst *CI = dyn_cast<CallInst>(I);
1809  ++I;
1810  if (!CI)
1811  continue;
1812 
1813  // Ignore indirect calls.
1815  if (Callee == 0)
1816  continue;
1817 
1818  if (Simplifier.useNative(CI))
1819  Changed = true;
1820  }
1821  }
1822  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1823 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::AMDGPULibFunc::isMangled
bool isMangled() const
Definition: AMDGPULibFunc.h:379
llvm::AMDGPULibFuncBase::EI_ASINH
@ EI_ASINH
Definition: AMDGPULibFunc.h:46
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1233
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:4898
llvm::AMDGPULibFuncBase::EI_NFMA
@ EI_NFMA
Definition: AMDGPULibFunc.h:224
llvm::AMDGPULibCalls::AMDGPULibCalls
AMDGPULibCalls(const TargetMachine *TM_=nullptr)
Definition: AMDGPULibCalls.cpp:142
getOptTable
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:409
tbl_asin
static const TableEntry tbl_asin[]
Definition: AMDGPULibCalls.cpp:247
tbl_sinpi
static const TableEntry tbl_sinpi[]
Definition: AMDGPULibCalls.cpp:348
TableRef::table
const TableEntry * table
Definition: AMDGPULibCalls.cpp:401
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:66
llvm::AMDGPULibFuncBase::EI_ACOSH
@ EI_ACOSH
Definition: AMDGPULibFunc.h:40
llvm::StringRef::empty
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:153
llvm::AMDGPULibFuncBase::F64
@ F64
Definition: AMDGPULibFunc.h:272
tbl_tan
static const TableEntry tbl_tan[]
Definition: AMDGPULibCalls.cpp:357
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
tbl_acos
static const TableEntry tbl_acos[]
Definition: AMDGPULibCalls.cpp:232
tbl_log2
static const TableEntry tbl_log2[]
Definition: AMDGPULibCalls.cpp:328
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
Loads.h
llvm::Function
Definition: Function.h:61
llvm::Attribute
Definition: Attributes.h:52
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::CallBase::setCalledFunction
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
Definition: InstrTypes.h:1429
TableRef::size
size_t size
Definition: AMDGPULibCalls.cpp:400
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:693
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:60
llvm::AMDGPULibFuncBase::EI_NCOS
@ EI_NCOS
Definition: AMDGPULibFunc.h:222
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
tbl_atanh
static const TableEntry tbl_atanh[]
Definition: AMDGPULibCalls.cpp:269
llvm::AMDGPULibFuncBase::EI_ROOTN
@ EI_ROOTN
Definition: AMDGPULibFunc.h:162
llvm::AMDGPULibFuncBase::EI_READ_PIPE_2
@ EI_READ_PIPE_2
Definition: AMDGPULibFunc.h:238
llvm::User::dropAllReferences
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
MATH_PI
#define MATH_PI
Definition: AMDGPULibCalls.cpp:38
llvm::cl::CommaSeparated
@ CommaSeparated
Definition: CommandLine.h:169
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:733
tbl_log
static const TableEntry tbl_log[]
Definition: AMDGPULibCalls.cpp:324
llvm::IRBuilder<>
llvm::cl::ValueOptional
@ ValueOptional
Definition: CommandLine.h:136
llvm::AMDGPULibFuncBase::EI_ATANPI
@ EI_ATANPI
Definition: AMDGPULibFunc.h:54
llvm::FunctionType::get
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
Definition: Type.cpp:327
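A minimal usage sketch for this constructor (not code from AMDGPULibCalls.cpp; the helper name is hypothetical): building the float(float, float) type a two-argument math prototype would need.

  #include "llvm/IR/DerivedTypes.h"
  using namespace llvm;

  // Construct the type "float (float, float)"; {FltTy, FltTy} binds to the
  // ArrayRef<Type *> parameter, and the final flag marks it non-variadic.
  static FunctionType *makeBinaryFloatFnTy(LLVMContext &Ctx) {
    Type *FltTy = Type::getFloatTy(Ctx);
    return FunctionType::get(FltTy, {FltTy, FltTy}, /*isVarArg=*/false);
  }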
tbl_log10
static const TableEntry tbl_log10[]
Definition: AMDGPULibCalls.cpp:332
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:143
llvm::AMDGPULibFuncBase::EI_POWR
@ EI_POWR
Definition: AMDGPULibFunc.h:152
llvm::ConstantDataVector::get
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
Definition: Constants.cpp:3070
llvm::ConstantDataSequential::getElementAsFloat
float getElementAsFloat(unsigned i) const
If this is a sequential container of floats, return the specified element as a float.
Definition: Constants.cpp:3251
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
llvm::AMDGPULibFunc::parse
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
Definition: AMDGPULibFunc.cpp:680
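A minimal sketch of how this parser might be queried (illustrative only; isRecognizedLibCall is a hypothetical helper, not a function in this file).

  #include "AMDGPULibFunc.h"
  using namespace llvm;

  // Returns true when the callee name demangles to a known library function;
  // on success, Info describes its id, prefix, and leading parameters.
  static bool isRecognizedLibCall(StringRef CalleeName) {
    AMDGPULibFunc Info;
    return AMDGPULibFunc::parse(CalleeName, Info);
  }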
llvm::ConstantFP::isZero
bool isZero() const
Return true if the value is positive or negative zero.
Definition: Constants.h:301
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AMDGPULibFuncBase::EI_ERFC
@ EI_ERFC
Definition: AMDGPULibFunc.h:84
llvm::AMDGPULibFuncBase::EI_TANPI
@ EI_TANPI
Definition: AMDGPULibFunc.h:195
llvm::CallBase::getAttributes
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1468
llvm::ConstantAggregateZero
All zero aggregate value.
Definition: Constants.h:336
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:297
llvm::AMDGPULibFuncBase::EI_COSH
@ EI_COSH
Definition: AMDGPULibFunc.h:75
tbl_cbrt
static const TableEntry tbl_cbrt[]
Definition: AMDGPULibCalls.cpp:279
llvm::GCNSubtarget
Definition: GCNSubtarget.h:38
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:421
llvm::errs
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
Definition: raw_ostream.cpp:892
llvm::AMDGPULibFuncBase::EI_CBRT
@ EI_CBRT
Definition: AMDGPULibFunc.h:67
llvm::AMDGPULibFuncBase::EI_SIN
@ EI_SIN
Definition: AMDGPULibFunc.h:171
llvm::CallBase::getNumArgOperands
unsigned getNumArgOperands() const
Definition: InstrTypes.h:1336
llvm::AMDGPULibFuncBase::EI_LOG
@ EI_LOG
Definition: AMDGPULibFunc.h:129
llvm::AMDGPULibFuncBase::EI_POW
@ EI_POW
Definition: AMDGPULibFunc.h:150
llvm::AMDGPULibFuncBase::EI_RECIP
@ EI_RECIP
Definition: AMDGPULibFunc.h:155
llvm::AMDGPULibFuncBase::EI_NSIN
@ EI_NSIN
Definition: AMDGPULibFunc.h:228
llvm::Attribute::getValueAsBool
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:287
llvm::initializeAMDGPUSimplifyLibCallsPass
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
llvm::AMDGPULibCalls
Definition: AMDGPULibCalls.cpp:45
tbl_erf
static const TableEntry tbl_erf[]
Definition: AMDGPULibCalls.cpp:301
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::AMDGPULibFuncBase::EI_NSQRT
@ EI_NSQRT
Definition: AMDGPULibFunc.h:229
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:180
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::AMDGPULibCalls::replaceCall
void replaceCall(Value *With)
Definition: AMDGPULibCalls.cpp:136
llvm::AMDGPULibFunc::getLeads
Param * getLeads()
Get leading parameters for mangled lib functions.
Definition: AMDGPULibFunc.cpp:1050
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::AMDGPULibFuncBase::getEPtrKindFromAddrSpace
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Definition: AMDGPULibFunc.h:312
llvm::AMDGPULibFunc
Wrapper class for AMDGPULibFuncImpl.
Definition: AMDGPULibFunc.h:357
tbl_acospi
static const TableEntry tbl_acospi[]
Definition: AMDGPULibCalls.cpp:241
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::AMDGPULibFuncBase::EI_TANH
@ EI_TANH
Definition: AMDGPULibFunc.h:194
llvm::AMDGPULibFuncBase::EI_FMA
@ EI_FMA
Definition: AMDGPULibFunc.h:95
DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
llvm::AMDGPULibFuncBase::EI_ATANH
@ EI_ATANH
Definition: AMDGPULibFunc.h:53
llvm::createAMDGPUUseNativeCallsPass
FunctionPass * createAMDGPUUseNativeCallsPass()
Definition: AMDGPULibCalls.cpp:1703
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
llvm::AMDGPULibFuncBase::EType
EType
Definition: AMDGPULibFunc.h:252
llvm::AAResults
Definition: AliasAnalysis.h:456
GCNSubtarget.h
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
tbl_exp
static const TableEntry tbl_exp[]
Definition: AMDGPULibCalls.cpp:305
llvm::AMDGPULibFuncBase::EI_FABS
@ EI_FABS
Definition: AMDGPULibFunc.h:89
llvm::User
Definition: User.h:44
llvm::AMDGPULibCalls::useNative
bool useNative(CallInst *CI)
Definition: AMDGPULibCalls.cpp:527
llvm::AMDGPULibFunc::getOrInsertFunction
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
Definition: AMDGPULibFunc.cpp:959
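A hedged sketch of one way a parsed descriptor could be retargeted to its NATIVE-prefixed variant and materialized in a module; the helper is hypothetical and the surrounding flow is assumed rather than quoted.

  #include "AMDGPULibFunc.h"
  #include "llvm/IR/Module.h"
  using namespace llvm;

  // Copy the parsed descriptor, switch its prefix to NATIVE (e.g. exp ->
  // native_exp), and get-or-insert the corresponding prototype in M.
  static FunctionCallee getNativeVariant(Module *M, const AMDGPULibFunc &FInfo) {
    AMDGPULibFunc NativeInfo(FInfo);
    NativeInfo.setPrefix(AMDGPULibFunc::NATIVE);
    return AMDGPULibFunc::getOrInsertFunction(M, NativeInfo);
  }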
llvm::CallBase::getCalledFunction
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation.
Definition: InstrTypes.h:1393
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:296
llvm::initializeAMDGPUUseNativeCallsPass
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
llvm::CallInst::Create
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Definition: Instructions.h:1518
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
tbl_asinpi
static const TableEntry tbl_asinpi[]
Definition: AMDGPULibCalls.cpp:257
llvm::AMDGPULibFuncBase::EI_ACOSPI
@ EI_ACOSPI
Definition: AMDGPULibFunc.h:41
false
Definition: StackSlotColoring.cpp:142
llvm::AMDGPULibFuncBase::EI_NEXP2
@ EI_NEXP2
Definition: AMDGPULibFunc.h:223
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::APFloat::convertToDouble
double convertToDouble() const
Converts this APFloat to host double value.
Definition: APFloat.cpp:4885
llvm::Instruction
Definition: Instruction.h:45
llvm::ConstantDataVector::getSplatValue
Constant * getSplatValue() const
If this is a splat constant, meaning that all of the elements have the same value,...
Definition: Constants.cpp:3308
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::ConstantFP::isExactlyValue
bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition: Constants.cpp:1070
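An illustrative sketch of the kind of check this predicate enables, e.g. recognizing a literal exponent of exactly 0.5 before a pow-to-sqrt style rewrite; the helper name is hypothetical and the convenience double overload of isExactlyValue is used for brevity.

  #include "llvm/IR/Constants.h"
  using namespace llvm;

  // True when Expo is a floating-point constant equal to 0.5; the double
  // overload converts 0.5 into the constant's own float semantics first.
  static bool isHalfConstant(const Value *Expo) {
    const auto *CF = dyn_cast<ConstantFP>(Expo);
    return CF && CF->isExactlyValue(0.5);
  }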
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:186
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:899
llvm::FunctionCallee::getFunctionType
FunctionType * getFunctionType()
Definition: DerivedTypes.h:181
getVecSize
static int getVecSize(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:453
llvm::AMDGPULibFuncBase::EI_ASIN
@ EI_ASIN
Definition: AMDGPULibFunc.h:45
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::AMDGPULibFuncBase::EI_TAN
@ EI_TAN
Definition: AMDGPULibFunc.h:193
tbl_expm1
static const TableEntry tbl_expm1[]
Definition: AMDGPULibCalls.cpp:320
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:650
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
tbl_atan
static const TableEntry tbl_atan[]
Definition: AMDGPULibCalls.cpp:263
llvm::Instruction::isLifetimeStartOrEnd
bool isLifetimeStartOrEnd() const
Return true if the instruction is a llvm.lifetime.start or llvm.lifetime.end marker.
Definition: Instruction.cpp:706
Name
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Definition: AMDGPULibCalls.cpp:207
llvm::AMDGPUSimplifyLibCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1739
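For context, an illustrative sketch of the new-pass-manager convention this entry follows; ExampleLibCallPass is a hypothetical stand-in, not a pass defined in this file.

  #include "llvm/IR/PassManager.h"
  using namespace llvm;

  struct ExampleLibCallPass : PassInfoMixin<ExampleLibCallPass> {
    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
      bool Changed = false;
      // ... visit the calls in F and attempt folds, setting Changed ...
      return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
    }
  };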
HasNative
static bool HasNative(AMDGPULibFunc::EFuncId id)
Definition: AMDGPULibCalls.cpp:376
tbl_tanh
static const TableEntry tbl_tanh[]
Definition: AMDGPULibCalls.cpp:361
llvm::AMDGPULibFuncBase::NOPFX
@ NOPFX
Definition: AMDGPULibFunc.h:247
llvm::AMDGPULibFuncBase::EI_EXP10
@ EI_EXP10
Definition: AMDGPULibFunc.h:86
llvm::StringRef::equals
LLVM_NODISCARD bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:187
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::AMDGPULibFuncBase::EI_READ_PIPE_4
@ EI_READ_PIPE_4
Definition: AMDGPULibFunc.h:239
llvm::AMDGPULibFuncBase::EI_ACOS
@ EI_ACOS
Definition: AMDGPULibFunc.h:39
AMDGPULibFunc.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
TableRef::TableRef
TableRef()
Definition: AMDGPULibCalls.cpp:403
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:388
llvm::cl::opt< bool >
llvm::AMDGPULibFuncBase::HALF
@ HALF
Definition: AMDGPULibFunc.h:249
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::AMDGPULibCalls::isUnsafeMath
bool isUnsafeMath(const CallInst *CI) const
Definition: AMDGPULibCalls.cpp:474
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:78
llvm::StringRef::equals_insensitive
LLVM_NODISCARD bool equals_insensitive(StringRef RHS) const
Check for string equality, ignoring case.
Definition: StringRef.h:194
llvm::Function::getReturnType
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:180
llvm::AMDGPULibFuncBase::F32
@ F32
Definition: AMDGPULibFunc.h:271
tbl_cospi
static const TableEntry tbl_cospi[]
Definition: AMDGPULibCalls.cpp:293
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:387
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::AMDGPULibFuncBase::EI_EXP
@ EI_EXP
Definition: AMDGPULibFunc.h:85
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::AMDGPULibFuncBase::EI_TGAMMA
@ EI_TGAMMA
Definition: AMDGPULibFunc.h:196
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_4
@ EI_WRITE_PIPE_4
Definition: AMDGPULibFunc.h:241
llvm::AMDGPULibFuncBase::EI_COSPI
@ EI_COSPI
Definition: AMDGPULibFunc.h:76
llvm::AMDGPULibFunc::mangle
std::string mangle() const
Definition: AMDGPULibFunc.h:387
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::ConstantDataVector
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:752
llvm::FunctionType::getParamType
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:134
getArgType
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
Definition: AMDGPULibCalls.cpp:457
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
llvm::AMDGPUUseNativeCallsPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AMDGPULibCalls.cpp:1796
llvm::AMDGPULibCalls::initNativeFuncs
void initNativeFuncs()
Definition: AMDGPULibCalls.cpp:487
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1612
tbl_asinh
static const TableEntry tbl_asinh[]
Definition: AMDGPULibCalls.cpp:253
false
amdgpu Simplify well known AMD library false
Definition: AMDGPULibCalls.cpp:199
llvm::AMDGPULibFunc::getNumArgs
unsigned getNumArgs() const
Definition: AMDGPULibFunc.h:372
IRBuilder.h
llvm::ConstantFP::isNegative
bool isNegative() const
Return true if the sign bit is set.
Definition: Constants.h:304
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::FPMathOperator
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition: Operator.h:250
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::ConstantDataSequential::getElementAsConstant
Constant * getElementAsConstant(unsigned i) const
Return a Constant for a specified index's element.
Definition: Constants.cpp:3263
llvm::AMDGPULibFuncBase::EI_ASINPI
@ EI_ASINPI
Definition: AMDGPULibFunc.h:47
llvm::AMDGPULibFuncBase::EI_SINCOS
@ EI_SINCOS
Definition: AMDGPULibFunc.h:172
llvm::AMDGPULibFuncBase::EI_WRITE_PIPE_2
@ EI_WRITE_PIPE_2
Definition: AMDGPULibFunc.h:240
llvm::AMDGPULibFuncBase::EI_SINH
@ EI_SINH
Definition: AMDGPULibFunc.h:173
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
tbl_cos
static const TableEntry tbl_cos[]
Definition: AMDGPULibCalls.cpp:285
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1528
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::AMDGPULibFuncBase::Param::VectorSize
unsigned char VectorSize
Definition: AMDGPULibFunc.h:293
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::AMDGPULibFuncBase::EI_DIVIDE
@ EI_DIVIDE
Definition: AMDGPULibFunc.h:81
tbl_erfc
static const TableEntry tbl_erfc[]
Definition: AMDGPULibCalls.cpp:297
llvm::Value::getNumUses
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:243
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::AMDGPULibFuncBase::EI_COS
@ EI_COS
Definition: AMDGPULibFunc.h:74
llvm::AMDGPULibFuncBase::EI_LOG2
@ EI_LOG2
Definition: AMDGPULibFunc.h:132
tbl_atanpi
static const TableEntry tbl_atanpi[]
Definition: AMDGPULibCalls.cpp:273
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:520
llvm::AMDGPULibFuncBase::EFuncId
EFuncId
Definition: AMDGPULibFunc.h:23
llvm::AMDGPULibFunc::setId
void setId(EFuncId Id)
Definition: AMDGPULibFunc.h:380
llvm::AMDGPULibFuncBase::EI_EXP2
@ EI_EXP2
Definition: AMDGPULibFunc.h:87
tbl_tanpi
static const TableEntry tbl_tanpi[]
Definition: AMDGPULibCalls.cpp:365
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
llvm::TargetMachine::getTargetFeatureString
StringRef getTargetFeatureString() const
Definition: TargetMachine.h:125
llvm::AMDGPULibFunc::setPrefix
void setPrefix(ENamePrefix PFX)
Definition: AMDGPULibFunc.h:390
llvm::ConstantInt::getSExtValue
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:148
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::AMDGPULibFuncBase::EI_NRSQRT
@ EI_NRSQRT
Definition: AMDGPULibFunc.h:227
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:297
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
UseNative
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
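A hedged sketch of how such a comma-separated list might be consulted, assuming the UseNative declaration above is in scope; the helper name is hypothetical and the special "all" entry follows the option's own description.

  // Iterate the parsed option values; an entry of "all" or an exact match on
  // the function name requests the native variant.
  static bool nameRequestedAsNative(StringRef FuncName) {
    for (const std::string &Entry : UseNative)
      if (Entry == "all" || FuncName.equals(Entry))
        return true;
    return false;
  }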
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:183
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::BasicBlock::getContext
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
EnablePreLink
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:204
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", "Simplify well-known AMD library calls", false, false) INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:207
llvm::AMDGPULibFuncBase::EI_ERF
@ EI_ERF
Definition: AMDGPULibFunc.h:83
llvm::ConstantDataSequential::getElementAsDouble
double getElementAsDouble(unsigned i) const
If this is a sequential container of doubles, return the specified element as a double.
Definition: Constants.cpp:3257
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::AMDGPULibFuncBase::EI_MAD
@ EI_MAD
Definition: AMDGPULibFunc.h:134
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::AMDGPULibFuncBase::Param::ArgType
unsigned char ArgType
Definition: AMDGPULibFunc.h:292
llvm::AMDGPULibFuncBase::EI_POWN
@ EI_POWN
Definition: AMDGPULibFunc.h:151
llvm::ConstantFP::get
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:946
TableRef::TableRef
TableRef(const TableEntry(&tbl)[N])
Definition: AMDGPULibCalls.cpp:406
llvm::AMDGPULibFunc::getPrefix
ENamePrefix getPrefix() const
Definition: AMDGPULibFunc.h:374
llvm::FindAvailableLoadedValue
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:431
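A minimal sketch of a typical query (illustrative only; the helper is hypothetical): reuse a value that is already available earlier in the load's block, provided the types line up.

  #include "llvm/Analysis/Loads.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  // Scan backwards from Load within its own block; if an equivalent value of
  // the same type is found, forward it to all users of the load.
  static bool reuseAvailableValue(LoadInst *Load) {
    BasicBlock::iterator ScanFrom = Load->getIterator();
    if (Value *V = FindAvailableLoadedValue(Load, Load->getParent(), ScanFrom))
      if (V->getType() == Load->getType()) {
        Load->replaceAllUsesWith(V);
        return true;
      }
    return false;
  }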
llvm::AMDGPULibFuncBase::EI_LOG10
@ EI_LOG10
Definition: AMDGPULibFunc.h:130
llvm::AMDGPULibFunc::getId
EFuncId getId() const
Definition: AMDGPULibFunc.h:373
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:842
tbl_rsqrt
static const TableEntry tbl_rsqrt[]
Definition: AMDGPULibCalls.cpp:336
simplifylib
amdgpu simplifylib
Definition: AMDGPULibCalls.cpp:198
llvm::AMDGPUAS::FLAT_ADDRESS
@ FLAT_ADDRESS
Address space for flat memory.
Definition: AMDGPU.h:381
llvm::AMDGPULibFuncBase::EI_EXPM1
@ EI_EXPM1
Definition: AMDGPULibFunc.h:88
MATH_SQRT2
#define MATH_SQRT2
Definition: AMDGPULibCalls.cpp:40
llvm::numbers::ln2
constexpr double ln2
Definition: MathExtras.h:59
llvm::AMDGPULibCalls::CI
CallInst * CI
Definition: AMDGPULibCalls.cpp:132
llvm::TargetMachine::getTargetCPU
StringRef getTargetCPU() const
Definition: TargetMachine.h:124
tbl_exp10
static const TableEntry tbl_exp10[]
Definition: AMDGPULibCalls.cpp:315
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:161
llvm::AMDGPULibCalls::fold
bool fold(CallInst *CI, AliasAnalysis *AA=nullptr)
Definition: AMDGPULibCalls.cpp:621
tbl_sin
static const TableEntry tbl_sin[]
Definition: AMDGPULibCalls.cpp:340
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1338
llvm::ConstantDataVector::getSplat
static Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:3131
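An illustrative sketch (helper name hypothetical): producing an N-element float splat constant from a scalar value, the shape of constant a vectorized fold would substitute for a scalar result.

  #include "llvm/IR/Constants.h"
  using namespace llvm;

  // Build a float scalar constant and replicate it into an N-element vector.
  static Constant *makeFloatSplat(LLVMContext &Ctx, unsigned NumElts, double V) {
    Constant *Scalar = ConstantFP::get(Type::getFloatTy(Ctx), V);
    return ConstantDataVector::getSplat(NumElts, Scalar);
  }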
N
#define N
tbl_exp2
static const TableEntry tbl_exp2[]
Definition: AMDGPULibCalls.cpp:310
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1281
llvm::AMDGPULibFunc::getName
std::string getName() const
Get the unmangled name for a mangled library function, or the name for an unmangled library function.
Definition: AMDGPULibFunc.h:371
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::to_string
std::string to_string(const T &Value)
Definition: ScopedPrinter.h:63
llvm::AMDGPULibFuncBase::EI_SQRT
@ EI_SQRT
Definition: AMDGPULibFunc.h:176
MATH_E
#define MATH_E
Definition: AMDGPULibCalls.cpp:39
tbl_sinh
static const TableEntry tbl_sinh[]
Definition: AMDGPULibCalls.cpp:344
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
calls
amdgpu Simplify well known AMD library calls
Definition: AMDGPULibCalls.cpp:199
tbl_cosh
static const TableEntry tbl_cosh[]
Definition: AMDGPULibCalls.cpp:289
llvm::StringRef::find_insensitive
LLVM_NODISCARD size_t find_insensitive(char C, size_t From=0) const
Search for the first character C in the string, ignoring case.
Definition: StringRef.cpp:55
llvm::AMDGPULibFuncBase::EI_NLOG2
@ EI_NLOG2
Definition: AMDGPULibFunc.h:225
llvm::Module::getDataLayout
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:401
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
tbl_sqrt
static const TableEntry tbl_sqrt[]
Definition: AMDGPULibCalls.cpp:352
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::AllocaInst
An instruction to allocate memory on the stack.
Definition: Instructions.h:62
llvm::cl::desc
Definition: CommandLine.h:414
llvm::AMDGPULibFuncBase::EI_RSQRT
@ EI_RSQRT
Definition: AMDGPULibFunc.h:165
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1284
TableRef
Definition: AMDGPULibCalls.cpp:399
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::AMDGPULibFuncBase::EI_ATAN
@ EI_ATAN
Definition: AMDGPULibFunc.h:50
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::AMDGPULibFuncBase::EI_SINPI
@ EI_SINPI
Definition: AMDGPULibFunc.h:174
llvm::AMDGPULibFuncBase::NATIVE
@ NATIVE
Definition: AMDGPULibFunc.h:248
MATH_SQRT1_2
#define MATH_SQRT1_2
Definition: AMDGPULibCalls.cpp:41
tbl_acosh
static const TableEntry tbl_acosh[]
Definition: AMDGPULibCalls.cpp:238
tbl_tgamma
static const TableEntry tbl_tgamma[]
Definition: AMDGPULibCalls.cpp:369
llvm::createAMDGPUSimplifyLibCallsPass
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
Definition: AMDGPULibCalls.cpp:1699
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
INITIALIZE_PASS
TargetPassConfig.
Definition: TargetPassConfig.cpp:319
llvm::DataLayout::getTypeAllocSize
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:498
llvm::ConstantDataSequential::getNumElements
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2921
llvm::cl::list
Definition: CommandLine.h:1630