LLVM 23.0.0git
AMDGPULibCalls.cpp
Go to the documentation of this file.
1//===- AMDGPULibCalls.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file does AMD library function optimizations.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
15#include "AMDGPULibFunc.h"
20#include "llvm/IR/Dominators.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/IntrinsicsAMDGPU.h"
23#include "llvm/IR/MDBuilder.h"
25#include <cmath>
26
27#define DEBUG_TYPE "amdgpu-simplifylib"
28
29using namespace llvm;
30using namespace llvm::PatternMatch;
31
32static cl::opt<bool> EnablePreLink("amdgpu-prelink",
33 cl::desc("Enable pre-link mode optimizations"),
34 cl::init(false),
36
37static cl::list<std::string> UseNative("amdgpu-use-native",
38 cl::desc("Comma separated list of functions to replace with native, or all"),
41
// Short aliases for the mathematical constants referenced by the folding
// tables in this file. Each one expands to the matching constant in the
// `numbers` namespace.
#define MATH_PI      numbers::pi
#define MATH_E       numbers::e
#define MATH_SQRT2   numbers::sqrt2
#define MATH_SQRT1_2 numbers::inv_sqrt2
46
/// Identifies which member of the pow family a fast-math expansion targets.
enum class PowKind {
  Pow,   // pow
  PowR,  // powr
  PowN,  // pown
  RootN, // rootn
};
48
49namespace llvm {
50
52private:
54
55 using FuncInfo = llvm::AMDGPULibFunc;
56
57 // -fuse-native.
58 bool AllNative = false;
59
60 bool useNativeFunc(const StringRef F) const;
61
62 // Return a pointer (pointer expr) to the function if function definition with
63 // "FuncName" exists. It may create a new function prototype in pre-link mode.
64 FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
65
66 /// Wrapper around getFunction which tries to use a faster variant if
67 /// available, and falls back to a less fast option.
68 ///
69 /// Return a replacement function for \p fInfo that has float-typed fast
70 /// variants. \p NewFunc is a base replacement function to use. \p
71 /// NewFuncFastVariant is a faster version to use if the calling context knows
72 /// it's legal. If there is no fast variant to use, \p NewFuncFastVariant
73 /// should be EI_NONE.
74 FunctionCallee getFloatFastVariant(Module *M, const FuncInfo &fInfo,
75 FuncInfo &newInfo,
77 AMDGPULibFunc::EFuncId NewFuncFastVariant);
78
79 bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
80
81 bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
82
83 /* Specialized optimizations */
84
85 // pow/powr/pown
86 bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
87
88 /// Perform a fast math expansion of pow, powr, pown or rootn.
89 bool expandFastPow(FPMathOperator *FPOp, IRBuilder<> &B, PowKind Kind);
90
91 bool tryOptimizePow(FPMathOperator *FPOp, IRBuilder<> &B,
92 const FuncInfo &FInfo);
93
94 // rootn
95 bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
96
97 // -fuse-native for sincos
98 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
99
100 // evaluate calls if calls' arguments are constants.
101 bool evaluateScalarMathFunc(const FuncInfo &FInfo, APFloat &Res0,
102 APFloat &Res1, Constant *copr0, Constant *copr1);
103 bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
104
105 /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
106 /// of cos, sincos call).
107 std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
108 FastMathFlags FMF,
109 IRBuilder<> &B,
110 FunctionCallee Fsincos);
111
112 // sin/cos
113 bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
114
115 // __read_pipe/__write_pipe
116 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
117 const FuncInfo &FInfo);
118
119 // Get a scalar native builtin single argument FP function
120 FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
121
122 /// Substitute a call to a known libcall with an intrinsic call. If \p
123 /// AllowMinSizeF32 is true, allow the replacement in a minsize function.
124 bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
125 bool AllowMinSizeF32 = false,
126 bool AllowF64 = false,
127 bool AllowStrictFP = false);
128 void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
129 Intrinsic::ID IntrID);
130
131 bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
132 Intrinsic::ID IntrID,
133 bool AllowMinSizeF32 = false,
134 bool AllowF64 = false,
135 bool AllowStrictFP = false);
136
137protected:
138 bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;
139
141
142 static void replaceCall(Instruction *I, Value *With) {
143 I->replaceAllUsesWith(With);
144 I->eraseFromParent();
145 }
146
147 static void replaceCall(FPMathOperator *I, Value *With) {
149 }
150
151public:
153
154 bool fold(CallInst *CI);
155
156 void initNativeFuncs();
157
158 // Replace a normal math function call with that native version
159 bool useNative(CallInst *CI);
160};
161
162} // end namespace llvm
163
164template <typename IRB>
165static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
166 const Twine &Name = "") {
167 CallInst *R = B.CreateCall(Callee, Arg, Name);
168 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
169 R->setCallingConv(F->getCallingConv());
170 return R;
171}
172
173template <typename IRB>
174static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
175 Value *Arg2, const Twine &Name = "") {
176 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
177 if (Function *F = dyn_cast<Function>(Callee.getCallee()))
178 R->setCallingConv(F->getCallingConv());
179 return R;
180}
181
183 Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
184 if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
185 PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
186
187 return FunctionType::get(FT->getReturnType(),
188 {FT->getParamType(0), PowNExpTy}, false);
189}
190
191// Data structures for table-driven optimizations.
192// FuncTbl works for both f32 and f64 functions with 1 input argument
193
195 double result;
196 double input;
197};
198
199/* a list of {result, input} */
200static const TableEntry tbl_acos[] = {
201 {MATH_PI / 2.0, 0.0},
202 {MATH_PI / 2.0, -0.0},
203 {0.0, 1.0},
204 {MATH_PI, -1.0}
205};
206static const TableEntry tbl_acosh[] = {
207 {0.0, 1.0}
208};
209static const TableEntry tbl_acospi[] = {
210 {0.5, 0.0},
211 {0.5, -0.0},
212 {0.0, 1.0},
213 {1.0, -1.0}
214};
215static const TableEntry tbl_asin[] = {
216 {0.0, 0.0},
217 {-0.0, -0.0},
218 {MATH_PI / 2.0, 1.0},
219 {-MATH_PI / 2.0, -1.0}
220};
221static const TableEntry tbl_asinh[] = {
222 {0.0, 0.0},
223 {-0.0, -0.0}
224};
225static const TableEntry tbl_asinpi[] = {
226 {0.0, 0.0},
227 {-0.0, -0.0},
228 {0.5, 1.0},
229 {-0.5, -1.0}
230};
231static const TableEntry tbl_atan[] = {
232 {0.0, 0.0},
233 {-0.0, -0.0},
234 {MATH_PI / 4.0, 1.0},
235 {-MATH_PI / 4.0, -1.0}
236};
237static const TableEntry tbl_atanh[] = {
238 {0.0, 0.0},
239 {-0.0, -0.0}
240};
241static const TableEntry tbl_atanpi[] = {
242 {0.0, 0.0},
243 {-0.0, -0.0},
244 {0.25, 1.0},
245 {-0.25, -1.0}
246};
247static const TableEntry tbl_cbrt[] = {
248 {0.0, 0.0},
249 {-0.0, -0.0},
250 {1.0, 1.0},
251 {-1.0, -1.0},
252};
253static const TableEntry tbl_cos[] = {
254 {1.0, 0.0},
255 {1.0, -0.0}
256};
257static const TableEntry tbl_cosh[] = {
258 {1.0, 0.0},
259 {1.0, -0.0}
260};
261static const TableEntry tbl_cospi[] = {
262 {1.0, 0.0},
263 {1.0, -0.0}
264};
265static const TableEntry tbl_erfc[] = {
266 {1.0, 0.0},
267 {1.0, -0.0}
268};
269static const TableEntry tbl_erf[] = {
270 {0.0, 0.0},
271 {-0.0, -0.0}
272};
273static const TableEntry tbl_exp[] = {
274 {1.0, 0.0},
275 {1.0, -0.0},
276 {MATH_E, 1.0}
277};
278static const TableEntry tbl_exp2[] = {
279 {1.0, 0.0},
280 {1.0, -0.0},
281 {2.0, 1.0}
282};
283static const TableEntry tbl_exp10[] = {
284 {1.0, 0.0},
285 {1.0, -0.0},
286 {10.0, 1.0}
287};
288static const TableEntry tbl_expm1[] = {
289 {0.0, 0.0},
290 {-0.0, -0.0}
291};
292static const TableEntry tbl_log[] = {
293 {0.0, 1.0},
294 {1.0, MATH_E}
295};
296static const TableEntry tbl_log2[] = {
297 {0.0, 1.0},
298 {1.0, 2.0}
299};
300static const TableEntry tbl_log10[] = {
301 {0.0, 1.0},
302 {1.0, 10.0}
303};
304static const TableEntry tbl_rsqrt[] = {
305 {1.0, 1.0},
306 {MATH_SQRT1_2, 2.0}
307};
308static const TableEntry tbl_sin[] = {
309 {0.0, 0.0},
310 {-0.0, -0.0}
311};
312static const TableEntry tbl_sinh[] = {
313 {0.0, 0.0},
314 {-0.0, -0.0}
315};
316static const TableEntry tbl_sinpi[] = {
317 {0.0, 0.0},
318 {-0.0, -0.0}
319};
320static const TableEntry tbl_sqrt[] = {
321 {0.0, 0.0},
322 {1.0, 1.0},
323 {MATH_SQRT2, 2.0}
324};
325static const TableEntry tbl_tan[] = {
326 {0.0, 0.0},
327 {-0.0, -0.0}
328};
329static const TableEntry tbl_tanh[] = {
330 {0.0, 0.0},
331 {-0.0, -0.0}
332};
333static const TableEntry tbl_tanpi[] = {
334 {0.0, 0.0},
335 {-0.0, -0.0}
336};
337static const TableEntry tbl_tgamma[] = {
338 {1.0, 1.0},
339 {1.0, 2.0},
340 {2.0, 3.0},
341 {6.0, 4.0}
342};
343
345 switch(id) {
361 return true;
362 default:;
363 }
364 return false;
365}
366
368
370 switch(id) {
408 default:;
409 }
410 return TableRef();
411}
412
413static inline int getVecSize(const AMDGPULibFunc& FInfo) {
414 return FInfo.getLeads()[0].VectorSize;
415}
416
417static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
418 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
419}
420
// Resolve the callee for the library function described by fInfo in module M.
// NOTE(review): only the second alternative of the conditional return
// (AMDGPULibFunc::getFunction) is readable below; the line carrying the
// condition and first alternative is absent here — confirm against upstream.
421FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
422 // If we are doing PreLinkOpt, the function is external. So it is safe to
423 // use getOrInsertFunction() at this stage.
424
426 : AMDGPULibFunc::getFunction(M, fInfo);
427}
428
429FunctionCallee AMDGPULibCalls::getFloatFastVariant(
430 Module *M, const FuncInfo &fInfo, FuncInfo &newInfo,
431 AMDGPULibFunc::EFuncId NewFunc, AMDGPULibFunc::EFuncId FastVariant) {
432 assert(NewFunc != FastVariant);
433
434 if (FastVariant != AMDGPULibFunc::EI_NONE &&
435 getArgType(fInfo) == AMDGPULibFunc::F32) {
436 newInfo = AMDGPULibFunc(FastVariant, fInfo);
437 if (FunctionCallee NewCallee = getFunction(M, newInfo))
438 return NewCallee;
439 }
440
441 newInfo = AMDGPULibFunc(NewFunc, fInfo);
442 return getFunction(M, newInfo);
443}
444
445bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
446 FuncInfo &FInfo) {
447 return AMDGPULibFunc::parse(FMangledName, FInfo);
448}
449
451 return FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs();
452}
453
455 const FPMathOperator *FPOp) const {
456 // TODO: Refine to approxFunc or contract
457 return FPOp->isFast();
458}
459
461 : SQ(F.getParent()->getDataLayout(),
462 &FAM.getResult<TargetLibraryAnalysis>(F),
463 FAM.getCachedResult<DominatorTreeAnalysis>(F),
464 &FAM.getResult<AssumptionAnalysis>(F)) {}
465
466bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
467 return AllNative || llvm::is_contained(UseNative, F);
468}
469
471 AllNative = useNativeFunc("all") ||
472 (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
473 UseNative.begin()->empty());
474}
475
// Split a sincos call into separate sin and cos calls when both "sin" and
// "cos" are selected by useNativeFunc. The cos value is stored through the
// call's second argument and the sin value replaces the original call.
// Returns true if the call was replaced, false otherwise.
476bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
477 bool native_sin = useNativeFunc("sin");
478 bool native_cos = useNativeFunc("cos");
479
  // Only split when both halves are requested.
480 if (native_sin && native_cos) {
481 Module *M = aCI->getModule();
482 Value *opr0 = aCI->getArgOperand(0);
483
  // Start from an empty descriptor that copies the argument type and vector
  // size of the original call's first leading parameter.
484 AMDGPULibFunc nf;
485 nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
486 nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
487
  // NOTE(review): the statements that configure nf before each lookup are not
  // visible here — confirm against upstream.
490 FunctionCallee sinExpr = getFunction(M, nf);
491
494 FunctionCallee cosExpr = getFunction(M, nf);
  // Both callees must resolve; otherwise nothing is changed.
495 if (sinExpr && cosExpr) {
  // The new calls and the store are inserted at the original call's position.
496 Value *sinval =
497 CallInst::Create(sinExpr, opr0, "splitsin", aCI->getIterator());
498 Value *cosval =
499 CallInst::Create(cosExpr, opr0, "splitcos", aCI->getIterator());
500 new StoreInst(cosval, aCI->getArgOperand(1), aCI->getIterator());
501
502 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
503 << " with native version of sin/cos");
504
505 replaceCall(aCI, sinval);
506 return true;
507 }
508 }
509 return false;
510}
511
513 Function *Callee = aCI->getCalledFunction();
514 if (!Callee || aCI->isNoBuiltin())
515 return false;
516
517 FuncInfo FInfo;
518 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
519 FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
520 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
521 !(AllNative || useNativeFunc(FInfo.getName()))) {
522 return false;
523 }
524
525 if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
526 return sincosUseNative(aCI, FInfo);
527
529 FunctionCallee F = getFunction(aCI->getModule(), FInfo);
530 if (!F)
531 return false;
532
533 aCI->setCalledFunction(F);
534 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
535 << " with native version");
536 return true;
537}
538
539// Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe
540// builtin, with appended type size and alignment arguments, where 2 or 4
541// indicates the original number of arguments. The library has optimized version
542// of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same
543// power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N
544// for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ...,
545// 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4.
//
// Returns true if CI was replaced by a call to the size-specialized function,
// false if the call was left untouched.
546bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
547 const FuncInfo &FInfo) {
548 auto *Callee = CI->getCalledFunction();
  // Only calls to functions without a body in this module are rewritten.
549 if (!Callee->isDeclaration())
550 return false;
551
552 assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
553 auto *M = Callee->getParent();
554 std::string Name = std::string(Callee->getName());
  // Accept only the 4- and 6-argument forms.
555 auto NumArg = CI->arg_size();
556 if (NumArg != 4 && NumArg != 6)
557 return false;
  // The last two arguments are the packet size and alignment; both must be
  // integer constants.
558 ConstantInt *PacketSize =
559 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
560 ConstantInt *PacketAlign =
561 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
562 if (!PacketSize || !PacketAlign)
563 return false;
564
  // The specialized variants are only used when size and alignment agree.
565 unsigned Size = PacketSize->getZExtValue();
566 Align Alignment = PacketAlign->getAlignValue();
567 if (Alignment != Size)
568 return false;
569
  // The pointer argument sits immediately before the size/alignment pair.
570 unsigned PtrArgLoc = CI->arg_size() - 3;
571 Value *PtrArg = CI->getArgOperand(PtrArgLoc);
572 Type *PtrTy = PtrArg->getType();
573
  // New signature: every argument up to and including the pointer; the
  // trailing size and alignment arguments are dropped.
575 for (unsigned I = 0; I != PtrArgLoc; ++I)
576 ArgTys.push_back(CI->getArgOperand(I)->getType());
577 ArgTys.push_back(PtrTy);
578
  // Specialized name is the original name with "_<Size>" appended.
579 Name = Name + "_" + std::to_string(Size);
580 auto *FTy = FunctionType::get(Callee->getReturnType(),
581 ArrayRef<Type *>(ArgTys), false);
582 AMDGPULibFunc NewLibFunc(Name, FTy);
584 if (!F)
585 return false;
586
  // Forward the same leading arguments plus the pointer to the new callee.
588 for (unsigned I = 0; I != PtrArgLoc; ++I)
589 Args.push_back(CI->getArgOperand(I));
590 Args.push_back(PtrArg);
591
  // Build the replacement call, carry over the attributes, and remove CI.
592 auto *NCI = B.CreateCall(F, Args);
593 NCI->setAttributes(CI->getAttributes());
594 CI->replaceAllUsesWith(NCI);
595 CI->dropAllReferences();
596 CI->eraseFromParent();
597
598 return true;
599}
600
601// This function returns false if no change; returns true otherwise.
603 Function *Callee = CI->getCalledFunction();
604 // Ignore indirect calls.
605 if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
606 return false;
607
608 FuncInfo FInfo;
609 if (!parseFunctionName(Callee->getName(), FInfo))
610 return false;
611
612 // Further check the number of arguments to see if they match.
613 // TODO: Check calling convention matches too
614 if (!FInfo.isCompatibleSignature(*Callee->getParent(), CI->getFunctionType()))
615 return false;
616
617 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n');
618
619 if (TDOFold(CI, FInfo))
620 return true;
621
622 IRBuilder<> B(CI);
623 if (CI->isStrictFP())
624 B.setIsFPConstrained(true);
625
627 // Under unsafe-math, evaluate calls if possible.
628 // According to Brian Sumner, we can do this for all f32 function calls
629 // using host's double function calls.
630 if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo))
631 return true;
632
633 // Copy fast flags from the original call.
634 FastMathFlags FMF = FPOp->getFastMathFlags();
635 B.setFastMathFlags(FMF);
636
637 // Specialized optimizations for each function call.
638 //
639 // TODO: Handle native functions
640 switch (FInfo.getId()) {
642 if (FMF.none())
643 return false;
644 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
645 FMF.approxFunc());
647 if (FMF.none())
648 return false;
649 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
650 FMF.approxFunc());
652 if (FMF.none())
653 return false;
654 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
655 FMF.approxFunc());
657 if (FMF.none())
658 return false;
659 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
660 FMF.approxFunc());
662 if (FMF.none())
663 return false;
664 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
665 FMF.approxFunc());
667 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
668 true, true);
670 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
671 true, true);
673 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
674 true);
676 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
677 true, true);
679 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
680 true, true);
682 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
683 true, true, true);
685 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
686 true);
688 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
689 true);
691 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
692 true);
694 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
695 true);
697 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
698 true);
700 if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
701 return false;
702
703 Value *Arg1 = CI->getArgOperand(1);
704 if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
705 VecTy && !isa<VectorType>(Arg1->getType())) {
706 Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
707 CI->setArgOperand(1, SplatArg1);
708 }
709
711 CI->getModule(), Intrinsic::ldexp,
712 {CI->getType(), CI->getArgOperand(1)->getType()}));
714 return true;
715 }
718 return tryOptimizePow(FPOp, B, FInfo);
721 if (fold_pow(FPOp, B, FInfo))
722 return true;
723 if (!FMF.approxFunc())
724 return false;
725
726 if (FInfo.getId() == AMDGPULibFunc::EI_POWR && FMF.approxFunc() &&
727 getArgType(FInfo) == AMDGPULibFunc::F32) {
728 Module *M = Callee->getParent();
729 AMDGPULibFunc PowrFastInfo(AMDGPULibFunc::EI_POWR_FAST, FInfo);
730 if (FunctionCallee PowrFastFunc = getFunction(M, PowrFastInfo)) {
731 CI->setCalledFunction(PowrFastFunc);
732 return true;
733 }
734 }
735
736 if (!shouldReplaceLibcallWithIntrinsic(CI))
737 return false;
738 return expandFastPow(FPOp, B, PowKind::PowR);
739 }
742 if (fold_pow(FPOp, B, FInfo))
743 return true;
744 if (!FMF.approxFunc())
745 return false;
746
747 if (FInfo.getId() == AMDGPULibFunc::EI_POWN &&
748 getArgType(FInfo) == AMDGPULibFunc::F32) {
749 Module *M = Callee->getParent();
750 AMDGPULibFunc PownFastInfo(AMDGPULibFunc::EI_POWN_FAST, FInfo);
751 if (FunctionCallee PownFastFunc = getFunction(M, PownFastInfo)) {
752 CI->setCalledFunction(PownFastFunc);
753 return true;
754 }
755 }
756
757 if (!shouldReplaceLibcallWithIntrinsic(CI))
758 return false;
759 return expandFastPow(FPOp, B, PowKind::PowN);
760 }
763 if (fold_rootn(FPOp, B, FInfo))
764 return true;
765 if (!FMF.approxFunc())
766 return false;
767
768 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
769 Module *M = Callee->getParent();
770 AMDGPULibFunc RootnFastInfo(AMDGPULibFunc::EI_ROOTN_FAST, FInfo);
771 if (FunctionCallee RootnFastFunc = getFunction(M, RootnFastInfo)) {
772 CI->setCalledFunction(RootnFastFunc);
773 return true;
774 }
775 }
776
777 return expandFastPow(FPOp, B, PowKind::RootN);
778 }
780 // TODO: Allow with strictfp + constrained intrinsic
781 return tryReplaceLibcallWithSimpleIntrinsic(
782 B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
785 return fold_sincos(FPOp, B, FInfo);
786 default:
787 break;
788 }
789 } else {
790 // Specialized optimizations for each function call
791 switch (FInfo.getId()) {
796 return fold_read_write_pipe(CI, B, FInfo);
797 default:
798 break;
799 }
800 }
801
802 return false;
803}
804
806 const Type *Ty) {
807 Type *ElemTy = Ty->getScalarType();
808 const fltSemantics &FltSem = ElemTy->getFltSemantics();
809
810 SmallVector<Constant *, 4> ConstValues;
811 ConstValues.reserve(Values.size());
812 for (APFloat APF : Values) {
813 bool Unused;
814 APF.convert(FltSem, APFloat::rmNearestTiesToEven, &Unused);
815 ConstValues.push_back(ConstantFP::get(ElemTy, APF));
816 }
817 return ConstantVector::get(ConstValues);
818}
819
// Table-driven constant folding. Looks up the {result, input} table for the
// function id in FInfo; if the call's first argument is a constant whose
// value (or, for vectors, every element) exactly matches a table input, the
// call is replaced by the corresponding result constant.
// Returns true if CI was replaced, false otherwise.
820bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
821 // Table-Driven optimization
822 const TableRef tr = getOptTable(FInfo.getId());
823 if (tr.empty())
824 return false;
825
826 int const sz = (int)tr.size();
827 Value *opr0 = CI->getArgOperand(0);
828
829 int vecSize = getVecSize(FInfo);
830 if (vecSize > 1) {
831 // Vector version
832 Constant *CV = dyn_cast<Constant>(opr0);
833 if (CV && CV->getType()->isVectorTy()) {
835 Values.reserve(vecSize);
  // Every element must hit a table row; a single miss abandons the fold.
836 for (int eltNo = 0; eltNo < vecSize; ++eltNo) {
  // NOTE(review): cast<> assumes each element is a ConstantFP; confirm that
  // undef/poison elements or a null getAggregateElement result cannot reach
  // this point.
837 ConstantFP *eltval =
838 cast<ConstantFP>(CV->getAggregateElement((unsigned)eltNo));
839 auto MatchingRow = llvm::find_if(tr, [eltval](const TableEntry &entry) {
840 return eltval->isExactlyValue(entry.input);
841 });
842 if (MatchingRow == tr.end())
843 return false;
844 Values.push_back(APFloat(MatchingRow->result));
845 }
  // Build the result vector in the call's own type and substitute it.
846 Constant *NewValues = getConstantFloatVector(Values, CI->getType());
847 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *NewValues << "\n");
848 replaceCall(CI, NewValues);
849 return true;
850 }
851 } else {
852 // Scalar version
853 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
  // Linear scan of the table; the first exact match wins.
854 for (int i = 0; i < sz; ++i) {
855 if (CF->isExactlyValue(tr[i].input)) {
856 Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
857 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
858 replaceCall(CI, nval);
859 return true;
860 }
861 }
862 }
863 }
864
865 return false;
866}
867
namespace llvm {
/// Base-2 logarithm of \p V, evaluated on the host.
///
/// fold_pow uses this to pre-compute log2 of constant bases. std::log2 has
/// been part of <cmath> since C++11, so it is called unconditionally. The
/// former feature-test-macro fallback (log(V) / ln2) could round differently
/// from a real log2, which made the folded constant depend on the host
/// toolchain.
static double log2(double V) { return std::log2(V); }
} // namespace llvm
877
// Simplify a pow/powr/pown call (including the *_FAST ids) held in FPOp.
//
// Always attempted when the exponent is a constant:
//   y == 0    -> 1
//   y == 1    -> x
//   y == 2    -> x * x
//   y == -1   -> 1 / x
//   y == +0.5 / -0.5 -> a single-argument library call (sqrt for +0.5)
// Only when isUnsafeFiniteOnlyMath(FPOp) holds:
//   integral constant y with |y| <= 12 -> chain of multiplications (and a
//                                         reciprocal for negative y)
//   otherwise -> exp2(y * log2(x)), with the sign patched in afterwards for
//                the non-powr variants.
//
// New instructions are created through B. Returns true if FPOp was replaced,
// false if the call was left as is.
878bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
879 const FuncInfo &FInfo) {
880 assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
881 FInfo.getId() == AMDGPULibFunc::EI_POW_FAST ||
882 FInfo.getId() == AMDGPULibFunc::EI_POWR ||
883 FInfo.getId() == AMDGPULibFunc::EI_POWR_FAST ||
884 FInfo.getId() == AMDGPULibFunc::EI_POWN ||
885 FInfo.getId() == AMDGPULibFunc::EI_POWN_FAST) &&
886 "fold_pow: encounter a wrong function call");
887
888 Module *M = B.GetInsertBlock()->getModule();
889 Type *eltType = FPOp->getType()->getScalarType();
890 Value *opr0 = FPOp->getOperand(0);
891 Value *opr1 = FPOp->getOperand(1);
892
  // The exponent is matched either as a floating-point constant (CF) or, if
  // that fails, as an integer constant (CINT). Both may stay null.
893 const APFloat *CF = nullptr;
894 const APInt *CINT = nullptr;
895 if (!match(opr1, m_APFloatAllowPoison(CF)))
896 match(opr1, m_APIntAllowPoison(CINT));
897
  // NOTE(review): the sentinel here is 0x1111111 (seven digits) while the
  // non-integral case further down assigns 0x11111111 (eight digits). Both
  // exceed 12, so the |c| <= 12 test rejects either; confirm which spelling
  // is intended.
898 // 0x1111111 means that we don't do anything for this call.
899 int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
900
901 if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
902 // pow/powr/pown(x, 0) == 1
903 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
904 Constant *cnval = ConstantFP::get(eltType, 1.0);
905 if (getVecSize(FInfo) > 1) {
906 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
907 }
908 replaceCall(FPOp, cnval);
909 return true;
910 }
911 if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
912 // pow/powr/pown(x, 1.0) = x
913 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
914 replaceCall(FPOp, opr0);
915 return true;
916 }
917 if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
918 // pow/powr/pown(x, 2.0) = x*x
919 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
920 << *opr0 << "\n");
921 Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
922 replaceCall(FPOp, nval);
923 return true;
924 }
925 if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
926 // pow/powr/pown(x, -1.0) = 1.0/x
927 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
928 Constant *cnval = ConstantFP::get(eltType, 1.0);
929 if (getVecSize(FInfo) > 1) {
930 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
931 }
932 Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
933 replaceCall(FPOp, nval);
934 return true;
935 }
936
937 if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
938 // pow[r](x, [-]0.5) = sqrt(x)
  // NOTE(review): the alternative chosen for -0.5 is on a line not visible
  // here; the value name "__pow2rsqrt" suggests rsqrt — confirm.
939 bool issqrt = CF->isExactlyValue(0.5);
940 if (FunctionCallee FPExpr =
941 getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
943 FInfo))) {
944 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
945 << '(' << *opr0 << ")\n");
946 Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
947 : "__pow2rsqrt");
948 replaceCall(FPOp, nval);
949 return true;
950 }
951 }
952
  // Everything below changes numerical behavior and is gated on the call's
  // fast-math flags.
953 if (!isUnsafeFiniteOnlyMath(FPOp))
954 return false;
955
956 // Unsafe Math optimization
957
958 // Remember that ci_opr1 is set if opr1 is integral
959 if (CF) {
960 double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
961 ? (double)CF->convertToFloat()
962 : CF->convertToDouble();
  // NOTE(review): converting a double outside the range of int is undefined
  // in C++; confirm very large constant exponents cannot reach this cast.
963 int ival = (int)dval;
964 if ((double)ival == dval) {
965 ci_opr1 = ival;
966 } else
967 ci_opr1 = 0x11111111;
968 }
969
970 // pow/powr/pown(x, c) = [1/](x*x*..x); where
971 // trunc(c) == c && the number of x == c && |c| <= 12
972 unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
973 if (abs_opr1 <= 12) {
974 Constant *cnval;
975 Value *nval;
976 if (abs_opr1 == 0) {
977 cnval = ConstantFP::get(eltType, 1.0);
978 if (getVecSize(FInfo) > 1) {
979 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
980 }
981 nval = cnval;
982 } else {
  // Square-and-multiply: valx2 is repeatedly squared, and nval accumulates
  // the product of the squares selected by the set bits of |c|.
983 Value *valx2 = nullptr;
984 nval = nullptr;
985 while (abs_opr1 > 0) {
986 valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
987 if (abs_opr1 & 1) {
988 nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
989 }
990 abs_opr1 >>= 1;
991 }
992 }
993
  // Negative exponent: take the reciprocal of the product.
994 if (ci_opr1 < 0) {
995 cnval = ConstantFP::get(eltType, 1.0);
996 if (getVecSize(FInfo) > 1) {
997 cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
998 }
999 nval = B.CreateFDiv(cnval, nval, "__1powprod");
1000 }
1001 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
1002 << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
1003 << ")\n");
1004 replaceCall(FPOp, nval);
1005 return true;
1006 }
1007
1008 // If we should use the generic intrinsic instead of emitting a libcall
1009 const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();
1010
1011 // powr ---> exp2(y * log2(x))
1012 // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
1013 FunctionCallee ExpExpr;
1014 if (ShouldUseIntrinsic)
1015 ExpExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::exp2,
1016 {FPOp->getType()});
1017 else {
1018 ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
1019 if (!ExpExpr)
1020 return false;
1021 }
1022
  // Decide what has to be emitted for the base x:
  //   needlog      - log2(x) must be computed at run time (x not constant)
  //   needabs      - x must go through fabs first (non-powr variants)
  //   needcopysign - the sign of the result must be fixed up afterwards
  //   cnval        - host-computed log2(|x|) when x is constant
1023 bool needlog = false;
1024 bool needabs = false;
1025 bool needcopysign = false;
1026 Constant *cnval = nullptr;
1027 if (getVecSize(FInfo) == 1) {
1028 CF = nullptr;
1029 match(opr0, m_APFloatAllowPoison(CF));
1030
1031 if (CF) {
1032 double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
1033 ? (double)CF->convertToFloat()
1034 : CF->convertToDouble();
1035
1036 V = log2(std::abs(V));
1037 cnval = ConstantFP::get(eltType, V);
1038 needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1039 FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST) &&
1040 CF->isNegative();
1041 } else {
1042 needlog = true;
1043 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1044 FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST;
1045 }
1046 } else {
1047 ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
1048
1049 if (!CDV) {
1050 needlog = true;
1051 needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
1052 FInfo.getId() != AMDGPULibFunc::EI_POWR_FAST;
1053 } else {
1054 assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
1055 "Wrong vector size detected");
1056
  // Constant vector base: compute log2(|x_i|) per element on the host and
  // note whether any element is negative.
1058 for (int i=0; i < getVecSize(FInfo); ++i) {
1059 double V = CDV->getElementAsAPFloat(i).convertToDouble();
1060 if (V < 0.0) needcopysign = true;
1061 V = log2(std::abs(V));
1062 DVal.push_back(V);
1063 }
1064 if (getArgType(FInfo) == AMDGPULibFunc::F32) {
1066 for (double D : DVal)
1067 FVal.push_back((float)D);
1068 ArrayRef<float> tmp(FVal);
1069 cnval = ConstantDataVector::get(M->getContext(), tmp);
1070 } else {
1071 ArrayRef<double> tmp(DVal);
1072 cnval = ConstantDataVector::get(M->getContext(), tmp);
1073 }
1074 }
1075 }
1076
1077 if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW ||
1078 FInfo.getId() == AMDGPULibFunc::EI_POW_FAST)) {
1079 // We cannot handle corner cases for a general pow() function, give up
1080 // unless y is a constant integral value. Then proceed as if it were pown.
1081 if (!isKnownIntegral(opr1, SQ.getWithInstruction(cast<Instruction>(FPOp)),
1082 FPOp->getFastMathFlags()))
1083 return false;
1084 }
1085
  // nval becomes log2(|x|): either the precomputed constant, or fabs/log2
  // emitted on the run-time value.
1086 Value *nval;
1087 if (needabs) {
1088 nval = B.CreateFAbs(opr0, nullptr, "__fabs");
1089 } else {
1090 nval = cnval ? cnval : opr0;
1091 }
1092 if (needlog) {
1093 FunctionCallee LogExpr;
1094 if (ShouldUseIntrinsic) {
1095 LogExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::log2,
1096 {FPOp->getType()});
1097 } else {
1098 LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
1099 if (!LogExpr)
1100 return false;
1101 }
1102
1103 nval = CreateCallEx(B,LogExpr, nval, "__log2");
1104 }
1105
1106 if (FInfo.getId() == AMDGPULibFunc::EI_POWN ||
1107 FInfo.getId() == AMDGPULibFunc::EI_POWN_FAST) {
1108 // convert int(32) to fp(f32 or f64)
1109 opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
1110 }
1111 nval = B.CreateFMul(opr1, nval, "__ylogx");
1112
1113 CallInst *Exp2Call = CreateCallEx(B, ExpExpr, nval, "__exp2");
1114
1115 // TODO: Generalized fpclass logic for pow
1117 if (FPOp->hasNoNaNs())
1118 KnownNot |= FPClassTest::fcNan;
1119
1120 Exp2Call->addRetAttr(
1121 Attribute::getWithNoFPClass(Exp2Call->getContext(), KnownNot));
1122 nval = Exp2Call;
1123
1124 if (needcopysign) {
  // Build an integer whose sign bit is (low bit of y) AND (sign bit of x),
  // then copy that sign onto the exp2 result.
1125 Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
1126 Type *nTy = FPOp->getType()->getWithNewType(nTyS);
1127 Value *opr_n = FPOp->getOperand(1);
1128 if (opr_n->getType()->getScalarType()->isIntegerTy())
1129 opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
1130 else
1131 opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
1132
1133 unsigned size = nTy->getScalarSizeInBits();
1134 Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
1135 sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
1136
1137 nval = B.CreateCopySign(nval, B.CreateBitCast(sign, nval->getType()),
1138 nullptr, "__pow_sign");
1139 }
1140
1141 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
1142 << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
1143 replaceCall(FPOp, nval);
1144
1145 return true;
1146}
1147
1148bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
1149 const FuncInfo &FInfo) {
1150 Value *opr0 = FPOp->getOperand(0);
1151 Value *opr1 = FPOp->getOperand(1);
1152
1153 const APInt *CINT = nullptr;
1154 if (!match(opr1, m_APIntAllowPoison(CINT)))
1155 return false;
1156
1157 Function *Parent = B.GetInsertBlock()->getParent();
1158
1159 int ci_opr1 = (int)CINT->getSExtValue();
1160 if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
1161 // rootn(x, 1) = x
1162 //
1163 // TODO: Insert constrained canonicalize for strictfp case.
1164 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
1165 replaceCall(FPOp, opr0);
1166 return true;
1167 }
1168
1169 Module *M = B.GetInsertBlock()->getModule();
1170
1171 CallInst *CI = cast<CallInst>(FPOp);
1172 if (ci_opr1 == 2 &&
1173 shouldReplaceLibcallWithIntrinsic(CI,
1174 /*AllowMinSizeF32=*/true,
1175 /*AllowF64=*/true)) {
1176 // rootn(x, 2) = sqrt(x)
1177 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n");
1178
1179 Value *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1180 NewCall->takeName(CI);
1181
1182 // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some
1183 // metadata.
1184 MDBuilder MDHelper(M->getContext());
1185 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1186 if (auto *NewCallI = dyn_cast<Instruction>(NewCall))
1187 NewCallI->setMetadata(LLVMContext::MD_fpmath, FPMD);
1188
1189 replaceCall(CI, NewCall);
1190 return true;
1191 }
1192
1193 if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
1194 if (FunctionCallee FPExpr =
1195 getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
1196 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
1197 << ")\n");
1198 Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
1199 replaceCall(FPOp, nval);
1200 return true;
1201 }
1202 } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
1203 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
1204 Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
1205 opr0,
1206 "__rootn2div");
1207 replaceCall(FPOp, nval);
1208 return true;
1209 }
1210
1211 if (ci_opr1 == -2 &&
1212 shouldReplaceLibcallWithIntrinsic(CI,
1213 /*AllowMinSizeF32=*/true,
1214 /*AllowF64=*/true)) {
1215 // rootn(x, -2) = rsqrt(x)
1216
1217 // The original rootn had looser ulp requirements than the resultant sqrt
1218 // and fdiv.
1219 MDBuilder MDHelper(M->getContext());
1220 MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
1221
1222 // TODO: Could handle strictfp but need to fix strict sqrt emission
1223 FastMathFlags FMF = FPOp->getFastMathFlags();
1224 FMF.setAllowContract(true);
1225
1226 Value *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
1228 B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), Sqrt));
1229 if (auto *SqrtI = dyn_cast<Instruction>(Sqrt))
1230 SqrtI->setFastMathFlags(FMF);
1231 RSqrt->setFastMathFlags(FMF);
1232 RSqrt->setMetadata(LLVMContext::MD_fpmath, FPMD);
1233
1234 LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
1235 << ")\n");
1236 replaceCall(CI, RSqrt);
1237 return true;
1238 }
1239
1240 return false;
1241}
1242
1243// is_integer(y) => trunc(y) == y
1245 Value *TruncY = B.CreateUnaryIntrinsic(Intrinsic::trunc, Y);
1246 return B.CreateFCmpOEQ(TruncY, Y);
1247}
1248
1250 // Even integers are still integers after division by 2.
1251 auto *HalfY = B.CreateFMul(Y, ConstantFP::get(Y->getType(), 0.5));
1252 return emitIsInteger(B, HalfY);
1253}
1254
1255// is_odd_integer(y) => is_integer(y) && !is_even_integer(y)
1257 Value *IsIntY = emitIsInteger(B, Y);
1258 Value *IsEvenY = emitIsEvenInteger(B, Y);
1259 Value *NotEvenY = B.CreateNot(IsEvenY);
1260 return B.CreateAnd(IsIntY, NotEvenY);
1261}
1262
1263// isinf(val) => fabs(val) == +inf
1265 auto *fabsVal = B.CreateFAbs(val);
1266 return B.CreateFCmpOEQ(fabsVal, ConstantFP::getInfinity(val->getType()));
1267}
1268
1269// y * log2(fabs(x))
1271 Value *AbsX = B.CreateFAbs(X);
1272 Value *LogAbsX = B.CreateUnaryIntrinsic(Intrinsic::log2, AbsX);
1273 Value *YTimesLogX = B.CreateFMul(Y, LogAbsX);
1274 return B.CreateUnaryIntrinsic(Intrinsic::exp2, YTimesLogX);
1275}
1276
1277/// Emit special case management epilog code for fast pow, powr, pown, and rootn
1278/// expansions. \p x and \p y should be the arguments to the library call
1279/// (possibly with some values clamped). \p expylnx should be the result to use
1280/// in normal circumstances.
1282 PowKind Kind) {
1283 Constant *Zero = ConstantFP::getZero(X->getType());
1284 Constant *One = ConstantFP::get(X->getType(), 1.0);
1285 Constant *QNaN = ConstantFP::getQNaN(X->getType());
1286 Constant *PInf = ConstantFP::getInfinity(X->getType());
1287
1288 switch (Kind) {
1289 case PowKind::Pow: {
1290 // is_odd_integer(y)
1291 Value *IsOddY = emitIsOddInteger(B, Y);
1292
1293 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1294 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1295 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1296
1297 // if (x < 0 && !is_integer(y)) ret = QNAN
1298 Value *IsIntY = emitIsInteger(B, Y);
1299 Value *condNegX = B.CreateFCmpOLT(X, Zero);
1300 Value *condNotIntY = B.CreateNot(IsIntY);
1301 Value *condNaN = B.CreateAnd(condNegX, condNotIntY);
1302 Ret = B.CreateSelect(condNaN, QNaN, Ret);
1303
1304 // if (isinf(ay)) { ... }
1305
1306 // FIXME: Missing backend optimization to save on materialization cost of
1307 // mixed sign constant infinities.
1308 Value *YIsInf = emitIsInf(B, Y);
1309
1310 Value *AY = B.CreateFAbs(Y);
1311 Value *YIsNegInf = B.CreateFCmpUNE(Y, AY);
1312
1313 Value *AX = B.CreateFAbs(X);
1314 Value *AxEqOne = B.CreateFCmpOEQ(AX, One);
1315 Value *AxLtOne = B.CreateFCmpOLT(AX, One);
1316 Value *XorCond = B.CreateXor(AxLtOne, YIsNegInf);
1317 Value *SelInf =
1318 B.CreateSelect(AxEqOne, AX, B.CreateSelect(XorCond, Zero, AY));
1319 Ret = B.CreateSelect(YIsInf, SelInf, Ret);
1320
1321 // if (isinf(ax) || x == 0.0f) { ... }
1322 Value *XIsInf = emitIsInf(B, X);
1323 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1324 Value *AxInfOrZero = B.CreateOr(XIsInf, XEqZero);
1325 Value *YLtZero = B.CreateFCmpOLT(Y, Zero);
1326 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1327 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1328 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1329 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1330 Ret = B.CreateSelect(AxInfOrZero, Copysign, Ret);
1331
1332 // if (isunordered(x, y)) ret = QNAN
1333 Value *isUnordered = B.CreateFCmpUNO(X, Y);
1334 return B.CreateSelect(isUnordered, QNaN, Ret);
1335 }
1336 case PowKind::PowR: {
1337 Value *YIsNeg = B.CreateFCmpOLT(Y, Zero);
1338 Value *IZ = B.CreateSelect(YIsNeg, PInf, Zero);
1339 Value *ZI = B.CreateSelect(YIsNeg, Zero, PInf);
1340
1341 Value *YEqZero = B.CreateFCmpOEQ(Y, Zero);
1342 Value *SelZeroCase = B.CreateSelect(YEqZero, QNaN, IZ);
1343 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1344 Value *Ret = B.CreateSelect(XEqZero, SelZeroCase, ExpYLnX);
1345
1346 Value *XEqInf = B.CreateFCmpOEQ(X, PInf);
1347 Value *YNeZero = B.CreateFCmpUNE(Y, Zero);
1348 Value *CondInfCase = B.CreateAnd(XEqInf, YNeZero);
1349 Ret = B.CreateSelect(CondInfCase, ZI, Ret);
1350
1351 Value *IsInfY = emitIsInf(B, Y);
1352 Value *XNeOne = B.CreateFCmpUNE(X, One);
1353 Value *CondInfY = B.CreateAnd(IsInfY, XNeOne);
1354 Value *XLtOne = B.CreateFCmpOLT(X, One);
1355 Value *SelInfYCase = B.CreateSelect(XLtOne, IZ, ZI);
1356 Ret = B.CreateSelect(CondInfY, SelInfYCase, Ret);
1357
1358 Value *IsUnordered = B.CreateFCmpUNO(X, Y);
1359 return B.CreateSelect(IsUnordered, QNaN, Ret);
1360 }
1361 case PowKind::PowN: {
1362 Constant *ZeroI = ConstantInt::get(Y->getType(), 0);
1363
1364 // is_odd_y = (ny & 1) != 0
1365 Value *OneI = ConstantInt::get(Y->getType(), 1);
1366 Value *YAnd1 = B.CreateAnd(Y, OneI);
1367 Value *IsOddY = B.CreateICmpNE(YAnd1, ZeroI);
1368
1369 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1370 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1371 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1372
1373 // if (isinf(x) || x == 0.0f)
1374 Value *FabsX = B.CreateFAbs(X);
1375 Value *XIsInf = B.CreateFCmpOEQ(FabsX, PInf);
1376 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1377 Value *InfOrZero = B.CreateOr(XIsInf, XEqZero);
1378
1379 // (x == 0.0f) ^ (ny < 0) ? 0.0f : +inf
1380 Value *YLtZero = B.CreateICmpSLT(Y, ZeroI);
1381 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1382 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1383
1384 // copysign(selVal, is_odd_y ? x : 0.0f)
1385 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1386 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1387
1388 return B.CreateSelect(InfOrZero, Copysign, Ret);
1389 }
1390 case PowKind::RootN: {
1391 Constant *ZeroI = ConstantInt::get(Y->getType(), 0);
1392
1393 // is_odd_y = (ny & 1) != 0
1394 Value *YAnd1 = B.CreateAnd(Y, ConstantInt::get(Y->getType(), 1));
1395 Value *IsOddY = B.CreateICmpNE(YAnd1, ZeroI);
1396
1397 // ret = copysign(expylnx, is_odd_y ? x : 1.0f)
1398 Value *SelSign = B.CreateSelect(IsOddY, X, One);
1399 Value *Ret = B.CreateCopySign(ExpYLnX, SelSign);
1400
1401 // if (isinf(x) || x == 0.0f)
1402 Value *FabsX = B.CreateFAbs(X);
1403 Value *IsInfX = B.CreateFCmpOEQ(FabsX, PInf);
1404 Value *XEqZero = B.CreateFCmpOEQ(X, Zero);
1405 Value *CondInfOrZero = B.CreateOr(IsInfX, XEqZero);
1406
1407 // (x == 0.0f) ^ (ny < 0) ? 0.0f : +inf
1408 Value *YLtZero = B.CreateICmpSLT(Y, ZeroI);
1409 Value *XorZeroInf = B.CreateXor(XEqZero, YLtZero);
1410 Value *SelVal = B.CreateSelect(XorZeroInf, Zero, PInf);
1411
1412 // copysign(selVal, is_odd_y ? x : 0.0f)
1413 Value *SelSign2 = B.CreateSelect(IsOddY, X, Zero);
1414 Value *Copysign = B.CreateCopySign(SelVal, SelSign2);
1415
1416 Ret = B.CreateSelect(CondInfOrZero, Copysign, Ret);
1417
1418 // if ((x < 0.0f && !is_odd_y) || ny == 0) ret = QNAN
1419 Value *XIsNeg = B.CreateFCmpOLT(X, Zero);
1420 Value *NotOddY = B.CreateNot(IsOddY);
1421 Value *CondNegAndNotOdd = B.CreateAnd(XIsNeg, NotOddY);
1422 Value *YEqZero = B.CreateICmpEQ(Y, ZeroI);
1423 Value *CondBad = B.CreateOr(CondNegAndNotOdd, YEqZero);
1424 return B.CreateSelect(CondBad, QNaN, Ret);
1425 }
1426 }
1427
1428 llvm_unreachable("covered switch");
1429}
1430
1431// TODO: Move the fold_pow folding to sqrt/fdiv here
1432bool AMDGPULibCalls::expandFastPow(FPMathOperator *FPOp, IRBuilder<> &B,
1433 PowKind Kind) {
1434 Type *Ty = FPOp->getType();
1435
1436 // There's currently no reason to do this for half. The correct path is
1437 // promote to float and use the fast float expansion.
1438 //
1439 // TODO: We could move this expansion to lowering to get half pow to work.
1440 if (!Ty->getScalarType()->isFloatTy())
1441 return false;
1442
1443 // TODO: Verify optimization for double and bfloat.
1444 Value *X = FPOp->getOperand(0);
1445 Value *Y = FPOp->getOperand(1);
1446
1447 switch (Kind) {
1448 case PowKind::Pow: {
1449 Constant *One = ConstantFP::get(X->getType(), 1.0);
1450
1451 // if (x == 1.0f) y = 1.0f;
1452 Value *XEqOne = B.CreateFCmpOEQ(X, One);
1453 Y = B.CreateSelect(XEqOne, One, Y);
1454
1455 // if (y == 0.0f) x = 1.0f;
1456 Value *YEqZero = B.CreateFCmpOEQ(Y, ConstantFP::getZero(X->getType()));
1457 X = B.CreateSelect(YEqZero, One, X);
1458
1459 Value *ExpYLnX = emitFastExpYLnx(B, X, Y);
1460 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1461 replaceCall(FPOp, Fixed);
1462 return true;
1463 }
1464 case PowKind::PowR: {
1465 Value *NegX = B.CreateFCmpOLT(X, ConstantFP::getZero(X->getType()));
1466 X = B.CreateSelect(NegX, ConstantFP::getQNaN(X->getType()), X);
1467
1468 Value *ExpYLnX = emitFastExpYLnx(B, X, Y);
1469 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1470 replaceCall(FPOp, Fixed);
1471 return true;
1472 }
1473 case PowKind::PowN: {
1474 // ny == 0
1475 Value *YEqZero = B.CreateICmpEQ(Y, ConstantInt::get(Y->getType(), 0));
1476
1477 // x = (ny == 0 ? 1.0f : x)
1478 X = B.CreateSelect(YEqZero, ConstantFP::get(X->getType(), 1.0), X);
1479
1480 Value *CastY = B.CreateSIToFP(Y, X->getType());
1481 Value *ExpYLnX = emitFastExpYLnx(B, X, CastY);
1482 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1483 replaceCall(FPOp, Fixed);
1484 return true;
1485 }
1486 case PowKind::RootN: {
1487 Value *CastY = B.CreateSIToFP(Y, X->getType());
1488
1489 // This is afn anyway, so we will turn into rcp.
1490 Value *RcpY = B.CreateFDiv(ConstantFP::get(X->getType(), 1.0), CastY);
1491
1492 Value *ExpYLnX = emitFastExpYLnx(B, X, RcpY);
1493 Value *Fixed = emitPowFixup(B, X, Y, ExpYLnX, Kind);
1494 replaceCall(FPOp, Fixed);
1495 return true;
1496 }
1497 }
1498 llvm_unreachable("Unhandled PowKind enum");
1499}
1500
// Try to simplify a pow call, in this order:
//   1. Retarget to powr when the base cannot be ordered less than zero.
//   2. Retarget to pown when the exponent is known to be integral.
//   3. Run the generic fold_pow folds on the call as-is.
//   4. With the afn flag only: retarget an f32 EI_POW call to EI_POW_FAST if
//      that function is available, otherwise expand inline via expandFastPow.
//
// Returns true if the call was changed. Note that once the callee has been
// retargeted the function returns true even if fold_pow then does nothing.
//
// NOTE(review): several source lines are absent from this listing (gaps in the
// embedded line numbers, marked below); the comments only describe what the
// visible lines show.
1501bool AMDGPULibCalls::tryOptimizePow(FPMathOperator *FPOp, IRBuilder<> &B,
1502 const FuncInfo &FInfo) {
1503 FastMathFlags FMF = FPOp->getFastMathFlags();
1504 CallInst *Call = cast<CallInst>(FPOp);
1505 Module *M = Call->getModule();
1506
1507 FuncInfo PowrInfo;
1508 AMDGPULibFunc::EFuncId FastPowrFuncId =
1509 FMF.approxFunc() || FInfo.getId() == AMDGPULibFunc::EI_POW_FAST
// NOTE(review): the two arms of this conditional (lines 1510-1511) are not
// present in this listing.
1512 FunctionCallee PowrFunc = getFloatFastVariant(
1513 M, FInfo, PowrInfo, AMDGPULibFunc::EI_POWR, FastPowrFuncId);
1514
1515 // TODO: Prefer fast pown to fast powr, but slow powr to slow pown.
1516
1517 // pow(x, y) -> powr(x, y) for x >= -0.0
1518 // TODO: Account for flags on current call
1519 if (PowrFunc && cannotBeOrderedLessThanZero(FPOp->getOperand(0),
1520 SQ.getWithInstruction(Call))) {
1521 Call->setCalledFunction(PowrFunc);
// The callee was already swapped, so report a change regardless of whether
// fold_pow simplifies the powr call further.
1522 return fold_pow(FPOp, B, PowrInfo) || true;
1523 }
1524
1525 // pow(x, y) -> pown(x, y) for known integral y
1526 if (isKnownIntegral(FPOp->getOperand(1), SQ.getWithInstruction(Call),
1527 FPOp->getFastMathFlags())) {
1528 FunctionType *PownType = getPownType(Call->getFunctionType());
1529
1530 FuncInfo PownInfo;
1531 AMDGPULibFunc::EFuncId FastPownFuncId =
1532 FMF.approxFunc() || FInfo.getId() == AMDGPULibFunc::EI_POW_FAST
// NOTE(review): the two arms of this conditional (lines 1533-1534) are not
// present in this listing.
1535 FunctionCallee PownFunc = getFloatFastVariant(
1536 M, FInfo, PownInfo, AMDGPULibFunc::EI_POWN, FastPownFuncId);
1537
1538 if (PownFunc) {
1539 // TODO: If the incoming integral value is an sitofp/uitofp, it won't
1540 // fold out without a known range. We can probably take the source
1541 // value directly.
1542 Value *CastedArg =
1543 B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
1544 // Have to drop any nofpclass attributes on the original call site.
// NOTE(review): the start and end of the attribute-removal statement (lines
// 1545 and 1547) are not present in this listing.
1546 1, AttributeFuncs::typeIncompatible(CastedArg->getType(),
1548 Call->setCalledFunction(PownFunc);
1549 Call->setArgOperand(1, CastedArg);
1550 return fold_pow(FPOp, B, PownInfo) || true;
1551 }
1552 }
1553
// No retargeting applied; try the generic folds on the original pow.
1554 if (fold_pow(FPOp, B, FInfo))
1555 return true;
1556
// Everything below is only done for calls carrying the afn flag.
1557 if (!FMF.approxFunc())
1558 return false;
1559
1560 if (FInfo.getId() == AMDGPULibFunc::EI_POW && FMF.approxFunc() &&
1561 getArgType(FInfo) == AMDGPULibFunc::F32) {
1562 AMDGPULibFunc PowFastInfo(AMDGPULibFunc::EI_POW_FAST, FInfo);
1563 if (FunctionCallee PowFastFunc = getFunction(M, PowFastInfo)) {
1564 Call->setCalledFunction(PowFastFunc);
1565 return fold_pow(FPOp, B, PowFastInfo) || true;
1566 }
1567 }
1568
// Last resort: inline exp2/log2 expansion.
1569 return expandFastPow(FPOp, B, PowKind::Pow);
1570}
1571
1572// Get a scalar native builtin single argument FP function
1573FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
1574 const FuncInfo &FInfo) {
1575 if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
1576 return nullptr;
1577 FuncInfo nf = FInfo;
1579 return getFunction(M, nf);
1580}
1581
1582// Some library calls are just wrappers around llvm intrinsics, but compiled
1583// conservatively. Preserve the flags from the original call site by
1584// substituting them with direct calls with all the flags.
1585bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
1586 bool AllowMinSizeF32,
1587 bool AllowF64,
1588 bool AllowStrictFP) {
1589 Type *FltTy = CI->getType()->getScalarType();
1590 const bool IsF32 = FltTy->isFloatTy();
1591
1592 // f64 intrinsics aren't implemented for most operations.
1593 if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
1594 return false;
1595
1596 // We're implicitly inlining by replacing the libcall with the intrinsic, so
1597 // don't do it for noinline call sites.
1598 if (CI->isNoInline())
1599 return false;
1600
1601 const Function *ParentF = CI->getFunction();
1602 // TODO: Handle strictfp
1603 if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
1604 return false;
1605
1606 if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
1607 return false;
1608 return true;
1609}
1610
// Rewrite the library call CI in place so that it targets the intrinsic
// IntrID, overloaded on CI's result type.
//
// For two-argument calls where exactly one operand is a vector, the scalar
// operand is first splatted to the other operand's element count so both
// operands have vector type.
//
// NOTE(review): lines 1628 and 1630 of the original are not present in this
// listing; the visible fragment only shows the intrinsic declaration being
// requested from CI's module.
1611void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
1612 CallInst *CI,
1613 Intrinsic::ID IntrID) {
1614 if (CI->arg_size() == 2) {
1615 Value *Arg0 = CI->getArgOperand(0);
1616 Value *Arg1 = CI->getArgOperand(1);
1617 VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
1618 VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
// Mixed vector/scalar operands: broadcast the scalar side.
1619 if (Arg0VecTy && !Arg1VecTy) {
1620 Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
1621 CI->setArgOperand(1, SplatRHS);
1622 } else if (!Arg0VecTy && Arg1VecTy) {
1623 Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
1624 CI->setArgOperand(0, SplatLHS);
1625 }
1626 }
1627
1629 CI->getModule(), IntrID, {CI->getType()}));
1631}
1632
1633bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
1634 IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
1635 bool AllowF64, bool AllowStrictFP) {
1636 if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
1637 AllowStrictFP))
1638 return false;
1639 replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
1640 return true;
1641}
1642
1643std::tuple<Value *, Value *, Value *>
1644AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
1645 FunctionCallee Fsincos) {
1646 DebugLoc DL = B.getCurrentDebugLocation();
1647 Function *F = B.GetInsertBlock()->getParent();
1648 B.SetInsertPointPastAllocas(F);
1649
1650 AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");
1651
1652 if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
1653 // If the argument is an instruction, it must dominate all uses so put our
1654 // sincos call there. Otherwise, right after the allocas works well enough
1655 // if it's an argument or constant.
1656
1657 B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
1658
1659 // SetInsertPoint unwelcomely always tries to set the debug loc.
1660 B.SetCurrentDebugLocation(DL);
1661 }
1662
1663 Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);
1664
1665 // The allocaInst allocates the memory in private address space. This need
1666 // to be addrspacecasted to point to the address space of cos pointer type.
1667 // In OpenCL 2.0 this is generic, while in 1.2 that is private.
1668 Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);
1669
1670 CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);
1671
1672 // TODO: Is it worth trying to preserve the location for the cos calls for the
1673 // load?
1674
1675 LoadInst *LoadCos = B.CreateLoad(Arg->getType(), Alloc);
1676 return {SinCos, LoadCos, SinCos};
1677}
1678
1679// fold sin, cos -> sincos.
1680bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
1681 const FuncInfo &fInfo) {
1682 assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
1683 fInfo.getId() == AMDGPULibFunc::EI_COS);
1684
1685 if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
1686 getArgType(fInfo) != AMDGPULibFunc::F64) ||
1687 fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
1688 return false;
1689
1690 bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
1691
1692 Value *CArgVal = FPOp->getOperand(0);
1693
1694 // TODO: Constant fold the call
1695 if (isa<ConstantData>(CArgVal))
1696 return false;
1697
1698 CallInst *CI = cast<CallInst>(FPOp);
1699
1700 Function *F = B.GetInsertBlock()->getParent();
1701 Module *M = F->getParent();
1702
1703 // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
1704 // implementation. Prefer the private form if available.
1705 AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
1706 SinCosLibFuncPrivate.getLeads()[0].PtrKind =
1708
1709 AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
1710 SinCosLibFuncGeneric.getLeads()[0].PtrKind =
1712
1713 FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
1714 FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
1715 FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
1716 if (!FSinCos)
1717 return false;
1718
1719 SmallVector<CallInst *> SinCalls;
1720 SmallVector<CallInst *> CosCalls;
1721 SmallVector<CallInst *> SinCosCalls;
1722 FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
1723 fInfo);
1724 const std::string PairName = PartnerInfo.mangle();
1725
1726 StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
1727 StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
1728 const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
1729 const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
1730
1731 // Intersect the two sets of flags.
1732 FastMathFlags FMF = FPOp->getFastMathFlags();
1733 MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);
1734
1735 SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};
1736
1737 for (User* U : CArgVal->users()) {
1738 CallInst *XI = dyn_cast<CallInst>(U);
1739 if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
1740 continue;
1741
1742 Function *UCallee = XI->getCalledFunction();
1743 if (!UCallee)
1744 continue;
1745
1746 bool Handled = true;
1747
1748 if (UCallee->getName() == SinName)
1749 SinCalls.push_back(XI);
1750 else if (UCallee->getName() == CosName)
1751 CosCalls.push_back(XI);
1752 else if (UCallee->getName() == SinCosPrivateName ||
1753 UCallee->getName() == SinCosGenericName)
1754 SinCosCalls.push_back(XI);
1755 else
1756 Handled = false;
1757
1758 if (Handled) {
1759 MergeDbgLocs.push_back(XI->getDebugLoc());
1760 auto *OtherOp = cast<FPMathOperator>(XI);
1761 FMF &= OtherOp->getFastMathFlags();
1763 FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
1764 }
1765 }
1766
1767 if (SinCalls.empty() || CosCalls.empty())
1768 return false;
1769
1770 B.setFastMathFlags(FMF);
1771 B.setDefaultFPMathTag(FPMath);
1772 DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
1773 B.SetCurrentDebugLocation(DbgLoc);
1774
1775 auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);
1776
1777 auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
1778 for (CallInst *C : Calls)
1779 C->replaceAllUsesWith(Res);
1780
1781 // Leave the other dead instructions to avoid clobbering iterators.
1782 };
1783
1784 replaceTrigInsts(SinCalls, Sin);
1785 replaceTrigInsts(CosCalls, Cos);
1786 replaceTrigInsts(SinCosCalls, SinCos);
1787
1788 // It's safe to delete the original now.
1789 CI->eraseFromParent();
1790 return true;
1791}
1792
1793bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo,
1794 APFloat &Res0, APFloat &Res1,
1795 Constant *copr0, Constant *copr1) {
1796 // By default, opr0/opr1/opr3 holds values of float/double type.
1797 // If they are not float/double, each function has to its
1798 // operand separately.
1799 double opr0 = 0.0, opr1 = 0.0;
1802 if (fpopr0) {
1803 opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1804 ? fpopr0->getValueAPF().convertToDouble()
1805 : (double)fpopr0->getValueAPF().convertToFloat();
1806 }
1807
1808 if (fpopr1) {
1809 opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
1810 ? fpopr1->getValueAPF().convertToDouble()
1811 : (double)fpopr1->getValueAPF().convertToFloat();
1812 }
1813
1814 switch (FInfo.getId()) {
1815 default:
1816 return false;
1817
1819 Res0 = APFloat{acos(opr0)};
1820 return true;
1821
1823 // acosh(x) == log(x + sqrt(x*x - 1))
1824 Res0 = APFloat{log(opr0 + sqrt(opr0 * opr0 - 1.0))};
1825 return true;
1826
1828 Res0 = APFloat{acos(opr0) / MATH_PI};
1829 return true;
1830
1832 Res0 = APFloat{asin(opr0)};
1833 return true;
1834
1836 // asinh(x) == log(x + sqrt(x*x + 1))
1837 Res0 = APFloat{log(opr0 + sqrt(opr0 * opr0 + 1.0))};
1838 return true;
1839
1841 Res0 = APFloat{asin(opr0) / MATH_PI};
1842 return true;
1843
1845 Res0 = APFloat{atan(opr0)};
1846 return true;
1847
1849 // atanh(x) == (log(x+1) - log(x-1))/2;
1850 Res0 = APFloat{(log(opr0 + 1.0) - log(opr0 - 1.0)) / 2.0};
1851 return true;
1852
1854 Res0 = APFloat{atan(opr0) / MATH_PI};
1855 return true;
1856
1858 Res0 =
1859 APFloat{(opr0 < 0.0) ? -pow(-opr0, 1.0 / 3.0) : pow(opr0, 1.0 / 3.0)};
1860 return true;
1861
1863 Res0 = APFloat{cos(opr0)};
1864 return true;
1865
1867 Res0 = APFloat{cosh(opr0)};
1868 return true;
1869
1871 Res0 = APFloat{cos(MATH_PI * opr0)};
1872 return true;
1873
1875 Res0 = APFloat{std::exp(opr0)};
1876 return true;
1877
1879 Res0 = APFloat{pow(2.0, opr0)};
1880 return true;
1881
1883 Res0 = APFloat{pow(10.0, opr0)};
1884 return true;
1885
1887 Res0 = APFloat{log(opr0)};
1888 return true;
1889
1891 Res0 = APFloat{log(opr0) / log(2.0)};
1892 return true;
1893
1895 Res0 = APFloat{log(opr0) / log(10.0)};
1896 return true;
1897
1899 Res0 = APFloat{1.0 / sqrt(opr0)};
1900 return true;
1901
1903 Res0 = APFloat{sin(opr0)};
1904 return true;
1905
1907 Res0 = APFloat{sinh(opr0)};
1908 return true;
1909
1911 Res0 = APFloat{sin(MATH_PI * opr0)};
1912 return true;
1913
1915 Res0 = APFloat{tan(opr0)};
1916 return true;
1917
1919 Res0 = APFloat{tanh(opr0)};
1920 return true;
1921
1923 Res0 = APFloat{tan(MATH_PI * opr0)};
1924 return true;
1925
1926 // two-arg functions
1929 Res0 = APFloat{pow(opr0, opr1)};
1930 return true;
1931
1933 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1934 double val = (double)iopr1->getSExtValue();
1935 Res0 = APFloat{pow(opr0, val)};
1936 return true;
1937 }
1938 return false;
1939 }
1940
1942 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
1943 double val = (double)iopr1->getSExtValue();
1944 Res0 = APFloat{pow(opr0, 1.0 / val)};
1945 return true;
1946 }
1947 return false;
1948 }
1949
1950 // with ptr arg
1952 Res0 = APFloat{sin(opr0)};
1953 Res1 = APFloat{cos(opr0)};
1954 return true;
1955 }
1956
1957 return false;
1958}
1959
1960bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1961 int numArgs = (int)aCI->arg_size();
1962 if (numArgs > 3)
1963 return false;
1964
1965 Constant *copr0 = nullptr;
1966 Constant *copr1 = nullptr;
1967 if (numArgs > 0) {
1968 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
1969 return false;
1970 }
1971
1972 if (numArgs > 1) {
1973 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
1974 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
1975 return false;
1976 }
1977 }
1978
1979 // At this point, all arguments to aCI are constants.
1980
1981 // max vector size is 16, and sincos will generate two results.
1982 SmallVector<APFloat, 16> Val0, Val1;
1983 int FuncVecSize = getVecSize(FInfo);
1984 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
1985 if (FuncVecSize == 1) {
1986 if (!evaluateScalarMathFunc(FInfo, Val0.emplace_back(0.0),
1987 Val1.emplace_back(0.0), copr0, copr1)) {
1988 return false;
1989 }
1990 } else {
1991 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
1992 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
1993 for (int i = 0; i < FuncVecSize; ++i) {
1994 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
1995 Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
1996 if (!evaluateScalarMathFunc(FInfo, Val0.emplace_back(0.0),
1997 Val1.emplace_back(0.0), celt0, celt1)) {
1998 return false;
1999 }
2000 }
2001 }
2002
2003 Constant *nval0, *nval1;
2004 if (FuncVecSize == 1) {
2005 nval0 = ConstantFP::get(aCI->getType(), Val0[0]);
2006 if (hasTwoResults)
2007 nval1 = ConstantFP::get(aCI->getType(), Val1[0]);
2008 } else {
2009 nval0 = getConstantFloatVector(Val0, aCI->getType());
2010 if (hasTwoResults)
2011 nval1 = getConstantFloatVector(Val1, aCI->getType());
2012 }
2013
2014 if (hasTwoResults) {
2015 // sincos
2016 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
2017 "math function with ptr arg not supported yet");
2018 new StoreInst(nval1, aCI->getArgOperand(1), aCI->getIterator());
2019 }
2020
2021 replaceCall(aCI, nval0);
2022 return true;
2023}
2024
// Body of a per-function pass entry point that runs AMDGPULibCalls::fold on
// every call instruction in F.
//
// NOTE(review): the function header (lines 2025-2026), the statement that
// defines CI (line 2038) and the return statement (line 2047) are not present
// in this listing; presumably CI is the current instruction cast to a
// CallInst, and the result reports whether Changed was set -- confirm against
// the original file.
2027 AMDGPULibCalls Simplifier(F, AM);
2028 Simplifier.initNativeFuncs();
2029
2030 bool Changed = false;
2031
2032 LLVM_DEBUG(dbgs() << "AMDIC: process function ";
2033 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
2034
2035 for (auto &BB : F) {
// The iterator is advanced inside the loop body, before fold() runs, so the
// loop does not hold an iterator to the instruction being folded.
2036 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
2037 // Ignore non-calls.
2039 ++I;
2040
2041 if (CI) {
2042 if (Simplifier.fold(CI))
2043 Changed = true;
2044 }
2045 }
2046 }
2048}
2049
// Body of a per-function pass entry point that runs AMDGPULibCalls::useNative
// on every call instruction in F. Does nothing (all analyses preserved) when
// the -amdgpu-use-native list is empty.
//
// NOTE(review): the function header (lines 2050-2051), the statement that
// defines CI (line 2062) and the final return statement (line 2068) are not
// present in this listing -- confirm against the original file.
2052 if (UseNative.empty())
2053 return PreservedAnalyses::all();
2054
2055 AMDGPULibCalls Simplifier(F, AM);
2056 Simplifier.initNativeFuncs();
2057
2058 bool Changed = false;
2059 for (auto &BB : F) {
// The iterator is advanced inside the loop body, before useNative() runs.
2060 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
2061 // Ignore non-calls.
2063 ++I;
2064 if (CI && Simplifier.useNative(CI))
2065 Changed = true;
2066 }
2067 }
2069}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static const TableEntry tbl_log[]
static const TableEntry tbl_tgamma[]
static AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo)
static const TableEntry tbl_expm1[]
static Constant * getConstantFloatVector(const ArrayRef< APFloat > Values, const Type *Ty)
static const TableEntry tbl_asinpi[]
static const TableEntry tbl_cos[]
#define MATH_SQRT2
static const TableEntry tbl_exp10[]
static CallInst * CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, const Twine &Name="")
static CallInst * CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, Value *Arg2, const Twine &Name="")
static const TableEntry tbl_rsqrt[]
static const TableEntry tbl_atanh[]
static const TableEntry tbl_cosh[]
static const TableEntry tbl_asin[]
static const TableEntry tbl_sinh[]
static const TableEntry tbl_acos[]
static const TableEntry tbl_tan[]
static const TableEntry tbl_cospi[]
static const TableEntry tbl_tanpi[]
static cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static bool HasNative(AMDGPULibFunc::EFuncId id)
static Value * emitIsInf(IRBuilder<> &B, Value *val)
ArrayRef< TableEntry > TableRef
static int getVecSize(const AMDGPULibFunc &FInfo)
static Value * emitFastExpYLnx(IRBuilder<> &B, Value *X, Value *Y)
static Value * emitIsInteger(IRBuilder<> &B, Value *Y)
static Value * emitIsEvenInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_sin[]
static const TableEntry tbl_atan[]
static const TableEntry tbl_log2[]
static const TableEntry tbl_acospi[]
static Value * emitPowFixup(IRBuilder<> &B, Value *X, Value *Y, Value *ExpYLnX, PowKind Kind)
Emit special case management epilog code for fast pow, powr, pown, and rootn expansions.
static const TableEntry tbl_sqrt[]
static const TableEntry tbl_asinh[]
#define MATH_E
static TableRef getOptTable(AMDGPULibFunc::EFuncId id)
static const TableEntry tbl_acosh[]
static const TableEntry tbl_exp[]
static const TableEntry tbl_cbrt[]
static const TableEntry tbl_sinpi[]
static const TableEntry tbl_atanpi[]
#define MATH_PI
static FunctionType * getPownType(FunctionType *FT)
static const TableEntry tbl_erf[]
static const TableEntry tbl_log10[]
#define MATH_SQRT1_2
static const TableEntry tbl_erfc[]
static cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static const TableEntry tbl_tanh[]
static Value * emitIsOddInteger(IRBuilder<> &B, Value *Y)
static const TableEntry tbl_exp2[]
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
loop term fold
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
FunctionAnalysisManager FAM
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static void replaceCall(FPMathOperator *I, Value *With)
bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const
bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const
bool fold(CallInst *CI)
static void replaceCall(Instruction *I, Value *With)
AMDGPULibCalls(Function &F, FunctionAnalysisManager &FAM)
bool useNative(CallInst *CI)
static unsigned getEPtrKindFromAddrSpace(unsigned AS)
Wrapper class for AMDGPULibFuncImpl.
static bool parse(StringRef MangledName, AMDGPULibFunc &Ptr)
std::string getName() const
Get unmangled name for mangled library function and name for unmangled library function.
static FunctionCallee getOrInsertFunction(llvm::Module *M, const AMDGPULibFunc &fInfo)
void setPrefix(ENamePrefix PFX)
bool isCompatibleSignature(const Module &M, const FunctionType *FuncTy) const
EFuncId getId() const
bool isMangled() const
Param * getLeads()
Get leading parameters for mangled lib functions.
void setId(EFuncId Id)
ENamePrefix getPrefix() const
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
bool isNegative() const
Definition APFloat.h:1538
LLVM_ABI double convertToDouble() const
Converts this APFloat to host double value.
Definition APFloat.cpp:5971
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
Definition APFloat.h:1521
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:5999
bool isZero() const
Definition APFloat.h:1534
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:130
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
A function analysis which provides an AssumptionCache.
static LLVM_ABI Attribute getWithNoFPClass(LLVMContext &Context, FPClassTest Mask)
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
void setCallingConv(CallingConv::ID CC)
void removeParamAttrs(unsigned ArgNo, const AttributeMask &AttrsToRemove)
Removes the attributes from the given argument.
bool isNoBuiltin() const
Return true if the call should not be treated as a call to a builtin.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
AttributeSet getParamAttributes(unsigned ArgNo) const
Return the param attributes for this call.
bool isNoInline() const
Return true if the call should not be inlined.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
FunctionType * getFunctionType() const
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
LLVM_ABI APFloat getElementAsAPFloat(uint64_t i) const
If this is a sequential container of floating point type, return the specified element as an APFloat.
LLVM_ABI Constant * getElementAsConstant(uint64_t i) const
Return a Constant for a specified index's element.
LLVM_ABI uint64_t getNumElements() const
Return the number of elements in the array or vector.
static LLVM_ABI Constant * getSplat(unsigned NumElts, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
const APFloat & getValueAPF() const
Definition Constants.h:463
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getQNaN(Type *Ty, bool Negative=false, APInt *Payload=nullptr)
LLVM_ABI bool isExactlyValue(const APFloat &V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
Align getAlignValue() const
Return the constant as an llvm::Align, interpreting 0 as Align(1).
Definition Constants.h:186
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
static LLVM_ABI DILocation * getMergedLocations(ArrayRef< DILocation * > Locs)
Try to combine the vector of locations passed as input in a single one.
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Utility class for floating point operations which can have information about relaxed accuracy require...
Definition Operator.h:202
bool isFast() const
Test if this operation allows all non-strict floating-point transforms.
Definition Operator.h:264
bool hasNoNaNs() const
Test if this operation's arguments and results are assumed not-NaN.
Definition Operator.h:270
FastMathFlags getFastMathFlags() const
Convenience function for getting all the fast-math flags.
Definition Operator.h:291
bool hasNoInfs() const
Test if this operation's arguments and results are assumed not-infinite.
Definition Operator.h:273
bool hasApproxFunc() const
Test if this operation allows approximations of math library functions or intrinsics.
Definition Operator.h:288
LLVM_ABI float getFPAccuracy() const
Get the maximum error permitted by this operation in ULPs.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setAllowContract(bool B=true)
Definition FMF.h:90
bool none() const
Definition FMF.h:57
bool approxFunc() const
Definition FMF.h:70
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
FunctionType * getFunctionType()
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:711
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setFastMathFlags(FastMathFlags FMF)
Convenience function for setting multiple fast-math flags on this instruction, which must be an opera...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
static LLVM_ABI MDNode * getMostGenericFPMath(MDNode *A, MDNode *B)
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Analysis pass providing the TargetLibraryInfo.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition Type.h:144
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:158
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
void dropAllReferences()
Drop all references to operands.
Definition User.h:324
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_ABI APInt pow(const APInt &X, int64_t N)
Compute X^N for N>=0.
Definition APInt.cpp:3207
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
bool match(Val *V, const Pattern &P)
ap_match< APFloat > m_APFloatAllowPoison(const APFloat *&Res)
Match APFloat while allowing poison in splat vector constants.
initializer< Ty > init(const Ty &Val)
constexpr double ln2
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static double log2(double V)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI bool isKnownIntegral(const Value *V, const SimplifyQuery &SQ, FastMathFlags FMF)
Return true if the floating-point value V is known to be an integer value.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39