LLVM 23.0.0git
AMDGPULibCalls.cpp
//===- AMDGPULibCalls.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file does AMD library function optimizations.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include <cmath>

#define DEBUG_TYPE "amdgpu-simplifylib"

using namespace llvm;
using namespace llvm::PatternMatch;

static cl::opt<bool> EnablePreLink("amdgpu-prelink",
                                   cl::desc("Enable pre-link mode optimizations"),
                                   cl::init(false),
                                   cl::Hidden);

static cl::list<std::string> UseNative("amdgpu-use-native",
  cl::desc("Comma separated list of functions to replace with native, or all"),
  cl::CommaSeparated, cl::ValueOptional,
  cl::Hidden);

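// Illustrative usage (not part of the original source; pass and flag spellings
// assumed): the list is fed through the cl::opt machinery, e.g.
//   opt -passes=amdgpu-usenative -amdgpu-use-native=sin,cos ...
//   clang ... -mllvm -amdgpu-use-native
// where an empty value or "all" makes every supported call eligible, per
// initNativeFuncs() below.
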
#define MATH_PI      numbers::pi
#define MATH_E       numbers::e
#define MATH_SQRT2   numbers::sqrt2
#define MATH_SQRT1_2 numbers::inv_sqrt2

namespace llvm {

class AMDGPULibCalls {
private:
  const SimplifyQuery SQ;

  using FuncInfo = llvm::AMDGPULibFunc;

  // -fuse-native.
  bool AllNative = false;

  bool useNativeFunc(const StringRef F) const;

  // Return a pointer (pointer expr) to the function if a function definition
  // with "FuncName" exists. It may create a new function prototype in
  // pre-link mode.
  FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);

  bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);

  bool TDOFold(CallInst *CI, const FuncInfo &FInfo);

  /* Specialized optimizations */

  // pow/powr/pown
  bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);

  // rootn
  bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);

  // -fuse-native for sincos
  bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);

  // Evaluate calls if the calls' arguments are constants.
  bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1,
                              Constant *copr0, Constant *copr1);
  bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);

  /// Insert a call to the sincos function \p Fsincos. Returns (value of sin,
  /// value of cos, sincos call).
  std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
                                                     FastMathFlags FMF,
                                                     IRBuilder<> &B,
                                                     FunctionCallee Fsincos);

  // sin/cos
  bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);

  // __read_pipe/__write_pipe
  bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
                            const FuncInfo &FInfo);

  // Get a scalar native builtin single-argument FP function.
  FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);

  /// Substitute a call to a known libcall with an intrinsic call. If \p
  /// AllowMinSizeF32 is true, allow the replacement in a minsize function.
  bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
                                         bool AllowMinSizeF32 = false,
                                         bool AllowF64 = false,
                                         bool AllowStrictFP = false);
  void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
                                         Intrinsic::ID IntrID);

  bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
                                            Intrinsic::ID IntrID,
                                            bool AllowMinSizeF32 = false,
                                            bool AllowF64 = false,
                                            bool AllowStrictFP = false);

protected:
  bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;

  bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const;

  static void replaceCall(Instruction *I, Value *With) {
    I->replaceAllUsesWith(With);
    I->eraseFromParent();
  }

  static void replaceCall(FPMathOperator *I, Value *With) {
    replaceCall(cast<Instruction>(I), With);
  }

public:
  AMDGPULibCalls(Function &F, FunctionAnalysisManager &FAM);

  bool fold(CallInst *CI);

  void initNativeFuncs();

  // Replace a normal math function call with its native version.
  bool useNative(CallInst *CI);
};

} // end namespace llvm

template <typename IRB>
static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
                              const Twine &Name = "") {
  CallInst *R = B.CreateCall(Callee, Arg, Name);
  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
    R->setCallingConv(F->getCallingConv());
  return R;
}

template <typename IRB>
static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
                               Value *Arg2, const Twine &Name = "") {
  CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
  if (Function *F = dyn_cast<Function>(Callee.getCallee()))
    R->setCallingConv(F->getCallingConv());
  return R;
}

static FunctionType *getPownType(FunctionType *FT) {
  Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
  if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
    PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());

  return FunctionType::get(FT->getReturnType(),
                           {FT->getParamType(0), PowNExpTy}, false);
}
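
// Illustrative sketch (not part of the original source): getPownType maps a
// pow-style signature to the matching pown signature by swapping the exponent
// for an i32 of the same vector width, e.g.
//   float (float, float)                -> float (float, i32)
//   <2 x half> (<2 x half>, <2 x half>) -> <2 x half> (<2 x half>, <2 x i32>)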

// Data structures for table-driven optimizations.
// FuncTbl works for both f32 and f64 functions with 1 input argument.

struct TableEntry {
  double result;
  double input;
};
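
// Illustrative sketch (not part of the original source): TDOFold() below scans
// the table that getOptTable() selects for the callee, so a constant call like
//   y = acos(-1.0f);
// matches the {MATH_PI, -1.0} entry of tbl_acos and folds to the constant
// MATH_PI; a vector argument folds only if every lane has a table entry.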

/* a list of {result, input} */
static const TableEntry tbl_acos[] = {
  {MATH_PI / 2.0, 0.0},
  {MATH_PI / 2.0, -0.0},
  {0.0, 1.0},
  {MATH_PI, -1.0}
};
static const TableEntry tbl_acosh[] = {
  {0.0, 1.0}
};
static const TableEntry tbl_acospi[] = {
  {0.5, 0.0},
  {0.5, -0.0},
  {0.0, 1.0},
  {1.0, -1.0}
};
static const TableEntry tbl_asin[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 2.0, 1.0},
  {-MATH_PI / 2.0, -1.0}
};
static const TableEntry tbl_asinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_asinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.5, 1.0},
  {-0.5, -1.0}
};
static const TableEntry tbl_atan[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {MATH_PI / 4.0, 1.0},
  {-MATH_PI / 4.0, -1.0}
};
static const TableEntry tbl_atanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_atanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {0.25, 1.0},
  {-0.25, -1.0}
};
static const TableEntry tbl_cbrt[] = {
  {0.0, 0.0},
  {-0.0, -0.0},
  {1.0, 1.0},
  {-1.0, -1.0},
};
static const TableEntry tbl_cos[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cosh[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_cospi[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erfc[] = {
  {1.0, 0.0},
  {1.0, -0.0}
};
static const TableEntry tbl_erf[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_exp[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {MATH_E, 1.0}
};
static const TableEntry tbl_exp2[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {2.0, 1.0}
};
static const TableEntry tbl_exp10[] = {
  {1.0, 0.0},
  {1.0, -0.0},
  {10.0, 1.0}
};
static const TableEntry tbl_expm1[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_log[] = {
  {0.0, 1.0},
  {1.0, MATH_E}
};
static const TableEntry tbl_log2[] = {
  {0.0, 1.0},
  {1.0, 2.0}
};
static const TableEntry tbl_log10[] = {
  {0.0, 1.0},
  {1.0, 10.0}
};
static const TableEntry tbl_rsqrt[] = {
  {1.0, 1.0},
  {MATH_SQRT1_2, 2.0}
};
static const TableEntry tbl_sin[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sinpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_sqrt[] = {
  {0.0, 0.0},
  {1.0, 1.0},
  {MATH_SQRT2, 2.0}
};
static const TableEntry tbl_tan[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanh[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tanpi[] = {
  {0.0, 0.0},
  {-0.0, -0.0}
};
static const TableEntry tbl_tgamma[] = {
  {1.0, 1.0},
  {1.0, 2.0},
  {2.0, 3.0},
  {6.0, 4.0}
};

static bool HasNative(AMDGPULibFunc::EFuncId id) {
  switch(id) {
  case AMDGPULibFunc::EI_DIVIDE:
  case AMDGPULibFunc::EI_COS:
  case AMDGPULibFunc::EI_EXP:
  case AMDGPULibFunc::EI_EXP2:
  case AMDGPULibFunc::EI_EXP10:
  case AMDGPULibFunc::EI_LOG:
  case AMDGPULibFunc::EI_LOG2:
  case AMDGPULibFunc::EI_LOG10:
  case AMDGPULibFunc::EI_POWR:
  case AMDGPULibFunc::EI_RECIP:
  case AMDGPULibFunc::EI_RSQRT:
  case AMDGPULibFunc::EI_SIN:
  case AMDGPULibFunc::EI_SINCOS:
  case AMDGPULibFunc::EI_SQRT:
  case AMDGPULibFunc::EI_TAN:
    return true;
  default:;
  }
  return false;
}

using TableRef = ArrayRef<TableEntry>;

static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
  switch(id) {
  case AMDGPULibFunc::EI_ACOS:    return TableRef(tbl_acos);
  case AMDGPULibFunc::EI_ACOSH:   return TableRef(tbl_acosh);
  case AMDGPULibFunc::EI_ACOSPI:  return TableRef(tbl_acospi);
  case AMDGPULibFunc::EI_ASIN:    return TableRef(tbl_asin);
  case AMDGPULibFunc::EI_ASINH:   return TableRef(tbl_asinh);
  case AMDGPULibFunc::EI_ASINPI:  return TableRef(tbl_asinpi);
  case AMDGPULibFunc::EI_ATAN:    return TableRef(tbl_atan);
  case AMDGPULibFunc::EI_ATANH:   return TableRef(tbl_atanh);
  case AMDGPULibFunc::EI_ATANPI:  return TableRef(tbl_atanpi);
  case AMDGPULibFunc::EI_CBRT:    return TableRef(tbl_cbrt);
  case AMDGPULibFunc::EI_NCOS:
  case AMDGPULibFunc::EI_COS:     return TableRef(tbl_cos);
  case AMDGPULibFunc::EI_COSH:    return TableRef(tbl_cosh);
  case AMDGPULibFunc::EI_COSPI:   return TableRef(tbl_cospi);
  case AMDGPULibFunc::EI_ERFC:    return TableRef(tbl_erfc);
  case AMDGPULibFunc::EI_ERF:     return TableRef(tbl_erf);
  case AMDGPULibFunc::EI_EXP:     return TableRef(tbl_exp);
  case AMDGPULibFunc::EI_NEXP2:
  case AMDGPULibFunc::EI_EXP2:    return TableRef(tbl_exp2);
  case AMDGPULibFunc::EI_EXP10:   return TableRef(tbl_exp10);
  case AMDGPULibFunc::EI_EXPM1:   return TableRef(tbl_expm1);
  case AMDGPULibFunc::EI_LOG:     return TableRef(tbl_log);
  case AMDGPULibFunc::EI_NLOG2:
  case AMDGPULibFunc::EI_LOG2:    return TableRef(tbl_log2);
  case AMDGPULibFunc::EI_LOG10:   return TableRef(tbl_log10);
  case AMDGPULibFunc::EI_NRSQRT:
  case AMDGPULibFunc::EI_RSQRT:   return TableRef(tbl_rsqrt);
  case AMDGPULibFunc::EI_NSIN:
  case AMDGPULibFunc::EI_SIN:     return TableRef(tbl_sin);
  case AMDGPULibFunc::EI_SINH:    return TableRef(tbl_sinh);
  case AMDGPULibFunc::EI_SINPI:   return TableRef(tbl_sinpi);
  case AMDGPULibFunc::EI_NSQRT:
  case AMDGPULibFunc::EI_SQRT:    return TableRef(tbl_sqrt);
  case AMDGPULibFunc::EI_TAN:     return TableRef(tbl_tan);
  case AMDGPULibFunc::EI_TANH:    return TableRef(tbl_tanh);
  case AMDGPULibFunc::EI_TANPI:   return TableRef(tbl_tanpi);
  case AMDGPULibFunc::EI_TGAMMA:  return TableRef(tbl_tgamma);
  default:;
  }
  return TableRef();
}

static inline int getVecSize(const AMDGPULibFunc &FInfo) {
  return FInfo.getLeads()[0].VectorSize;
}

static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc &FInfo) {
  return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType;
}

FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
  // If we are doing PreLinkOpt, the function is external. So it is safe to
  // use getOrInsertFunction() at this stage.

  return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
                       : AMDGPULibFunc::getFunction(M, fInfo);
}

bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
                                       FuncInfo &FInfo) {
  return AMDGPULibFunc::parse(FMangledName, FInfo);
}

bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const {
  return FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs();
}

bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold(
    const FPMathOperator *FPOp) const {
  // TODO: Refine to approxFunc or contract
  return FPOp->isFast();
}

AMDGPULibCalls::AMDGPULibCalls(Function &F, FunctionAnalysisManager &FAM)
    : SQ(F.getParent()->getDataLayout(),
         &FAM.getResult<TargetLibraryAnalysis>(F),
         FAM.getCachedResult<DominatorTreeAnalysis>(F),
         &FAM.getResult<AssumptionAnalysis>(F)) {}

bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
  return AllNative || llvm::is_contained(UseNative, F);
}

void AMDGPULibCalls::initNativeFuncs() {
  AllNative = useNativeFunc("all") ||
              (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
               UseNative.begin()->empty());
}

bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
  bool native_sin = useNativeFunc("sin");
  bool native_cos = useNativeFunc("cos");

  if (native_sin && native_cos) {
    Module *M = aCI->getModule();
    Value *opr0 = aCI->getArgOperand(0);

    AMDGPULibFunc nf;
    nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
    nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;

    nf.setPrefix(AMDGPULibFunc::NATIVE);
    nf.setId(AMDGPULibFunc::EI_SIN);
    FunctionCallee sinExpr = getFunction(M, nf);

    nf.setPrefix(AMDGPULibFunc::NATIVE);
    nf.setId(AMDGPULibFunc::EI_COS);
    FunctionCallee cosExpr = getFunction(M, nf);
    if (sinExpr && cosExpr) {
      Value *sinval =
          CallInst::Create(sinExpr, opr0, "splitsin", aCI->getIterator());
      Value *cosval =
          CallInst::Create(cosExpr, opr0, "splitcos", aCI->getIterator());
      new StoreInst(cosval, aCI->getArgOperand(1), aCI->getIterator());

      DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
                                          << " with native version of sin/cos");

      replaceCall(aCI, sinval);
      return true;
    }
  }
  return false;
}

bool AMDGPULibCalls::useNative(CallInst *aCI) {
  Function *Callee = aCI->getCalledFunction();
  if (!Callee || aCI->isNoBuiltin())
    return false;

  FuncInfo FInfo;
  if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
      FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
      getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
      !(AllNative || useNativeFunc(FInfo.getName()))) {
    return false;
  }

  if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
    return sincosUseNative(aCI, FInfo);

  FInfo.setPrefix(AMDGPULibFunc::NATIVE);
  FunctionCallee F = getFunction(aCI->getModule(), FInfo);
  if (!F)
    return false;

  aCI->setCalledFunction(F);
  DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
                                      << " with native version");
  return true;
}

// Clang emits a call to __read_pipe_2 or __read_pipe_4 for the OpenCL read_pipe
// builtin, with appended type size and alignment arguments, where 2 or 4
// indicates the original number of arguments. The library has optimized
// versions of __read_pipe_2/__read_pipe_4 for the case where the type size and
// alignment are the same power-of-2 value. This function transforms
// __read_pipe_2 to __read_pipe_2_N for such cases, where N is the size in
// bytes of the type (N = 1, 2, 4, 8, ..., 128). The same applies to
// __read_pipe_4, __write_pipe_2, and __write_pipe_4.
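//
// Illustrative sketch (assumed IR types, not from the original source): for a
// 4-byte packet with 4-byte alignment,
//   %r = call i32 @__read_pipe_2(ptr addrspace(1) %p, ptr %ptr, i32 4, i32 4)
// becomes a call to the specialized entry point with the size/align dropped:
//   %r = call i32 @__read_pipe_2_4(ptr addrspace(1) %p, ptr %ptr)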
bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
                                          const FuncInfo &FInfo) {
  auto *Callee = CI->getCalledFunction();
  if (!Callee->isDeclaration())
    return false;

  assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
  auto *M = Callee->getParent();
  std::string Name = std::string(Callee->getName());
  auto NumArg = CI->arg_size();
  if (NumArg != 4 && NumArg != 6)
    return false;
  ConstantInt *PacketSize =
      dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
  ConstantInt *PacketAlign =
      dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
  if (!PacketSize || !PacketAlign)
    return false;

  unsigned Size = PacketSize->getZExtValue();
  Align Alignment = PacketAlign->getAlignValue();
  if (Alignment != Size)
    return false;

  unsigned PtrArgLoc = CI->arg_size() - 3;
  Value *PtrArg = CI->getArgOperand(PtrArgLoc);
  Type *PtrTy = PtrArg->getType();

  SmallVector<Type *, 6> ArgTys;
  for (unsigned I = 0; I != PtrArgLoc; ++I)
    ArgTys.push_back(CI->getArgOperand(I)->getType());
  ArgTys.push_back(PtrTy);

  Name = Name + "_" + std::to_string(Size);
  auto *FTy = FunctionType::get(Callee->getReturnType(),
                                ArrayRef<Type *>(ArgTys), false);
  AMDGPULibFunc NewLibFunc(Name, FTy);
  FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
  if (!F)
    return false;

  SmallVector<Value *, 6> Args;
  for (unsigned I = 0; I != PtrArgLoc; ++I)
    Args.push_back(CI->getArgOperand(I));
  Args.push_back(PtrArg);

  auto *NCI = B.CreateCall(F, Args);
  NCI->setAttributes(CI->getAttributes());
  CI->replaceAllUsesWith(NCI);
  CI->dropAllReferences();
  CI->eraseFromParent();

  return true;
}

// This function returns false if there is no change; otherwise it returns true.
bool AMDGPULibCalls::fold(CallInst *CI) {
  Function *Callee = CI->getCalledFunction();
  // Ignore indirect calls.
  if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
    return false;

  FuncInfo FInfo;
  if (!parseFunctionName(Callee->getName(), FInfo))
    return false;

  // Further check the number of arguments to see if they match.
  // TODO: Check calling convention matches too
  if (!FInfo.isCompatibleSignature(*Callee->getParent(), CI->getFunctionType()))
    return false;

  LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n');

  if (TDOFold(CI, FInfo))
    return true;

  IRBuilder<> B(CI);
  if (CI->isStrictFP())
    B.setIsFPConstrained(true);

  if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) {
    // Under unsafe-math, evaluate calls if possible.
    // According to Brian Sumner, we can do this for all f32 function calls
    // using the host's double function calls.
    if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo))
      return true;

    // Copy fast flags from the original call.
    FastMathFlags FMF = FPOp->getFastMathFlags();
    B.setFastMathFlags(FMF);

    // Specialized optimizations for each function call.
    //
    // TODO: Handle native functions
    switch (FInfo.getId()) {
    case AMDGPULibFunc::EI_EXP:
      if (FMF.none())
        return false;
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
                                                  FMF.approxFunc());
    case AMDGPULibFunc::EI_EXP2:
      if (FMF.none())
        return false;
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
                                                  FMF.approxFunc());
    case AMDGPULibFunc::EI_LOG:
      if (FMF.none())
        return false;
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
                                                  FMF.approxFunc());
    case AMDGPULibFunc::EI_LOG2:
      if (FMF.none())
        return false;
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
                                                  FMF.approxFunc());
    case AMDGPULibFunc::EI_LOG10:
      if (FMF.none())
        return false;
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
                                                  FMF.approxFunc());
    case AMDGPULibFunc::EI_FMIN:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
                                                  true, true);
    case AMDGPULibFunc::EI_FMAX:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
                                                  true, true);
    case AMDGPULibFunc::EI_FMA:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
                                                  true);
    case AMDGPULibFunc::EI_MAD:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
                                                  true, true);
    case AMDGPULibFunc::EI_FABS:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
                                                  true, true);
    case AMDGPULibFunc::EI_COPYSIGN:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
                                                  true, true, true);
    case AMDGPULibFunc::EI_FLOOR:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
                                                  true);
    case AMDGPULibFunc::EI_CEIL:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
                                                  true);
    case AMDGPULibFunc::EI_TRUNC:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
                                                  true);
    case AMDGPULibFunc::EI_RINT:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
                                                  true);
    case AMDGPULibFunc::EI_ROUND:
      return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
                                                  true);
    case AMDGPULibFunc::EI_LDEXP: {
      if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
        return false;

      Value *Arg1 = CI->getArgOperand(1);
      if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
          VecTy && !isa<VectorType>(Arg1->getType())) {
        Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
        CI->setArgOperand(1, SplatArg1);
      }

      CI->setCalledFunction(Intrinsic::getOrInsertDeclaration(
          CI->getModule(), Intrinsic::ldexp,
          {CI->getType(), CI->getArgOperand(1)->getType()}));
      return true;
    }
    case AMDGPULibFunc::EI_POW: {
      Module *M = Callee->getParent();
      AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo);
      FunctionCallee PowrFunc = getFunction(M, PowrInfo);
      CallInst *Call = cast<CallInst>(FPOp);

      // pow(x, y) -> powr(x, y) for x >= -0.0
      // TODO: Account for flags on current call
      if (PowrFunc && cannotBeOrderedLessThanZero(
                          FPOp->getOperand(0), SQ.getWithInstruction(Call))) {
        Call->setCalledFunction(PowrFunc);
        return fold_pow(FPOp, B, PowrInfo) || true;
      }

      // pow(x, y) -> pown(x, y) for known integral y
      if (isKnownIntegral(FPOp->getOperand(1), SQ.getWithInstruction(CI),
                          FPOp->getFastMathFlags())) {
        FunctionType *PownType = getPownType(CI->getFunctionType());
        AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true);
        FunctionCallee PownFunc = getFunction(M, PownInfo);
        if (PownFunc) {
          // TODO: If the incoming integral value is an sitofp/uitofp, it won't
          // fold out without a known range. We can probably take the source
          // value directly.
          Value *CastedArg =
              B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
          // Have to drop any nofpclass attributes on the original call site.
          Call->removeParamAttrs(
              1, AttributeFuncs::typeIncompatible(CastedArg->getType(),
                                                  Call->getParamAttributes(1)));
          Call->setCalledFunction(PownFunc);
          Call->setArgOperand(1, CastedArg);
          return fold_pow(FPOp, B, PownInfo) || true;
        }
      }

      return fold_pow(FPOp, B, FInfo);
    }
    case AMDGPULibFunc::EI_POWR:
    case AMDGPULibFunc::EI_POWN:
      return fold_pow(FPOp, B, FInfo);
    case AMDGPULibFunc::EI_ROOTN:
      return fold_rootn(FPOp, B, FInfo);
    case AMDGPULibFunc::EI_SQRT:
      // TODO: Allow with strictfp + constrained intrinsic
      return tryReplaceLibcallWithSimpleIntrinsic(
          B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
    case AMDGPULibFunc::EI_COS:
    case AMDGPULibFunc::EI_SIN:
      return fold_sincos(FPOp, B, FInfo);
    default:
      break;
    }
  } else {
    // Specialized optimizations for each function call
    switch (FInfo.getId()) {
    case AMDGPULibFunc::EI_READ_PIPE_2:
    case AMDGPULibFunc::EI_READ_PIPE_4:
    case AMDGPULibFunc::EI_WRITE_PIPE_2:
    case AMDGPULibFunc::EI_WRITE_PIPE_4:
      return fold_read_write_pipe(CI, B, FInfo);
    default:
      break;
    }
  }

  return false;
}

bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
  // Table-driven optimization.
  const TableRef tr = getOptTable(FInfo.getId());
  if (tr.empty())
    return false;

  int const sz = (int)tr.size();
  Value *opr0 = CI->getArgOperand(0);

  if (getVecSize(FInfo) > 1) {
    if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
      SmallVector<double, 0> DVal;
      for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
        ConstantFP *eltval = dyn_cast<ConstantFP>(
            CV->getElementAsConstant((unsigned)eltNo));
        assert(eltval && "Non-FP arguments in math function!");
        bool found = false;
        for (int i = 0; i < sz; ++i) {
          if (eltval->isExactlyValue(tr[i].input)) {
            DVal.push_back(tr[i].result);
            found = true;
            break;
          }
        }
        if (!found) {
          // This vector constant is not handled yet.
          return false;
        }
      }
      LLVMContext &context = CI->getContext();
      Constant *nval;
      if (getArgType(FInfo) == AMDGPULibFunc::F32) {
        SmallVector<float, 0> FVal;
        for (double D : DVal)
          FVal.push_back((float)D);
        ArrayRef<float> tmp(FVal);
        nval = ConstantDataVector::get(context, tmp);
      } else { // F64
        ArrayRef<double> tmp(DVal);
        nval = ConstantDataVector::get(context, tmp);
      }
      LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
      replaceCall(CI, nval);
      return true;
    }
  } else {
    // Scalar version.
    if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
      for (int i = 0; i < sz; ++i) {
        if (CF->isExactlyValue(tr[i].input)) {
          Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
          LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
          replaceCall(CI, nval);
          return true;
        }
      }
    }
  }

  return false;
}

namespace llvm {
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
  return ::log2(V);
#else
  return log(V) / numbers::ln2;
#endif
}
} // namespace llvm

bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
                              const FuncInfo &FInfo) {
  assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
          FInfo.getId() == AMDGPULibFunc::EI_POWR ||
          FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
         "fold_pow: encounter a wrong function call");

  Module *M = B.GetInsertBlock()->getModule();
  Type *eltType = FPOp->getType()->getScalarType();
  Value *opr0 = FPOp->getOperand(0);
  Value *opr1 = FPOp->getOperand(1);

  const APFloat *CF = nullptr;
  const APInt *CINT = nullptr;
  if (!match(opr1, m_APFloatAllowPoison(CF)))
    match(opr1, m_APIntAllowPoison(CINT));

  // 0x1111111 means that we don't do anything for this call.
  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);

  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
    // pow/powr/pown(x, 0) == 1
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
    Constant *cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    replaceCall(FPOp, cnval);
    return true;
  }
  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
    // pow/powr/pown(x, 1.0) = x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
    replaceCall(FPOp, opr0);
    return true;
  }
  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
    // pow/powr/pown(x, 2.0) = x*x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
                      << *opr0 << "\n");
    Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
    replaceCall(FPOp, nval);
    return true;
  }
  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
    // pow/powr/pown(x, -1.0) = 1.0/x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
    Constant *cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
    replaceCall(FPOp, nval);
    return true;
  }

  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
    // pow[r](x, [-]0.5) = sqrt(x)
    bool issqrt = CF->isExactlyValue(0.5);
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
                                                : AMDGPULibFunc::EI_RSQRT,
                                         FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
                        << '(' << *opr0 << ")\n");
      Value *nval = CreateCallEx(B, FPExpr, opr0,
                                 issqrt ? "__pow2sqrt" : "__pow2rsqrt");
      replaceCall(FPOp, nval);
      return true;
    }
  }

  if (!isUnsafeFiniteOnlyMath(FPOp))
    return false;

  // Unsafe math optimizations.

  // Remember that ci_opr1 is set if opr1 is integral.
  if (CF) {
    double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
                      ? (double)CF->convertToFloat()
                      : CF->convertToDouble();
    int ival = (int)dval;
    if ((double)ival == dval) {
      ci_opr1 = ival;
    } else
      ci_opr1 = 0x11111111;
  }

  // pow/powr/pown(x, c) = [1/](x*x*..x); where
  // trunc(c) == c && the number of x == c && |c| <= 12
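  //
  // Illustrative sketch (not from the original source): for c = 5 the loop
  // below builds a square-and-multiply chain,
  //   r = x;              // bit 0 of 5 is set
  //   t = x * x;          // x^2   ("__powx2")
  //   t = t * t;          // x^4
  //   r = r * t;          // x^5   ("__powprod")
  // and for a negative c the accumulated product is inverted once at the end
  // (1.0 / prod, "__1powprod").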
  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
  if (abs_opr1 <= 12) {
    Constant *cnval;
    Value *nval;
    if (abs_opr1 == 0) {
      cnval = ConstantFP::get(eltType, 1.0);
      if (getVecSize(FInfo) > 1) {
        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
      }
      nval = cnval;
    } else {
      Value *valx2 = nullptr;
      nval = nullptr;
      while (abs_opr1 > 0) {
        valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
        if (abs_opr1 & 1) {
          nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
        }
        abs_opr1 >>= 1;
      }
    }

    if (ci_opr1 < 0) {
      cnval = ConstantFP::get(eltType, 1.0);
      if (getVecSize(FInfo) > 1) {
        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
      }
      nval = B.CreateFDiv(cnval, nval, "__1powprod");
    }
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
                      << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
                      << ")\n");
    replaceCall(FPOp, nval);
    return true;
  }

  // If we should use the generic intrinsic instead of emitting a libcall
  const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();

  // powr ---> exp2(y * log2(x))
  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
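  //
  // Illustrative sketch (not from the original source): for pown with a
  // negative base the identity used below is
  //   pown(x, y) = exp2(y * log2(|x|)), with the sign restored by OR'ing
  //                (bitcast(x) & (y << 31)) into the result,
  // so pown(-2.0f, 3) -> -8.0f (odd y keeps the sign) while
  //    pown(-2.0f, 2) ->  4.0f (even y drops it).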
  FunctionCallee ExpExpr;
  if (ShouldUseIntrinsic)
    ExpExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::exp2,
                                                {FPOp->getType()});
  else {
    ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
    if (!ExpExpr)
      return false;
  }

  bool needlog = false;
  bool needabs = false;
  bool needcopysign = false;
  Constant *cnval = nullptr;
  if (getVecSize(FInfo) == 1) {
    CF = nullptr;
    match(opr0, m_APFloatAllowPoison(CF));

    if (CF) {
      double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
                     ? (double)CF->convertToFloat()
                     : CF->convertToDouble();

      V = log2(std::abs(V));
      cnval = ConstantFP::get(eltType, V);
      needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
                     CF->isNegative();
    } else {
      needlog = true;
      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
    }
  } else {
    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);

    if (!CDV) {
      needlog = true;
      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
    } else {
      assert((int)CDV->getNumElements() == getVecSize(FInfo) &&
             "Wrong vector size detected");

      SmallVector<double, 0> DVal;
      for (int i = 0; i < getVecSize(FInfo); ++i) {
        double V = CDV->getElementAsAPFloat(i).convertToDouble();
        if (V < 0.0) needcopysign = true;
        V = log2(std::abs(V));
        DVal.push_back(V);
      }
      if (getArgType(FInfo) == AMDGPULibFunc::F32) {
        SmallVector<float, 0> FVal;
        for (double D : DVal)
          FVal.push_back((float)D);
        ArrayRef<float> tmp(FVal);
        cnval = ConstantDataVector::get(M->getContext(), tmp);
      } else {
        ArrayRef<double> tmp(DVal);
        cnval = ConstantDataVector::get(M->getContext(), tmp);
      }
    }
  }

  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
    // We cannot handle corner cases for a general pow() function, give up
    // unless y is a constant integral value. Then proceed as if it were pown.
    if (!isKnownIntegral(opr1, SQ.getWithInstruction(cast<Instruction>(FPOp)),
                         FPOp->getFastMathFlags()))
      return false;
  }

  Value *nval;
  if (needabs) {
    nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");
  } else {
    nval = cnval ? cnval : opr0;
  }
  if (needlog) {
    FunctionCallee LogExpr;
    if (ShouldUseIntrinsic) {
      LogExpr = Intrinsic::getOrInsertDeclaration(M, Intrinsic::log2,
                                                  {FPOp->getType()});
    } else {
      LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
      if (!LogExpr)
        return false;
    }

    nval = CreateCallEx(B, LogExpr, nval, "__log2");
  }

  if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
    // Convert int(32) to fp(f32 or f64).
    opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
  }
  nval = B.CreateFMul(opr1, nval, "__ylogx");
  nval = CreateCallEx(B, ExpExpr, nval, "__exp2");

  if (needcopysign) {
    Type *nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
    Type *nTy = FPOp->getType()->getWithNewType(nTyS);
    unsigned size = nTy->getScalarSizeInBits();
    Value *opr_n = FPOp->getOperand(1);
    if (opr_n->getType()->getScalarType()->isIntegerTy())
      opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
    else
      opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");

    Value *sign = B.CreateShl(opr_n, size - 1, "__yeven");
    sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
    nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
    nval = B.CreateBitCast(nval, opr0->getType());
  }

  LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
                    << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
  replaceCall(FPOp, nval);

  return true;
}

bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
                                const FuncInfo &FInfo) {
  Value *opr0 = FPOp->getOperand(0);
  Value *opr1 = FPOp->getOperand(1);

  const APInt *CINT = nullptr;
  if (!match(opr1, m_APIntAllowPoison(CINT)))
    return false;

  Function *Parent = B.GetInsertBlock()->getParent();

  int ci_opr1 = (int)CINT->getSExtValue();
  if (ci_opr1 == 1 && !Parent->hasFnAttribute(Attribute::StrictFP)) {
    // rootn(x, 1) = x
    //
    // TODO: Insert constrained canonicalize for strictfp case.
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << '\n');
    replaceCall(FPOp, opr0);
    return true;
  }

  Module *M = B.GetInsertBlock()->getModule();

  CallInst *CI = cast<CallInst>(FPOp);
  if (ci_opr1 == 2 &&
      shouldReplaceLibcallWithIntrinsic(CI,
                                        /*AllowMinSizeF32=*/true,
                                        /*AllowF64=*/true)) {
    // rootn(x, 2) = sqrt(x)
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0 << ")\n");

    CallInst *NewCall = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
    NewCall->takeName(CI);

    // OpenCL rootn has a looser ulp of 2 requirement than sqrt, so add some
    // metadata.
    MDBuilder MDHelper(M->getContext());
    MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));
    NewCall->setMetadata(LLVMContext::MD_fpmath, FPMD);

    replaceCall(CI, NewCall);
    return true;
  }

  if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
                        << ")\n");
      Value *nval = CreateCallEx(B, FPExpr, opr0, "__rootn2cbrt");
      replaceCall(FPOp, nval);
      return true;
    }
  } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
    Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
                               opr0,
                               "__rootn2div");
    replaceCall(FPOp, nval);
    return true;
  }

  if (ci_opr1 == -2 &&
      shouldReplaceLibcallWithIntrinsic(CI,
                                        /*AllowMinSizeF32=*/true,
                                        /*AllowF64=*/true)) {
    // rootn(x, -2) = rsqrt(x)

    // The original rootn had looser ulp requirements than the resultant sqrt
    // and fdiv.
    MDBuilder MDHelper(M->getContext());
    MDNode *FPMD = MDHelper.createFPMath(std::max(FPOp->getFPAccuracy(), 2.0f));

    // TODO: Could handle strictfp but need to fix strict sqrt emission
    FastMathFlags FMF = FPOp->getFastMathFlags();
    FMF.setAllowContract(true);

    CallInst *Sqrt = B.CreateUnaryIntrinsic(Intrinsic::sqrt, opr0, CI);
    Instruction *RSqrt = cast<Instruction>(
        B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0), Sqrt));
    Sqrt->setFastMathFlags(FMF);
    RSqrt->setFastMathFlags(FMF);
    RSqrt->setMetadata(LLVMContext::MD_fpmath, FPMD);

    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
                      << ")\n");
    replaceCall(CI, RSqrt);
    return true;
  }

  return false;
}

// Get a scalar native builtin single-argument FP function.
FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
                                                 const FuncInfo &FInfo) {
  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
    return nullptr;
  FuncInfo nf = FInfo;
  nf.setPrefix(AMDGPULibFunc::NATIVE);
  return getFunction(M, nf);
}

// Some library calls are just wrappers around LLVM intrinsics, but compiled
// conservatively. Preserve the flags from the original call site by
// substituting them with direct calls with all the flags.
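//
// Illustrative sketch (assumed mangled name, not from the original source):
//   %r = call nnan ninf float @_Z4fminff(float %x, float %y)
// becomes the equivalent intrinsic call with the same fast-math flags,
//   %r = call nnan ninf float @llvm.minnum.f32(float %x, float %y)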
bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
                                                       bool AllowMinSizeF32,
                                                       bool AllowF64,
                                                       bool AllowStrictFP) {
  Type *FltTy = CI->getType()->getScalarType();
  const bool IsF32 = FltTy->isFloatTy();

  // f64 intrinsics aren't implemented for most operations.
  if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
    return false;

  // We're implicitly inlining by replacing the libcall with the intrinsic, so
  // don't do it for noinline call sites.
  if (CI->isNoInline())
    return false;

  const Function *ParentF = CI->getFunction();
  // TODO: Handle strictfp
  if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
    return false;

  if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
    return false;
  return true;
}

void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
                                                       CallInst *CI,
                                                       Intrinsic::ID IntrID) {
  if (CI->arg_size() == 2) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
    VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
    if (Arg0VecTy && !Arg1VecTy) {
      Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
      CI->setArgOperand(1, SplatRHS);
    } else if (!Arg0VecTy && Arg1VecTy) {
      Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
      CI->setArgOperand(0, SplatLHS);
    }
  }

  CI->setCalledFunction(Intrinsic::getOrInsertDeclaration(
      CI->getModule(), IntrID, {CI->getType()}));
}

bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
    IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
    bool AllowF64, bool AllowStrictFP) {
  if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
                                         AllowStrictFP))
    return false;
  replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
  return true;
}

std::tuple<Value *, Value *, Value *>
AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
                             FunctionCallee Fsincos) {
  DebugLoc DL = B.getCurrentDebugLocation();
  Function *F = B.GetInsertBlock()->getParent();
  B.SetInsertPointPastAllocas(F);

  AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");

  if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
    // If the argument is an instruction, it must dominate all uses, so put our
    // sincos call there. Otherwise, right after the allocas works well enough
    // if it's an argument or constant.

    B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());

    // SetInsertPoint unwelcomely always tries to set the debug loc.
    B.SetCurrentDebugLocation(DL);
  }

  Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);

  // The alloca allocates the memory in the private address space, which needs
  // to be addrspacecast to the address space of the cos pointer type.
  // In OpenCL 2.0 this is generic, while in 1.2 it is private.
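  //
  // Illustrative sketch (not from the original source): on AMDGPU the alloca
  // lives in addrspace(5) (private), so when only the generic-pointer sincos
  // exists the cast below looks like
  //   %cast = addrspacecast ptr addrspace(5) %__sincos_ to ptr
  // and no address-space change is required for the private-pointer variant.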
  Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);

  CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);

  // TODO: Is it worth trying to preserve the location for the cos calls for
  // the load?

  LoadInst *LoadCos = B.CreateLoad(Arg->getType(), Alloc);
  return {SinCos, LoadCos, SinCos};
}

// fold sin, cos -> sincos.
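//
// Illustrative sketch (not from the original source), in OpenCL-style
// pseudocode: two calls that share an argument,
//   s = sin(x);   c = cos(x);
// are rewritten into one library call plus a load of the stored cosine,
//   s = sincos(x, &tmp);   c = tmp;
// keeping only the fast-math flags common to all of the merged calls.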
bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
                                 const FuncInfo &fInfo) {
  assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
         fInfo.getId() == AMDGPULibFunc::EI_COS);

  if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
       getArgType(fInfo) != AMDGPULibFunc::F64) ||
      fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
    return false;

  bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;

  Value *CArgVal = FPOp->getOperand(0);

  // TODO: Constant fold the call
  if (isa<ConstantData>(CArgVal))
    return false;

  CallInst *CI = cast<CallInst>(FPOp);

  Function *F = B.GetInsertBlock()->getParent();
  Module *M = F->getParent();

  // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
  // implementation. Prefer the private form if available.
  AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
  SinCosLibFuncPrivate.getLeads()[0].PtrKind =
      AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS);

  AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
  SinCosLibFuncGeneric.getLeads()[0].PtrKind =
      AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);

  FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
  FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
  FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
  if (!FSinCos)
    return false;

  SmallVector<CallInst *> SinCalls;
  SmallVector<CallInst *> CosCalls;
  SmallVector<CallInst *> SinCosCalls;
  FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
                       fInfo);
  const std::string PairName = PartnerInfo.mangle();

  StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
  StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
  const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
  const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();

  // Intersect the two sets of flags.
  FastMathFlags FMF = FPOp->getFastMathFlags();
  MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);

  SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};

  for (User *U : CArgVal->users()) {
    CallInst *XI = dyn_cast<CallInst>(U);
    if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
      continue;

    Function *UCallee = XI->getCalledFunction();
    if (!UCallee)
      continue;

    bool Handled = true;

    if (UCallee->getName() == SinName)
      SinCalls.push_back(XI);
    else if (UCallee->getName() == CosName)
      CosCalls.push_back(XI);
    else if (UCallee->getName() == SinCosPrivateName ||
             UCallee->getName() == SinCosGenericName)
      SinCosCalls.push_back(XI);
    else
      Handled = false;

    if (Handled) {
      MergeDbgLocs.push_back(XI->getDebugLoc());
      auto *OtherOp = cast<FPMathOperator>(XI);
      FMF &= OtherOp->getFastMathFlags();
      FPMath = MDNode::getMostGenericFPMath(
          FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
    }
  }

  if (SinCalls.empty() || CosCalls.empty())
    return false;

  B.setFastMathFlags(FMF);
  B.setDefaultFPMathTag(FPMath);
  DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
  B.SetCurrentDebugLocation(DbgLoc);

  auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);

  auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
    for (CallInst *C : Calls)
      C->replaceAllUsesWith(Res);

    // Leave the other dead instructions to avoid clobbering iterators.
  };

  replaceTrigInsts(SinCalls, Sin);
  replaceTrigInsts(CosCalls, Cos);
  replaceTrigInsts(SinCosCalls, SinCos);

  // It's safe to delete the original now.
  CI->eraseFromParent();
  return true;
}

bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
                                            double &Res1, Constant *copr0,
                                            Constant *copr1) {
  // By default, opr0/opr1 hold values of float/double type.
  // If they are not float/double, each function has to convert its operands
  // separately.
  double opr0 = 0.0, opr1 = 0.0;
  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
  if (fpopr0) {
    opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
               ? fpopr0->getValueAPF().convertToDouble()
               : (double)fpopr0->getValueAPF().convertToFloat();
  }

  if (fpopr1) {
    opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
               ? fpopr1->getValueAPF().convertToDouble()
               : (double)fpopr1->getValueAPF().convertToFloat();
  }

  switch (FInfo.getId()) {
  default: return false;

  case AMDGPULibFunc::EI_ACOS:
    Res0 = acos(opr0);
    return true;

  case AMDGPULibFunc::EI_ACOSH:
    // acosh(x) == log(x + sqrt(x*x - 1))
    Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
    return true;

  case AMDGPULibFunc::EI_ACOSPI:
    Res0 = acos(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_ASIN:
    Res0 = asin(opr0);
    return true;

  case AMDGPULibFunc::EI_ASINH:
    // asinh(x) == log(x + sqrt(x*x + 1))
    Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
    return true;

  case AMDGPULibFunc::EI_ASINPI:
    Res0 = asin(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_ATAN:
    Res0 = atan(opr0);
    return true;

  case AMDGPULibFunc::EI_ATANH:
    // atanh(x) == (log(x+1) - log(x-1))/2
    Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
    return true;

  case AMDGPULibFunc::EI_ATANPI:
    Res0 = atan(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_CBRT:
    Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
    return true;

  case AMDGPULibFunc::EI_COS:
    Res0 = cos(opr0);
    return true;

  case AMDGPULibFunc::EI_COSH:
    Res0 = cosh(opr0);
    return true;

  case AMDGPULibFunc::EI_COSPI:
    Res0 = cos(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_EXP:
    Res0 = exp(opr0);
    return true;

  case AMDGPULibFunc::EI_EXP2:
    Res0 = pow(2.0, opr0);
    return true;

  case AMDGPULibFunc::EI_EXP10:
    Res0 = pow(10.0, opr0);
    return true;

  case AMDGPULibFunc::EI_LOG:
    Res0 = log(opr0);
    return true;

  case AMDGPULibFunc::EI_LOG2:
    Res0 = log(opr0) / log(2.0);
    return true;

  case AMDGPULibFunc::EI_LOG10:
    Res0 = log(opr0) / log(10.0);
    return true;

  case AMDGPULibFunc::EI_RSQRT:
    Res0 = 1.0 / sqrt(opr0);
    return true;

  case AMDGPULibFunc::EI_SIN:
    Res0 = sin(opr0);
    return true;

  case AMDGPULibFunc::EI_SINH:
    Res0 = sinh(opr0);
    return true;

  case AMDGPULibFunc::EI_SINPI:
    Res0 = sin(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_TAN:
    Res0 = tan(opr0);
    return true;

  case AMDGPULibFunc::EI_TANH:
    Res0 = tanh(opr0);
    return true;

  case AMDGPULibFunc::EI_TANPI:
    Res0 = tan(MATH_PI * opr0);
    return true;

  // two-arg functions
  case AMDGPULibFunc::EI_POW:
  case AMDGPULibFunc::EI_POWR:
    Res0 = pow(opr0, opr1);
    return true;

  case AMDGPULibFunc::EI_POWN: {
    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
      double val = (double)iopr1->getSExtValue();
      Res0 = pow(opr0, val);
      return true;
    }
    return false;
  }

  case AMDGPULibFunc::EI_ROOTN: {
    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
      double val = (double)iopr1->getSExtValue();
      Res0 = pow(opr0, 1.0 / val);
      return true;
    }
    return false;
  }

  // with ptr arg
  case AMDGPULibFunc::EI_SINCOS:
    Res0 = sin(opr0);
    Res1 = cos(opr0);
    return true;
  }

  return false;
}

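// Illustrative sketch (assumed mangled name, not from the original source):
// with a constant argument and fast-math on the call,
//   %r = call fast float @_Z3expf(float 1.0)
// is evaluated on the host in double precision and folded to the constant
// 0x4005BF0A80000000 (~2.7182817, e rounded to float); for sincos the second
// result is stored through the pointer argument instead.
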
bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
  int numArgs = (int)aCI->arg_size();
  if (numArgs > 3)
    return false;

  Constant *copr0 = nullptr;
  Constant *copr1 = nullptr;
  if (numArgs > 0) {
    if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
      return false;
  }

  if (numArgs > 1) {
    if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
      if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
        return false;
    }
  }

  // At this point, all arguments to aCI are constants.

  // max vector size is 16, and sincos will generate two results.
  double DVal0[16], DVal1[16];
  int FuncVecSize = getVecSize(FInfo);
  bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
  if (FuncVecSize == 1) {
    if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
      return false;
    }
  } else {
    ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
    ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
    for (int i = 0; i < FuncVecSize; ++i) {
      Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
      Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
      if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
        return false;
      }
    }
  }

  LLVMContext &context = aCI->getContext();
  Constant *nval0, *nval1;
  if (FuncVecSize == 1) {
    nval0 = ConstantFP::get(aCI->getType(), DVal0[0]);
    if (hasTwoResults)
      nval1 = ConstantFP::get(aCI->getType(), DVal1[0]);
  } else {
    if (getArgType(FInfo) == AMDGPULibFunc::F32) {
      SmallVector<float, 0> FVal0, FVal1;
      for (int i = 0; i < FuncVecSize; ++i)
        FVal0.push_back((float)DVal0[i]);
      ArrayRef<float> tmp0(FVal0);
      nval0 = ConstantDataVector::get(context, tmp0);
      if (hasTwoResults) {
        for (int i = 0; i < FuncVecSize; ++i)
          FVal1.push_back((float)DVal1[i]);
        ArrayRef<float> tmp1(FVal1);
        nval1 = ConstantDataVector::get(context, tmp1);
      }
    } else {
      ArrayRef<double> tmp0(DVal0);
      nval0 = ConstantDataVector::get(context, tmp0);
      if (hasTwoResults) {
        ArrayRef<double> tmp1(DVal1);
        nval1 = ConstantDataVector::get(context, tmp1);
      }
    }
  }

  if (hasTwoResults) {
    // sincos
    assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
           "math function with ptr arg not supported yet");
    new StoreInst(nval1, aCI->getArgOperand(1), aCI->getIterator());
  }

  replaceCall(aCI, nval0);
  return true;
}

PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
                                                  FunctionAnalysisManager &AM) {
  AMDGPULibCalls Simplifier(F, AM);
  Simplifier.initNativeFuncs();

  bool Changed = false;

  LLVM_DEBUG(dbgs() << "AMDIC: process function ";
             F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);

  for (auto &BB : F) {
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
      // Ignore non-calls.
      CallInst *CI = dyn_cast<CallInst>(I);
      ++I;

      if (CI) {
        if (Simplifier.fold(CI))
          Changed = true;
      }
    }
  }
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}

PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
                                                FunctionAnalysisManager &AM) {
  if (UseNative.empty())
    return PreservedAnalyses::all();

  AMDGPULibCalls Simplifier(F, AM);
  Simplifier.initNativeFuncs();

  bool Changed = false;
  for (auto &BB : F) {
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
      // Ignore non-calls.
      CallInst *CI = dyn_cast<CallInst>(I);
      ++I;
      if (CI && Simplifier.useNative(CI))
        Changed = true;
    }
  }
  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}