LLVM  13.0.0git
Classes | Namespaces | Macros | Functions | Variables
AMDGPULibCalls.cpp File Reference
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
Include dependency graph for AMDGPULibCalls.cpp:

Go to the source code of this file.

Classes

class  llvm::AMDGPULibCalls
 
struct  TableRef
 

Namespaces

 llvm
 

Macros

#define DEBUG_TYPE   "amdgpu-simplifylib"
 
#define MATH_PI   numbers::pi
 
#define MATH_E   numbers::e
 
#define MATH_SQRT2   numbers::sqrt2
 
#define MATH_SQRT1_2   numbers::inv_sqrt2
 

Functions

 INITIALIZE_PASS_BEGIN (AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", "Simplify well-known AMD library calls", false, false) INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls
 
amdgpu Simplify well known AMD library false INITIALIZE_PASS (AMDGPUUseNativeCalls, "amdgpu-usenative", "Replace builtin math calls with that native versions.", false, false) template< typename IRB > static CallInst *CreateCallEx(IRB &B
 
static bool HasNative (AMDGPULibFunc::EFuncId id)
 
static TableRef getOptTable (AMDGPULibFunc::EFuncId id)
 
static int getVecSize (const AMDGPULibFunc &FInfo)
 
static AMDGPULibFunc::EType getArgType (const AMDGPULibFunc &FInfo)
 
static double llvm::log2 (double V)
 

Variables

static cl::opt< bool > EnablePreLink ("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
 
static cl::list< std::string > UseNative ("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
 
amdgpu simplifylib
 
amdgpu Simplify well known AMD library calls
 
amdgpu Simplify well known AMD library false
 
amdgpu Simplify well known AMD library false FunctionCallee Callee
 
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
 
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
 
static const TableEntry tbl_acos []
 
static const TableEntry tbl_acosh []
 
static const TableEntry tbl_acospi []
 
static const TableEntry tbl_asin []
 
static const TableEntry tbl_asinh []
 
static const TableEntry tbl_asinpi []
 
static const TableEntry tbl_atan []
 
static const TableEntry tbl_atanh []
 
static const TableEntry tbl_atanpi []
 
static const TableEntry tbl_cbrt []
 
static const TableEntry tbl_cos []
 
static const TableEntry tbl_cosh []
 
static const TableEntry tbl_cospi []
 
static const TableEntry tbl_erfc []
 
static const TableEntry tbl_erf []
 
static const TableEntry tbl_exp []
 
static const TableEntry tbl_exp2 []
 
static const TableEntry tbl_exp10 []
 
static const TableEntry tbl_expm1 []
 
static const TableEntry tbl_log []
 
static const TableEntry tbl_log2 []
 
static const TableEntry tbl_log10 []
 
static const TableEntry tbl_rsqrt []
 
static const TableEntry tbl_sin []
 
static const TableEntry tbl_sinh []
 
static const TableEntry tbl_sinpi []
 
static const TableEntry tbl_sqrt []
 
static const TableEntry tbl_tan []
 
static const TableEntry tbl_tanh []
 
static const TableEntry tbl_tanpi []
 
static const TableEntry tbl_tgamma []
 

Detailed Description

This file does AMD library function optimizations.

Definition in file AMDGPULibCalls.cpp.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "amdgpu-simplifylib"

Definition at line 23 of file AMDGPULibCalls.cpp.

◆ MATH_E

#define MATH_E   numbers::e

Definition at line 38 of file AMDGPULibCalls.cpp.

◆ MATH_PI

#define MATH_PI   numbers::pi

Definition at line 37 of file AMDGPULibCalls.cpp.

◆ MATH_SQRT1_2

#define MATH_SQRT1_2   numbers::inv_sqrt2

Definition at line 40 of file AMDGPULibCalls.cpp.

◆ MATH_SQRT2

#define MATH_SQRT2   numbers::sqrt2

Definition at line 39 of file AMDGPULibCalls.cpp.

Function Documentation

◆ getArgType()

static AMDGPULibFunc::EType getArgType ( const AMDGPULibFunc & FInfo)
inline static

◆ getOptTable()

static TableRef getOptTable ( AMDGPULibFunc::EFuncId  id)
static

Definition at line 408 of file AMDGPULibCalls.cpp.

References llvm::AMDGPULibFuncBase::EI_ACOS, llvm::AMDGPULibFuncBase::EI_ACOSH, llvm::AMDGPULibFuncBase::EI_ACOSPI, llvm::AMDGPULibFuncBase::EI_ASIN, llvm::AMDGPULibFuncBase::EI_ASINH, llvm::AMDGPULibFuncBase::EI_ASINPI, llvm::AMDGPULibFuncBase::EI_ATAN, llvm::AMDGPULibFuncBase::EI_ATANH, llvm::AMDGPULibFuncBase::EI_ATANPI, llvm::AMDGPULibFuncBase::EI_CBRT, llvm::AMDGPULibFuncBase::EI_COS, llvm::AMDGPULibFuncBase::EI_COSH, llvm::AMDGPULibFuncBase::EI_COSPI, llvm::AMDGPULibFuncBase::EI_ERF, llvm::AMDGPULibFuncBase::EI_ERFC, llvm::AMDGPULibFuncBase::EI_EXP, llvm::AMDGPULibFuncBase::EI_EXP10, llvm::AMDGPULibFuncBase::EI_EXP2, llvm::AMDGPULibFuncBase::EI_EXPM1, llvm::AMDGPULibFuncBase::EI_LOG, llvm::AMDGPULibFuncBase::EI_LOG10, llvm::AMDGPULibFuncBase::EI_LOG2, llvm::AMDGPULibFuncBase::EI_NCOS, llvm::AMDGPULibFuncBase::EI_NEXP2, llvm::AMDGPULibFuncBase::EI_NLOG2, llvm::AMDGPULibFuncBase::EI_NRSQRT, llvm::AMDGPULibFuncBase::EI_NSIN, llvm::AMDGPULibFuncBase::EI_NSQRT, llvm::AMDGPULibFuncBase::EI_RSQRT, llvm::AMDGPULibFuncBase::EI_SIN, llvm::AMDGPULibFuncBase::EI_SINH, llvm::AMDGPULibFuncBase::EI_SINPI, llvm::AMDGPULibFuncBase::EI_SQRT, llvm::AMDGPULibFuncBase::EI_TAN, llvm::AMDGPULibFuncBase::EI_TANH, llvm::AMDGPULibFuncBase::EI_TANPI, llvm::AMDGPULibFuncBase::EI_TGAMMA, tbl_acos, tbl_acosh, tbl_acospi, tbl_asin, tbl_asinh, tbl_asinpi, tbl_atan, tbl_atanh, tbl_atanpi, tbl_cbrt, tbl_cos, tbl_cosh, tbl_cospi, tbl_erf, tbl_erfc, tbl_exp, tbl_exp10, tbl_exp2, tbl_expm1, tbl_log, tbl_log10, tbl_log2, tbl_rsqrt, tbl_sin, tbl_sinh, tbl_sinpi, tbl_sqrt, tbl_tan, tbl_tanh, tbl_tanpi, and tbl_tgamma.

◆ getVecSize()

static int getVecSize ( const AMDGPULibFunc & FInfo)
inline static

◆ HasNative()

static bool HasNative ( AMDGPULibFunc::EFuncId  id)
static

◆ INITIALIZE_PASS()

amdgpu Simplify well known AMD library false INITIALIZE_PASS ( AMDGPUUseNativeCalls  ,
"amdgpu-usenative"  ,
"Replace builtin math calls with that native versions."  ,
false  ,
false   
) &

◆ INITIALIZE_PASS_BEGIN()

INITIALIZE_PASS_BEGIN ( AMDGPUSimplifyLibCalls  ,
"amdgpu-simplifylib"  ,
"Simplify well-known AMD library calls"  ,
false  ,
false   
)

Variable Documentation

◆ Arg

amdgpu Simplify well known AMD library false FunctionCallee Value* Arg

Definition at line 205 of file AMDGPULibCalls.cpp.

Referenced by llvm::AAEvaluator::AAEvaluator(), llvm::opt::Option::accept(), llvm::AliasSetTracker::add(), AddAliasScopeMetadata(), AddAlignmentAssumptions(), addArgumentReturnedAttrs(), addConditions(), llvm::AnalysisUsage::addPreserved(), addVCallToSet(), llvm::AMDGPUAAResult::alias(), allCallersPassValidPointerForArgument(), allocateSGPR32Input(), allocateSGPR64Input(), llvm::SITargetLowering::allocateSpecialInputVGPRs(), allocateVGPR32Input(), llvm::CCState::AnalyzeArgumentsSecondPass(), llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute(), appendArg(), appendString(), llvm::BitVector::apply(), llvm::xray::CallArgRecord::arg(), llvm::MachineFunction::ArgRegPair::ArgRegPair(), llvm::IRPosition::argument(), llvm::CallLowering::ValueHandler::assignValueToAddress(), llvm::BTFDebug::beginFunctionImpl(), CalculateTailCallArgDest(), llvm::orc::shared::ParallelCallGroup::call(), llvm::CallGraph::CallGraph(), llvm::orc::remote::OrcRemoteTargetClient::callIntInt(), checkFunctionMemoryAccess(), llvm::CoroIdInst::clearPromise(), combineFneg(), llvm::computeConstantRange(), llvm::computeExpressionSize(), computeKnownBitsFromAssume(), llvm::computeSignatureVTs(), llvm::DwarfUnit::constructSubprogramArguments(), convertArgumentInfo(), llvm::ArgDescriptor::createArg(), llvm::IRBuilderBase::CreateIsNotNull(), llvm::IRBuilderBase::CreateIsNull(), llvm::Attributor::createShallowWrapper(), llvm::orc::LocalCXXRuntimeOverridesBase::CXAAtExitOverride(), llvm::objcarc::Depends(), llvm::DivergenceInfo::DivergenceInfo(), llvm::dlltoolDriverMain(), doList(), llvm::DominatorTreeBase< BasicBlock, IsPostDom >::DominatorTreeBase(), eliminateSwiftError(), eliminateSwiftErrorArgument(), llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelArg(), llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelArgs(), llvm::remarks::BitstreamRemarkSerializerHelper::emitRemarkBlock(), ExpandResponseFile(), findArgumentCopyElisionCandidates(), findDependencies(), FindPredecessorAutoreleaseWithSafePath(), 
FindPredecessorRetainWithSafePath(), llvm::objcarc::findSingleDependency(), FindSingleUseIdentifiedObject(), fitArgInto64Bits(), llvm::VarDefInit::Fold(), llvm::FoldingSetBase::FoldingSetBase(), llvm::InstCombinerImpl::foldIntegerTypedPHI(), ForeachDagApply(), llvm::detail::frexp(), llvm::GlobalsAAResult::FunctionInfo::FunctionInfo(), llvm::DagInit::get(), llvm::DILocalVariable::getArg(), llvm::remarks::Remark::getArgsAsMsg(), getArgumentTypeAlign(), llvm::IRPosition::getAssociatedFunction(), llvm::VarDefInit::getAsString(), llvm::AbstractCallSite::getCallArgOperand(), llvm::AbstractCallSite::getCallArgOperandNo(), llvm::CoroSuspendInst::getCoroSave(), llvm::IRPosition::getCtxI(), llvm::AMDGPUSubtarget::getExplicitKernArgSize(), llvm::MemoryLocation::getForArgument(), llvm::cflaa::getGlobalOrArgAttrFromValue(), llvm::DenseMapInfo< PointerEmbeddedInt< IntT, Bits > >::getHashValue(), llvm::DenseMapInfo< MemoryLocOrCall >::getHashValue(), llvm::MDNodeKeyImpl< DILocalVariable >::getHashValue(), getInterfaceValue(), llvm::DiagnosticInfoOptimizationBase::getMsg(), llvm::X86TargetLowering::getNegatedExpression(), llvm::PassRegistry::getPassInfo(), llvm::SIMachineFunctionInfo::getPreloadedReg(), llvm::CoroIdInst::getPromise(), getSearchPaths(), llvm::ARM_AM::getSOImmVal(), llvm::CoroIdAsyncInst::getStorageArgumentIndex(), llvm::CoroSuspendAsyncInst::getStorageArgumentIndex(), llvm::ARM_AM::getT2SOImmVal(), llvm::getTotalArgumentsSizeInBytes(), getVectorCallCosts(), llvm::GlobalsAAResult::GlobalsAAResult(), HandleByValArgument(), handlePhiDef(), HandlePrefixedOrGroupedOption(), llvm::detail::hash_value(), llvm::hash_value(), HasSafePathToPredecessorCall(), llvm::AArch64RegisterInfo::hasSVEArgsOrReturn(), llvm::HexagonEvaluator::HexagonEvaluator(), llvm::Attributor::identifyDefaultAbstractAttributes(), llvm::detail::ilogb(), initSlots2Values(), llvm::yaml::CustomMappingTraits< std::map< std::vector< uint64_t >, WholeProgramDevirtResolution::ByArg > >::inputOne(), 
llvm::SIInstrInfo::insertNoops(), insertNoopsInBundle(), insertParsePoints(), insertSinCosCall(), insertSpills(), llvm::ARMTTIImpl::instCombineIntrinsic(), llvm::X86TTIImpl::instCombineIntrinsic(), llvm::remarks::StringTable::internalize(), internalizeFunction(), llvm::PreservedAnalyses::intersect(), llvm::invertCondition(), llvm::IRSimilarity::IRSimilarityCandidate::IRSimilarityCandidate(), isCString(), llvm::SITargetLowering::isEligibleForTailCallOptimization(), isIntExtFree(), llvm::InformationCache::isInvolvedInMustTailCall(), llvm::MDNodeKeyImpl< DILocalVariable >::isKeyOf(), isKnownNonNullFromDominatingCondition(), llvm::DILocalVariable::isParameter(), llvm::objcarc::IsPotentialRetainableObjPtr(), isPredicatedOnPHI(), isSafeToPromoteArgument(), llvm::NVPTXTTIImpl::isSourceOfDivergence(), llvm::Value::isSwiftError(), isSwiftError(), llvm::AMDGPUInstrInfo::isUniformMMO(), llvm::Attributor::isValidFunctionSignatureRewrite(), llvm::LazyValueInfo::LazyValueInfo(), LLVMGetNextParam(), LLVMGetPreviousParam(), LLVMRemarkArgGetDebugLoc(), LLVMRemarkArgGetKey(), LLVMRemarkArgGetValue(), LLVMSetParamAlignment(), llvm::AMDGPULegalizerInfo::loadInputValue(), llvm::AMDGPUTargetLowering::loadInputValue(), LookupNearestOption(), llvm::Pass::lookupPassInfo(), llvm::LoopInfoBase< BasicBlock, Loop >::LoopInfoBase(), lowerBALLOTIntrinsic(), llvm::ARMCallLowering::lowerCall(), llvm::MipsCallLowering::lowerCall(), llvm::VETargetLowering::LowerCall(), llvm::HexagonTargetLowering::LowerCall(), llvm::SITargetLowering::LowerCall(), llvm::RISCVTargetLowering::LowerCall(), llvm::CallLowering::lowerCall(), llvm::SparcTargetLowering::LowerCall_32(), llvm::SparcTargetLowering::LowerCall_64(), llvm::FastISel::lowerCallTo(), llvm::SparcTargetLowering::LowerF128_LibCallArg(), llvm::X86CallLowering::lowerFormalArguments(), llvm::ARMCallLowering::lowerFormalArguments(), llvm::R600TargetLowering::LowerFormalArguments(), llvm::AArch64CallLowering::lowerFormalArguments(), 
llvm::AMDGPUCallLowering::lowerFormalArguments(), llvm::MipsCallLowering::lowerFormalArguments(), llvm::VETargetLowering::LowerFormalArguments(), llvm::SITargetLowering::LowerFormalArguments(), llvm::NVPTXTargetLowering::LowerFormalArguments(), llvm::SparcTargetLowering::LowerFormalArguments_32(), llvm::SparcTargetLowering::LowerFormalArguments_64(), llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel(), LowerFSINCOS(), LowerMemOpCallTo(), llvm::SITargetLowering::LowerReturn(), llvm::SparcTargetLowering::LowerReturn_32(), lowerStatepointMetaArgs(), makeStatepointExplicitImpl(), llvm::coro::LowererBase::makeSubFnCall(), llvm::X86TargetLowering::markLibCallAttributes(), markTails(), MatchingStackOffset(), needsFPFromSig(), onlySingleValue(), llvm::GVNExpression::BasicExpression::op_push_back(), std::hash< llvm::pdb::PDB_SymType >::operator()(), llvm::orc::shared::detail::ReadArgs< ArgT, ArgTs... >::operator()(), llvm::operator<<(), llvm::detail::value_sequence_iterator< ValueT >::operator=(), llvm::LazyValueInfo::operator=(), llvm::MIBundleOperandIteratorBase< const MachineOperand >::operator==(), llvm::MCRegisterInfo::mc_difflist_iterator< mc_subreg_iterator >::operator==(), llvm::BasicBlock::phi_iterator_impl< PHINodeT, BBIteratorT >::operator==(), llvm::LazyCallGraph::postorder_ref_scc_iterator::operator==(), llvm::BlotMapVector< KeyT, ValueT >::operator[](), llvm::yaml::CustomMappingTraits< std::map< std::vector< uint64_t >, WholeProgramDevirtResolution::ByArg > >::output(), llvm::cflaa::parentFunctionOfValue(), llvm::remarks::HotnessThresholdParser::parse(), llvm::cl::parser< const PassInfo * >::parse(), llvm::cl::parser< std::string >::parse(), llvm::cl::parser< char >::parse(), llvm::parseCommandLineOptions(), parseDouble(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseEncoding(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseExpr(), ParseFunctionArgs(), llvm::remarks::parseHotnessThresholdOption(), 
llvm::GCNTargetMachine::parseMachineFunctionInfo(), llvm::opt::OptTable::ParseOneArg(), llvm::remarks::YAMLRemarkParser::parseRemark(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseSpecialName(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseTemplateArg(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseTemplateArgs(), passingValueIsAlwaysUndefined(), Passv64i1ArgInRegs(), llvm::R600TargetLowering::PerformDAGCombine(), llvm::AMDGPUAAResult::pointsToConstantMemory(), populateExternalRelations(), llvm::LegacyDivergenceAnalysis::print(), llvm::sys::printArg(), processArg(), processDbgDeclares(), processPSInputArgs(), ProfileDagInit(), programUndefinedIfUndefOrPoison(), llvm::promoteCall(), llvm::cl::ProvidePositionalOption(), llvm::Attributor::registerFunctionSignatureRewrite(), llvm::DIExpression::replaceArg(), replaceArgumentUses(), replaceConstants(), replaceSwiftErrorOps(), replaceWithCallToVeclib(), llvm::VarDefInit::resolveReferences(), llvm::DagInit::resolveReferences(), llvm::LoopAnalysisManagerFunctionProxy::Result::Result(), llvm::InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs >::Result::Result(), llvm::DivergenceAnalysisPrinterPass::run(), llvm::RequireAnalysisPass< AnalysisT, IRUnitT, AnalysisManagerT, ExtraArgTs >::run(), llvm::orc::runAsMain(), llvm::runFuzzerOnInputs(), llvm::runIPSCCP(), llvm::IRTranslator::runOnMachineFunction(), llvm::coro::salvageDebugInfo(), llvm::ScalarEvolution::ScalarEvolution(), llvm::detail::scalbn(), llvm::FastISel::selectIntrinsicCall(), llvm::CallLowering::setArgFlags(), llvm::SIMachineFunctionInfo::setWorkItemIDX(), llvm::SIMachineFunctionInfo::setWorkItemIDY(), llvm::SIMachineFunctionInfo::setWorkItemIDZ(), llvm::X86TTIImpl::simplifyDemandedUseBitsIntrinsic(), simplifyInvariantGroupIntrinsic(), simplifyX86movmsk(), StoreTailCallArgumentsToStackSlot(), llvm::SubsumingPositionIterator::SubsumingPositionIterator(), tryConstantFoldCall(), 
tryToElideArgumentCopy(), llvm::UpgradeARCRuntime(), llvm::IRPosition::value(), llvm::ObjectSizeOffsetVisitor::visitCallBase(), llvm::InstCombinerImpl::visitCallInst(), llvm::InnerLoopVectorizer::widenCallInstruction(), llvm::wouldInstructionBeTriviallyDead(), and writeFunctionTypeMetadataRecords().

◆ Callee

amdgpu Simplify well known AMD library false FunctionCallee Callee

Definition at line 205 of file AMDGPULibCalls.cpp.

Referenced by llvm::AbstractCallSite::AbstractCallSite(), AddCalls(), addCallToCallGraph(), addNoRecurseAttrs(), adjustCallerSSPLevel(), adjustCallerStackProbes(), adjustCallerStackProbeSize(), adjustMinLegalVectorWidth(), adjustNullPointerValidAttr(), allCallersPassValidPointerForArgument(), analyzeLoopUnrollCost(), llvm::ArgumentPromotionPass::areFunctionArgsABICompatible(), llvm::X86TTIImpl::areFunctionArgsABICompatible(), llvm::TargetTransformInfoImplBase::areFunctionArgsABICompatible(), llvm::TargetTransformInfo::areFunctionArgsABICompatible(), llvm::GCNTTIImpl::areInlineCompatible(), llvm::TargetTransformInfoImplBase::areInlineCompatible(), llvm::AttributeFuncs::areInlineCompatible(), llvm::TargetTransformInfo::areInlineCompatible(), llvm::AVRDAGToDAGISel::select< AVRISD::CALL >(), buildCallOperands(), CallExpr::CallExpr(), callsShareTOCBase(), computeFunctionSummary(), copyFeaturesToFunction(), llvm::IRBuilderBase::CreateCall(), llvm::IRBuilderBase::CreateCallBr(), llvm::objcarc::createCallInstWithColors(), llvm::IRBuilderBase::CreateConstrainedFPCall(), llvm::IRBuilderBase::CreateInvoke(), DeleteBasicBlock(), doCallSiteSplitting(), emitBinaryFloatFnCallHelper(), EmitCall(), emitDirectiveRelocJalr(), llvm::emitInlinedInto(), emitLibCall(), emitUnaryFloatFnCallHelper(), findInitTrampoline(), llvm::sampleprof::FunctionSamples::findInlinedFunctions(), findUses(), llvm::AMDGPULibCalls::fold(), for(), llvm::FunctionCallee::FunctionCallee(), functionsHaveCompatibleAttributes(), llvm::ReplayInlineAdvisor::getAdviceImpl(), getAllocationData(), getAllocationDataForFunction(), getAllocationSize(), llvm::IRPosition::getAssociatedArgument(), llvm::getAttributeBasedInliningDecision(), llvm::AbstractCallSite::getCallbackUses(), getCalledFunction(), getCallOpcode(), getEdgeKind(), llvm::FunctionPropertiesInfo::getFunctionPropertiesInfo(), llvm::getInlineCost(), llvm::InlineAdvisor::getMandatoryKind(), llvm::AArch64InstrInfo::getOutliningType(), 
llvm::ARMBaseInstrInfo::getOutliningType(), llvm::MipsCCState::getSpecialCallingConvForCallee(), handleAttr(), hasReturnsTwiceAttr(), llvm::Attributor::identifyDefaultAbstractAttributes(), INITIALIZE_PASS(), llvm::InformationCache::initializeModuleSlice(), inlineCallIfPossible(), insertSinCosCall(), InstrBreaksNoFree(), InstrBreaksNonThrowing(), InstrBreaksNoSync(), instructionDoesNotReturn(), isCalleeLoad(), llvm::CodeExtractor::isEligible(), llvm::HexagonTargetLowering::IsEligibleForTailCallOptimization(), isEqual(), llvm::isFreeCall(), isFunctionGlobalAddress(), isIndirectCall(), llvm::isInlineViable(), llvm::isLegalToPromote(), isReportingError(), isReturnNonNull(), llvm::isSafeToSpeculativelyExecute(), llvm::VETargetLowering::LowerCall(), llvm::HexagonTargetLowering::LowerCall(), llvm::SITargetLowering::LowerCall(), llvm::RISCVTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerCall(), llvm::SystemZTargetLowering::LowerCall(), llvm::SparcTargetLowering::LowerCall_32(), llvm::SparcTargetLowering::LowerCall_64(), llvm::SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(), llvm::SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(), llvm::SelectionDAGBuilder::LowerDeoptimizeCall(), llvm::SparcTargetLowering::LowerF128Compare(), llvm::SparcTargetLowering::LowerF128Op(), llvm::SparcTargetLowering::LowerGlobalTLSAddress(), llvm::IntrinsicLowering::LowerIntrinsicCall(), llvm::SelectionDAGBuilder::LowerStatepoint(), llvm::AMDGPUTargetLowering::lowerUnhandledCall(), markAliveBlocks(), CallExpr::match(), llvm::AttributeFuncs::mergeAttributesForInlining(), llvm::pdb::operator<<(), llvm::FortifiedLibCallSimplifier::optimizeCall(), llvm::LibCallSimplifier::optimizeCall(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseExpr(), prepareDescriptorIndirectCall(), prepareIndirectCall(), CallExpr::printLeft(), llvm::sampleprof::ProfiledCallGraph::ProfiledCallGraph(), llvm::promoteCall(), llvm::promoteCallWithIfThenElse(), propagateCallAttrsFromCallee(), 
llvm::ImportedFunctionsInliningStatistics::recordInline(), llvm::CallGraphNode::removeAnyCallEdgeTo(), llvm::CallGraphNode::removeOneAbstractEdgeTo(), llvm::ReplayInlineAdvisor::ReplayInlineAdvisor(), llvm::AMDGPUSimplifyLibCallsPass::run(), llvm::AMDGPUUseNativeCallsPass::run(), llvm::InlinerPass::run(), runImpl(), runNVVMIntrRange(), llvm::FastISel::selectPatchpoint(), setAND(), setOR(), llvm::shouldInline(), llvm::SimplifyCall(), simplifySuspendPoint(), llvm::SubsumingPositionIterator::SubsumingPositionIterator(), transformCallee(), llvm::tryPromoteCall(), llvm::coro::updateCallGraph(), UpdateCallGraphAfterInlining(), updateCallProfile(), llvm::updateProfileCallee(), llvm::AMDGPULibCalls::useNative(), versionCallSite(), and llvm::sampleprof::SampleProfileWriterBinary::writeBody().

◆ calls

amdgpu Simplify well known AMD library calls

Definition at line 198 of file AMDGPULibCalls.cpp.

◆ EnablePreLink

cl::opt<bool> EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static

◆ false

amdgpu Simplify well known AMD library false

Definition at line 198 of file AMDGPULibCalls.cpp.

Referenced by llvm::AMDGPULibCalls::fold().

◆ Name

Initial value:
= "") {
CallInst *R = B.CreateCall(Callee, Arg, Name);
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
R->setCallingConv(F->getCallingConv());
return R;
}
template <typename IRB>
static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
Value *Arg2, const Twine &Name = "") {
CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
R->setCallingConv(F->getCallingConv());
return R;
}
struct TableEntry {
double result;
double input;
}

Definition at line 206 of file AMDGPULibCalls.cpp.

◆ simplifylib

amdgpu simplifylib

Definition at line 197 of file AMDGPULibCalls.cpp.

◆ tbl_acos

const TableEntry tbl_acos[]
static
Initial value:
= {
{MATH_PI / 2.0, 0.0},
{MATH_PI / 2.0, -0.0},
{0.0, 1.0},
{MATH_PI, -1.0}
}

Definition at line 231 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_acosh

const TableEntry tbl_acosh[]
static
Initial value:
= {
{0.0, 1.0}
}

Definition at line 237 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_acospi

const TableEntry tbl_acospi[]
static
Initial value:
= {
{0.5, 0.0},
{0.5, -0.0},
{0.0, 1.0},
{1.0, -1.0}
}

Definition at line 240 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_asin

const TableEntry tbl_asin[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{MATH_PI / 2.0, 1.0},
{-MATH_PI / 2.0, -1.0}
}

Definition at line 246 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_asinh

const TableEntry tbl_asinh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 252 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_asinpi

const TableEntry tbl_asinpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{0.5, 1.0},
{-0.5, -1.0}
}

Definition at line 256 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_atan

const TableEntry tbl_atan[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{MATH_PI / 4.0, 1.0},
{-MATH_PI / 4.0, -1.0}
}

Definition at line 262 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_atanh

const TableEntry tbl_atanh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 268 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_atanpi

const TableEntry tbl_atanpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{0.25, 1.0},
{-0.25, -1.0}
}

Definition at line 272 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cbrt

const TableEntry tbl_cbrt[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{1.0, 1.0},
{-1.0, -1.0},
}

Definition at line 278 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cos

const TableEntry tbl_cos[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 284 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cosh

const TableEntry tbl_cosh[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 288 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cospi

const TableEntry tbl_cospi[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 292 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_erf

const TableEntry tbl_erf[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 300 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_erfc

const TableEntry tbl_erfc[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 296 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_exp

const TableEntry tbl_exp[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0},
{MATH_E, 1.0}
}

Definition at line 304 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_exp10

const TableEntry tbl_exp10[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0},
{10.0, 1.0}
}

Definition at line 314 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_exp2

const TableEntry tbl_exp2[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0},
{2.0, 1.0}
}

Definition at line 309 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_expm1

const TableEntry tbl_expm1[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 319 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_log

const TableEntry tbl_log[]
static
Initial value:
= {
{0.0, 1.0},
{1.0, MATH_E}
}

Definition at line 323 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_log10

const TableEntry tbl_log10[]
static
Initial value:
= {
{0.0, 1.0},
{1.0, 10.0}
}

Definition at line 331 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_log2

const TableEntry tbl_log2[]
static
Initial value:
= {
{0.0, 1.0},
{1.0, 2.0}
}

Definition at line 327 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_rsqrt

const TableEntry tbl_rsqrt[]
static
Initial value:
= {
{1.0, 1.0},
{MATH_SQRT1_2, 2.0}
}

Definition at line 335 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sin

const TableEntry tbl_sin[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 339 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sinh

const TableEntry tbl_sinh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 343 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sinpi

const TableEntry tbl_sinpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 347 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sqrt

const TableEntry tbl_sqrt[]
static
Initial value:
= {
{0.0, 0.0},
{1.0, 1.0},
{MATH_SQRT2, 2.0}
}

Definition at line 351 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tan

const TableEntry tbl_tan[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 356 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tanh

const TableEntry tbl_tanh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 360 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tanpi

const TableEntry tbl_tanpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 364 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tgamma

const TableEntry tbl_tgamma[]
static
Initial value:
= {
{1.0, 1.0},
{1.0, 2.0},
{2.0, 3.0},
{6.0, 4.0}
}

Definition at line 368 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ UseNative

cl::list<std::string> UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static
llvm::Function
Definition: Function.h:61
MATH_PI
#define MATH_PI
Definition: AMDGPULibCalls.cpp:37
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:129
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
input
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 input
Definition: README.txt:10
Name
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Definition: AMDGPULibCalls.cpp:206
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:183
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:205
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
MATH_SQRT2
#define MATH_SQRT2
Definition: AMDGPULibCalls.cpp:39
MATH_E
#define MATH_E
Definition: AMDGPULibCalls.cpp:38
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
MATH_SQRT1_2
#define MATH_SQRT1_2
Definition: AMDGPULibCalls.cpp:40