LLVM  14.0.0git
Classes | Namespaces | Macros | Functions | Variables
AMDGPULibCalls.cpp File Reference
#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
Include dependency graph for AMDGPULibCalls.cpp:

Go to the source code of this file.

Classes

class  llvm::AMDGPULibCalls
 
struct  TableRef
 

Namespaces

 llvm
 ---------------------— PointerInfo ------------------------------------—
 

Macros

#define DEBUG_TYPE   "amdgpu-simplifylib"
 
#define MATH_PI   numbers::pi
 
#define MATH_E   numbers::e
 
#define MATH_SQRT2   numbers::sqrt2
 
#define MATH_SQRT1_2   numbers::inv_sqrt2
 

Functions

 INITIALIZE_PASS_BEGIN (AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", "Simplify well-known AMD library calls", false, false) INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls
 
amdgpu Simplify well known AMD library false INITIALIZE_PASS (AMDGPUUseNativeCalls, "amdgpu-usenative", "Replace builtin math calls with that native versions.", false, false) template< typename IRB > static CallInst *CreateCallEx(IRB &B
 
static bool HasNative (AMDGPULibFunc::EFuncId id)
 
static TableRef getOptTable (AMDGPULibFunc::EFuncId id)
 
static int getVecSize (const AMDGPULibFunc &FInfo)
 
static AMDGPULibFunc::EType getArgType (const AMDGPULibFunc &FInfo)
 
static double llvm::log2 (double V)
 

Variables

static cl::opt< bool > EnablePreLink ("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
 
static cl::list< std::string > UseNative ("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
 
amdgpu simplifylib
 
amdgpu Simplify well known AMD library calls
 
amdgpu Simplify well known AMD library false
 
amdgpu Simplify well known AMD library false FunctionCallee Callee
 
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
 
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
 
static const TableEntry tbl_acos []
 
static const TableEntry tbl_acosh []
 
static const TableEntry tbl_acospi []
 
static const TableEntry tbl_asin []
 
static const TableEntry tbl_asinh []
 
static const TableEntry tbl_asinpi []
 
static const TableEntry tbl_atan []
 
static const TableEntry tbl_atanh []
 
static const TableEntry tbl_atanpi []
 
static const TableEntry tbl_cbrt []
 
static const TableEntry tbl_cos []
 
static const TableEntry tbl_cosh []
 
static const TableEntry tbl_cospi []
 
static const TableEntry tbl_erfc []
 
static const TableEntry tbl_erf []
 
static const TableEntry tbl_exp []
 
static const TableEntry tbl_exp2 []
 
static const TableEntry tbl_exp10 []
 
static const TableEntry tbl_expm1 []
 
static const TableEntry tbl_log []
 
static const TableEntry tbl_log2 []
 
static const TableEntry tbl_log10 []
 
static const TableEntry tbl_rsqrt []
 
static const TableEntry tbl_sin []
 
static const TableEntry tbl_sinh []
 
static const TableEntry tbl_sinpi []
 
static const TableEntry tbl_sqrt []
 
static const TableEntry tbl_tan []
 
static const TableEntry tbl_tanh []
 
static const TableEntry tbl_tanpi []
 
static const TableEntry tbl_tgamma []
 

Detailed Description

This file does AMD library function optimizations.

Definition in file AMDGPULibCalls.cpp.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "amdgpu-simplifylib"

Definition at line 24 of file AMDGPULibCalls.cpp.

◆ MATH_E

#define MATH_E   numbers::e

Definition at line 39 of file AMDGPULibCalls.cpp.

◆ MATH_PI

#define MATH_PI   numbers::pi

Definition at line 38 of file AMDGPULibCalls.cpp.

◆ MATH_SQRT1_2

#define MATH_SQRT1_2   numbers::inv_sqrt2

Definition at line 41 of file AMDGPULibCalls.cpp.

◆ MATH_SQRT2

#define MATH_SQRT2   numbers::sqrt2

Definition at line 40 of file AMDGPULibCalls.cpp.

Function Documentation

◆ getArgType()

static AMDGPULibFunc::EType getArgType ( const AMDGPULibFunc & FInfo)
inline static

◆ getOptTable()

static TableRef getOptTable ( AMDGPULibFunc::EFuncId  id)
static

Definition at line 409 of file AMDGPULibCalls.cpp.

References llvm::AMDGPULibFuncBase::EI_ACOS, llvm::AMDGPULibFuncBase::EI_ACOSH, llvm::AMDGPULibFuncBase::EI_ACOSPI, llvm::AMDGPULibFuncBase::EI_ASIN, llvm::AMDGPULibFuncBase::EI_ASINH, llvm::AMDGPULibFuncBase::EI_ASINPI, llvm::AMDGPULibFuncBase::EI_ATAN, llvm::AMDGPULibFuncBase::EI_ATANH, llvm::AMDGPULibFuncBase::EI_ATANPI, llvm::AMDGPULibFuncBase::EI_CBRT, llvm::AMDGPULibFuncBase::EI_COS, llvm::AMDGPULibFuncBase::EI_COSH, llvm::AMDGPULibFuncBase::EI_COSPI, llvm::AMDGPULibFuncBase::EI_ERF, llvm::AMDGPULibFuncBase::EI_ERFC, llvm::AMDGPULibFuncBase::EI_EXP, llvm::AMDGPULibFuncBase::EI_EXP10, llvm::AMDGPULibFuncBase::EI_EXP2, llvm::AMDGPULibFuncBase::EI_EXPM1, llvm::AMDGPULibFuncBase::EI_LOG, llvm::AMDGPULibFuncBase::EI_LOG10, llvm::AMDGPULibFuncBase::EI_LOG2, llvm::AMDGPULibFuncBase::EI_NCOS, llvm::AMDGPULibFuncBase::EI_NEXP2, llvm::AMDGPULibFuncBase::EI_NLOG2, llvm::AMDGPULibFuncBase::EI_NRSQRT, llvm::AMDGPULibFuncBase::EI_NSIN, llvm::AMDGPULibFuncBase::EI_NSQRT, llvm::AMDGPULibFuncBase::EI_RSQRT, llvm::AMDGPULibFuncBase::EI_SIN, llvm::AMDGPULibFuncBase::EI_SINH, llvm::AMDGPULibFuncBase::EI_SINPI, llvm::AMDGPULibFuncBase::EI_SQRT, llvm::AMDGPULibFuncBase::EI_TAN, llvm::AMDGPULibFuncBase::EI_TANH, llvm::AMDGPULibFuncBase::EI_TANPI, llvm::AMDGPULibFuncBase::EI_TGAMMA, tbl_acos, tbl_acosh, tbl_acospi, tbl_asin, tbl_asinh, tbl_asinpi, tbl_atan, tbl_atanh, tbl_atanpi, tbl_cbrt, tbl_cos, tbl_cosh, tbl_cospi, tbl_erf, tbl_erfc, tbl_exp, tbl_exp10, tbl_exp2, tbl_expm1, tbl_log, tbl_log10, tbl_log2, tbl_rsqrt, tbl_sin, tbl_sinh, tbl_sinpi, tbl_sqrt, tbl_tan, tbl_tanh, tbl_tanpi, and tbl_tgamma.

◆ getVecSize()

static int getVecSize ( const AMDGPULibFunc & FInfo)
inline static

◆ HasNative()

static bool HasNative ( AMDGPULibFunc::EFuncId  id)
static

◆ INITIALIZE_PASS()

amdgpu Simplify well known AMD library false INITIALIZE_PASS ( AMDGPUUseNativeCalls  ,
"amdgpu-usenative"  ,
"Replace builtin math calls with that native versions."  ,
false  ,
false   
) &

◆ INITIALIZE_PASS_BEGIN()

INITIALIZE_PASS_BEGIN ( AMDGPUSimplifyLibCalls  ,
"amdgpu-simplifylib"  ,
"Simplify well-known AMD library calls"  ,
false  ,
false   
)

Variable Documentation

◆ Arg

amdgpu Simplify well known AMD library false FunctionCallee Value* Arg

Definition at line 206 of file AMDGPULibCalls.cpp.

Referenced by llvm::AAEvaluator::AAEvaluator(), llvm::opt::Option::accept(), llvm::AliasSetTracker::add(), AddAliasScopeMetadata(), AddAlignmentAssumptions(), addArgumentReturnedAttrs(), addConditions(), llvm::AnalysisUsage::addPreserved(), addVCallToSet(), llvm::AMDGPUAAResult::alias(), allCallersPassValidPointerForArgument(), allocateSGPR32Input(), allocateSGPR64Input(), llvm::SITargetLowering::allocateSpecialInputVGPRs(), allocateVGPR32Input(), llvm::CCState::AnalyzeArgumentsSecondPass(), llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute(), appendArg(), appendString(), llvm::BitVector::apply(), llvm::xray::CallArgRecord::arg(), llvm::MachineFunction::ArgRegPair::ArgRegPair(), llvm::IRPosition::argument(), llvm::CallLowering::ValueHandler::assignValueToAddress(), llvm::BTFDebug::beginFunctionImpl(), CalculateTailCallArgDest(), llvm::orc::shared::ParallelCallGroup::call(), llvm::CallGraph::CallGraph(), llvm::orc::remote::OrcRemoteTargetClient::callIntInt(), checkFunctionMemoryAccess(), llvm::CoroIdInst::clearPromise(), combineFneg(), llvm::computeConstantRange(), llvm::computeExpressionSize(), computeKnownBitsFromAssume(), llvm::computeSignatureVTs(), llvm::DwarfUnit::constructSubprogramArguments(), convertArgumentInfo(), llvm::ArgDescriptor::createArg(), llvm::IRBuilderBase::CreateIsNotNull(), llvm::IRBuilderBase::CreateIsNull(), llvm::Attributor::createShallowWrapper(), llvm::orc::LocalCXXRuntimeOverridesBase::CXAAtExitOverride(), llvm::objcarc::Depends(), llvm::orc::shared::SPSArgList< SPSTagT, SPSTagTs... 
>::deserialize(), llvm::DivergenceInfo::DivergenceInfo(), llvm::dlltoolDriverMain(), doList(), llvm::DominatorTreeBase< BasicBlock, IsPostDom >::DominatorTreeBase(), eliminateSwiftError(), eliminateSwiftErrorArgument(), llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelArg(), llvm::AMDGPU::HSAMD::MetadataStreamerV3::emitKernelArgs(), llvm::remarks::BitstreamRemarkSerializerHelper::emitRemarkBlock(), ExpandResponseFile(), findArgumentCopyElisionCandidates(), findDependencies(), FindPredecessorAutoreleaseWithSafePath(), FindPredecessorRetainWithSafePath(), llvm::objcarc::findSingleDependency(), FindSingleUseIdentifiedObject(), fitArgInto64Bits(), llvm::VarDefInit::Fold(), llvm::FoldingSetBase::FoldingSetBase(), llvm::InstCombinerImpl::foldIntegerTypedPHI(), ForeachDagApply(), llvm::detail::frexp(), llvm::GlobalsAAResult::FunctionInfo::FunctionInfo(), genericValueTraversal(), llvm::DagInit::get(), llvm::DILocalVariable::getArg(), llvm::remarks::Remark::getArgsAsMsg(), getArgumentTypeAlign(), llvm::IRPosition::getAssociatedFunction(), llvm::VarDefInit::getAsString(), llvm::AbstractCallSite::getCallArgOperand(), llvm::AbstractCallSite::getCallArgOperandNo(), llvm::CoroSuspendInst::getCoroSave(), llvm::IRPosition::getCtxI(), llvm::AMDGPUSubtarget::getExplicitKernArgSize(), llvm::MemoryLocation::getForArgument(), llvm::cflaa::getGlobalOrArgAttrFromValue(), llvm::DenseMapInfo< PointerEmbeddedInt< IntT, Bits > >::getHashValue(), llvm::DenseMapInfo< MemoryLocOrCall >::getHashValue(), llvm::MDNodeKeyImpl< DILocalVariable >::getHashValue(), getInterfaceValue(), llvm::DiagnosticInfoOptimizationBase::getMsg(), llvm::X86TargetLowering::getNegatedExpression(), llvm::PassRegistry::getPassInfo(), llvm::SIMachineFunctionInfo::getPreloadedReg(), llvm::CoroIdInst::getPromise(), getSearchPaths(), llvm::ARM_AM::getSOImmVal(), llvm::CoroIdAsyncInst::getStorageArgumentIndex(), llvm::CoroSuspendAsyncInst::getStorageArgumentIndex(), llvm::ARM_AM::getT2SOImmVal(), 
llvm::getTotalArgumentsSizeInBytes(), getVectorCallCosts(), llvm::GlobalsAAResult::GlobalsAAResult(), HandleByValArgument(), handlePhiDef(), HandlePrefixedOrGroupedOption(), llvm::detail::hash_value(), llvm::hash_value(), HasSafePathToPredecessorCall(), llvm::AArch64RegisterInfo::hasSVEArgsOrReturn(), llvm::HexagonEvaluator::HexagonEvaluator(), llvm::Attributor::identifyDefaultAbstractAttributes(), AAPrivatizablePtrFloating::identifyPrivatizableType(), llvm::detail::ilogb(), AAReturnedValuesImpl::initialize(), AANoCaptureCallSiteArgument::initialize(), AAMemoryBehaviorArgument::initialize(), AAMemoryBehaviorCallSiteArgument::initialize(), initSlots2Values(), llvm::yaml::CustomMappingTraits< std::map< std::vector< uint64_t >, WholeProgramDevirtResolution::ByArg > >::inputOne(), llvm::SIInstrInfo::insertNoops(), insertNoopsInBundle(), insertParsePoints(), insertSinCosCall(), insertSpills(), llvm::ARMTTIImpl::instCombineIntrinsic(), llvm::X86TTIImpl::instCombineIntrinsic(), instCombineSVECmpNE(), llvm::remarks::StringTable::internalize(), llvm::Attributor::internalizeFunctions(), llvm::PreservedAnalyses::intersect(), llvm::invertCondition(), llvm::IRSimilarity::IRSimilarityCandidate::IRSimilarityCandidate(), isCString(), llvm::SITargetLowering::isEligibleForTailCallOptimization(), isIntExtFree(), llvm::InformationCache::isInvolvedInMustTailCall(), llvm::MDNodeKeyImpl< DILocalVariable >::isKeyOf(), isKnownNonNullFromDominatingCondition(), llvm::DILocalVariable::isParameter(), llvm::objcarc::IsPotentialRetainableObjPtr(), isPredicatedOnPHI(), isSafeToPromoteArgument(), llvm::NVPTXTTIImpl::isSourceOfDivergence(), llvm::Value::isSwiftError(), isSwiftError(), llvm::AMDGPUInstrInfo::isUniformMMO(), llvm::Attributor::isValidFunctionSignatureRewrite(), llvm::LazyValueInfo::LazyValueInfo(), LLVMGetNextParam(), LLVMGetPreviousParam(), LLVMRemarkArgGetDebugLoc(), LLVMRemarkArgGetKey(), LLVMRemarkArgGetValue(), LLVMSetParamAlignment(), llvm::AMDGPULegalizerInfo::loadInputValue(), 
llvm::AMDGPUTargetLowering::loadInputValue(), LookupNearestOption(), llvm::Pass::lookupPassInfo(), llvm::LoopInfoBase< BasicBlock, Loop >::LoopInfoBase(), lowerBALLOTIntrinsic(), llvm::MipsCallLowering::lowerCall(), llvm::ARMCallLowering::lowerCall(), llvm::VETargetLowering::LowerCall(), llvm::HexagonTargetLowering::LowerCall(), llvm::SITargetLowering::LowerCall(), llvm::RISCVTargetLowering::LowerCall(), llvm::CallLowering::lowerCall(), llvm::SparcTargetLowering::LowerCall_32(), llvm::SparcTargetLowering::LowerCall_64(), llvm::FastISel::lowerCallTo(), llvm::SparcTargetLowering::LowerF128_LibCallArg(), llvm::MipsCallLowering::lowerFormalArguments(), llvm::PPCCallLowering::lowerFormalArguments(), llvm::X86CallLowering::lowerFormalArguments(), llvm::M68kCallLowering::lowerFormalArguments(), llvm::ARMCallLowering::lowerFormalArguments(), llvm::R600TargetLowering::LowerFormalArguments(), llvm::AArch64CallLowering::lowerFormalArguments(), llvm::AMDGPUCallLowering::lowerFormalArguments(), llvm::VETargetLowering::LowerFormalArguments(), llvm::SITargetLowering::LowerFormalArguments(), llvm::NVPTXTargetLowering::LowerFormalArguments(), llvm::SparcTargetLowering::LowerFormalArguments_32(), llvm::SparcTargetLowering::LowerFormalArguments_64(), llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel(), LowerFSINCOS(), LowerMemOpCallTo(), llvm::SITargetLowering::LowerReturn(), llvm::SparcTargetLowering::LowerReturn_32(), lowerStatepointMetaArgs(), makeStatepointExplicitImpl(), llvm::coro::LowererBase::makeSubFnCall(), AAIsDeadArgument::manifest(), AAAlignCallSiteArgument::manifest(), AAPrivatizablePtrArgument::manifest(), llvm::X86TargetLowering::markLibCallAttributes(), markTails(), MatchingStackOffset(), needsFPFromSig(), onlySingleValue(), llvm::GVNExpression::BasicExpression::op_push_back(), std::hash< llvm::pdb::PDB_SymType >::operator()(), llvm::orc::shared::detail::ReadArgs< ArgT, ArgTs... 
>::operator()(), llvm::operator<<(), llvm::LazyValueInfo::operator=(), llvm::MIBundleOperandIteratorBase< const MachineOperand >::operator==(), llvm::MCRegisterInfo::mc_difflist_iterator< mc_subreg_iterator >::operator==(), llvm::BasicBlock::phi_iterator_impl< PHINodeT, BBIteratorT >::operator==(), llvm::LazyCallGraph::postorder_ref_scc_iterator::operator==(), llvm::BlotMapVector< KeyT, ValueT >::operator[](), llvm::yaml::CustomMappingTraits< std::map< std::vector< uint64_t >, WholeProgramDevirtResolution::ByArg > >::output(), llvm::cflaa::parentFunctionOfValue(), llvm::remarks::HotnessThresholdParser::parse(), llvm::cl::parser< const PassInfo * >::parse(), llvm::cl::parser< std::string >::parse(), llvm::cl::parser< char >::parse(), llvm::parseCommandLineOptions(), parseDouble(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseEncoding(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseExpr(), ParseFunctionArgs(), llvm::remarks::parseHotnessThresholdOption(), llvm::GCNTargetMachine::parseMachineFunctionInfo(), llvm::opt::OptTable::ParseOneArg(), llvm::remarks::YAMLRemarkParser::parseRemark(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseSpecialName(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseTemplateArg(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseTemplateArgs(), passingValueIsAlwaysUndefined(), Passv64i1ArgInRegs(), llvm::R600TargetLowering::PerformDAGCombine(), llvm::AMDGPUAAResult::pointsToConstantMemory(), populateExternalRelations(), llvm::LegacyDivergenceAnalysis::print(), llvm::sys::printArg(), processArg(), processDbgDeclares(), processPSInputArgs(), ProfileDagInit(), programUndefinedIfUndefOrPoison(), llvm::promoteCall(), llvm::cl::ProvidePositionalOption(), llvm::Attributor::registerFunctionSignatureRewrite(), llvm::DIExpression::replaceArg(), replaceArgumentUses(), replaceConstants(), replaceSwiftErrorOps(), replaceWithCallToVeclib(), 
llvm::VarDefInit::resolveReferences(), llvm::DagInit::resolveReferences(), llvm::LoopAnalysisManagerFunctionProxy::Result::Result(), llvm::InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs >::Result::Result(), llvm::DivergenceAnalysisPrinterPass::run(), llvm::RequireAnalysisPass< AnalysisT, IRUnitT, AnalysisManagerT, ExtraArgTs >::run(), llvm::orc::runAsMain(), llvm::runFuzzerOnInputs(), llvm::runIPSCCP(), llvm::IRTranslator::runOnMachineFunction(), llvm::coro::salvageDebugInfo(), llvm::ScalarEvolution::ScalarEvolution(), llvm::detail::scalbn(), llvm::FastISel::selectIntrinsicCall(), llvm::orc::shared::SPSArgList< SPSTagT, SPSTagTs... >::serialize(), llvm::CallLowering::setArgFlags(), llvm::SIMachineFunctionInfo::setWorkItemIDX(), llvm::SIMachineFunctionInfo::setWorkItemIDY(), llvm::SIMachineFunctionInfo::setWorkItemIDZ(), simplifyInvariantGroupIntrinsic(), simplifyX86movmsk(), llvm::orc::shared::SPSArgList< SPSTagT, SPSTagTs... >::size(), FunctionSpecializer::specializeFunctions(), StoreTailCallArgumentsToStackSlot(), llvm::SubsumingPositionIterator::SubsumingPositionIterator(), llvm::Attributor::translateArgumentToCallSiteContent(), tryConstantFoldCall(), tryToElideArgumentCopy(), AAPointerInfoCallSiteArgument::updateImpl(), AANoFreeCallSiteArgument::updateImpl(), AAIsDeadCallSiteArgument::updateImpl(), AAAlignCallSiteArgument::updateImpl(), AANoCaptureCallSiteArgument::updateImpl(), AAValueSimplifyArgument::updateImpl(), AAPrivatizablePtrArgument::updateImpl(), AAMemoryBehaviorFloating::updateImpl(), AAMemoryBehaviorCallSiteArgument::updateImpl(), llvm::UpgradeARCRuntime(), llvm::UpgradeFunctionAttributes(), llvm::IRPosition::value(), llvm::ObjectSizeOffsetVisitor::visitCallBase(), llvm::InstCombinerImpl::visitCallInst(), llvm::InnerLoopVectorizer::widenCallInstruction(), llvm::wouldInstructionBeTriviallyDead(), and writeFunctionTypeMetadataRecords().

◆ Callee

amdgpu Simplify well known AMD library false FunctionCallee Callee

Definition at line 206 of file AMDGPULibCalls.cpp.

Referenced by llvm::AbstractCallSite::AbstractCallSite(), AddCalls(), addCallToCallGraph(), addNoRecurseAttrs(), adjustCallerSSPLevel(), adjustCallerStackProbes(), adjustCallerStackProbeSize(), adjustMinLegalVectorWidth(), adjustNullPointerValidAttr(), allCallersPassValidPointerForArgument(), analyzeLoopUnrollCost(), llvm::ArgumentPromotionPass::areFunctionArgsABICompatible(), llvm::PPCTTIImpl::areFunctionArgsABICompatible(), llvm::X86TTIImpl::areFunctionArgsABICompatible(), llvm::TargetTransformInfoImplBase::areFunctionArgsABICompatible(), llvm::TargetTransformInfo::areFunctionArgsABICompatible(), llvm::GCNTTIImpl::areInlineCompatible(), llvm::TargetTransformInfoImplBase::areInlineCompatible(), llvm::AttributeFuncs::areInlineCompatible(), llvm::TargetTransformInfo::areInlineCompatible(), llvm::AVRDAGToDAGISel::select< AVRISD::CALL >(), buildCallOperands(), CallExpr::CallExpr(), callsShareTOCBase(), canSinkInstructions(), computeFunctionSummary(), copyFeaturesToFunction(), llvm::IRBuilderBase::CreateCall(), llvm::IRBuilderBase::CreateCallBr(), llvm::objcarc::createCallInstWithColors(), llvm::IRBuilderBase::CreateConstrainedFPCall(), llvm::IRBuilderBase::CreateInvoke(), DeleteBasicBlock(), doCallSiteSplitting(), emitBinaryFloatFnCallHelper(), EmitCall(), emitDirectiveRelocJalr(), llvm::emitInlinedInto(), emitLibCall(), emitUnaryFloatFnCallHelper(), Priority::evaluate(), findInitTrampoline(), llvm::sampleprof::FunctionSamples::findInlinedFunctions(), findUses(), llvm::AMDGPULibCalls::fold(), for(), llvm::FunctionCallee::FunctionCallee(), functionsHaveCompatibleAttributes(), llvm::ReplayInlineAdvisor::getAdviceImpl(), getAllocationData(), getAllocationDataForFunction(), getAllocationSize(), llvm::IRPosition::getAssociatedArgument(), llvm::getAttributeBasedInliningDecision(), llvm::AbstractCallSite::getCallbackUses(), getCalledFunction(), getCallOpcode(), getEdgeKind(), llvm::FunctionPropertiesInfo::getFunctionPropertiesInfo(), llvm::getInlineCost(), 
llvm::InlineAdvisor::getMandatoryKind(), llvm::AArch64InstrInfo::getOutliningType(), llvm::ARMBaseInstrInfo::getOutliningType(), llvm::MipsCCState::getSpecialCallingConvForCallee(), handleAttr(), hasReturnsTwiceAttr(), llvm::Attributor::identifyDefaultAbstractAttributes(), INITIALIZE_PASS(), llvm::InformationCache::initializeModuleSlice(), inlineCallIfPossible(), insertSinCosCall(), InstrBreaksNoFree(), InstrBreaksNonThrowing(), InstrBreaksNoSync(), isCalleeLoad(), llvm::CodeExtractor::isEligible(), llvm::HexagonTargetLowering::IsEligibleForTailCallOptimization(), llvm::SITargetLowering::isEligibleForTailCallOptimization(), isEqual(), llvm::isFreeCall(), isFunctionGlobalAddress(), isIndirectCall(), llvm::isInlineViable(), llvm::isLegalToPromote(), isReportingError(), isReturnNonNull(), llvm::isSafeToSpeculativelyExecute(), llvm::VETargetLowering::LowerCall(), llvm::HexagonTargetLowering::LowerCall(), llvm::SITargetLowering::LowerCall(), llvm::RISCVTargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerCall(), llvm::SystemZTargetLowering::LowerCall(), llvm::SparcTargetLowering::LowerCall_32(), llvm::SparcTargetLowering::LowerCall_64(), llvm::SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(), llvm::SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(), llvm::SelectionDAGBuilder::LowerDeoptimizeCall(), llvm::SparcTargetLowering::LowerF128Compare(), llvm::SparcTargetLowering::LowerF128Op(), llvm::SparcTargetLowering::LowerGlobalTLSAddress(), llvm::IntrinsicLowering::LowerIntrinsicCall(), llvm::SelectionDAGBuilder::LowerStatepoint(), llvm::AMDGPUTargetLowering::lowerUnhandledCall(), markAliveBlocks(), CallExpr::match(), llvm::AttributeFuncs::mergeAttributesForInlining(), llvm::pdb::operator<<(), llvm::FortifiedLibCallSimplifier::optimizeCall(), llvm::LibCallSimplifier::optimizeCall(), AbstractManglingParser< ManglingParser< Alloc >, Alloc >::parseExpr(), prepareDescriptorIndirectCall(), prepareIndirectCall(), CallExpr::printLeft(), 
llvm::sampleprof::ProfiledCallGraph::ProfiledCallGraph(), llvm::promoteCall(), llvm::promoteCallWithIfThenElse(), propagateCallAttrsFromCallee(), llvm::ImportedFunctionsInliningStatistics::recordInline(), llvm::CallGraphNode::removeAnyCallEdgeTo(), llvm::CallGraphNode::removeOneAbstractEdgeTo(), llvm::ReplayInlineAdvisor::ReplayInlineAdvisor(), llvm::AMDGPUSimplifyLibCallsPass::run(), llvm::AMDGPUUseNativeCallsPass::run(), llvm::InlinerPass::run(), runImpl(), runNVVMIntrRange(), llvm::FastISel::selectPatchpoint(), setAND(), setOR(), llvm::shouldInline(), llvm::SimplifyCall(), simplifySuspendPoint(), llvm::SubsumingPositionIterator::SubsumingPositionIterator(), llvm::thread::thread(), transformCallee(), llvm::tryPromoteCall(), llvm::coro::updateCallGraph(), UpdateCallGraphAfterInlining(), updateCallProfile(), AAAMDAttributesFunction::updateImpl(), AAUndefinedBehaviorImpl::updateImpl(), llvm::updateProfileCallee(), llvm::AMDGPULibCalls::useNative(), versionCallSite(), and llvm::sampleprof::SampleProfileWriterBinary::writeBody().

◆ calls

amdgpu Simplify well known AMD library calls

Definition at line 199 of file AMDGPULibCalls.cpp.

◆ EnablePreLink

cl::opt<bool> EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
static

◆ false

amdgpu Simplify well known AMD library false

Definition at line 199 of file AMDGPULibCalls.cpp.

Referenced by llvm::AMDGPULibCalls::fold().

◆ Name

Initial value:
= "") {
CallInst *R = B.CreateCall(Callee, Arg, Name);
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
R->setCallingConv(F->getCallingConv());
return R;
}
template <typename IRB>
static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
Value *Arg2, const Twine &Name = "") {
CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
R->setCallingConv(F->getCallingConv());
return R;
}
struct TableEntry {
double result;
double input;
}

Definition at line 207 of file AMDGPULibCalls.cpp.

◆ simplifylib

amdgpu simplifylib

Definition at line 198 of file AMDGPULibCalls.cpp.

◆ tbl_acos

const TableEntry tbl_acos[]
static
Initial value:
= {
{MATH_PI / 2.0, 0.0},
{MATH_PI / 2.0, -0.0},
{0.0, 1.0},
{MATH_PI, -1.0}
}

Definition at line 232 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_acosh

const TableEntry tbl_acosh[]
static
Initial value:
= {
{0.0, 1.0}
}

Definition at line 238 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_acospi

const TableEntry tbl_acospi[]
static
Initial value:
= {
{0.5, 0.0},
{0.5, -0.0},
{0.0, 1.0},
{1.0, -1.0}
}

Definition at line 241 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_asin

const TableEntry tbl_asin[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{MATH_PI / 2.0, 1.0},
{-MATH_PI / 2.0, -1.0}
}

Definition at line 247 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_asinh

const TableEntry tbl_asinh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 253 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_asinpi

const TableEntry tbl_asinpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{0.5, 1.0},
{-0.5, -1.0}
}

Definition at line 257 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_atan

const TableEntry tbl_atan[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{MATH_PI / 4.0, 1.0},
{-MATH_PI / 4.0, -1.0}
}

Definition at line 263 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_atanh

const TableEntry tbl_atanh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 269 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_atanpi

const TableEntry tbl_atanpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{0.25, 1.0},
{-0.25, -1.0}
}

Definition at line 273 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cbrt

const TableEntry tbl_cbrt[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{1.0, 1.0},
{-1.0, -1.0},
}

Definition at line 279 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cos

const TableEntry tbl_cos[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 285 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cosh

const TableEntry tbl_cosh[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 289 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cospi

const TableEntry tbl_cospi[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 293 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_erf

const TableEntry tbl_erf[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 301 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_erfc

const TableEntry tbl_erfc[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 297 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_exp

const TableEntry tbl_exp[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0},
{MATH_E, 1.0}
}

Definition at line 305 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_exp10

const TableEntry tbl_exp10[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0},
{10.0, 1.0}
}

Definition at line 315 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_exp2

const TableEntry tbl_exp2[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0},
{2.0, 1.0}
}

Definition at line 310 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_expm1

const TableEntry tbl_expm1[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 320 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_log

const TableEntry tbl_log[]
static
Initial value:
= {
{0.0, 1.0},
{1.0, MATH_E}
}

Definition at line 324 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_log10

const TableEntry tbl_log10[]
static
Initial value:
= {
{0.0, 1.0},
{1.0, 10.0}
}

Definition at line 332 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_log2

const TableEntry tbl_log2[]
static
Initial value:
= {
{0.0, 1.0},
{1.0, 2.0}
}

Definition at line 328 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_rsqrt

const TableEntry tbl_rsqrt[]
static
Initial value:
= {
{1.0, 1.0},
{MATH_SQRT1_2, 2.0}
}

Definition at line 336 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sin

const TableEntry tbl_sin[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 340 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sinh

const TableEntry tbl_sinh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 344 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sinpi

const TableEntry tbl_sinpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 348 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sqrt

const TableEntry tbl_sqrt[]
static
Initial value:
= {
{0.0, 0.0},
{1.0, 1.0},
{MATH_SQRT2, 2.0}
}

Definition at line 352 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tan

const TableEntry tbl_tan[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 357 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tanh

const TableEntry tbl_tanh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 361 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tanpi

const TableEntry tbl_tanpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 365 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tgamma

const TableEntry tbl_tgamma[]
static
Initial value:
= {
{1.0, 1.0},
{1.0, 2.0},
{2.0, 3.0},
{6.0, 4.0}
}

Definition at line 369 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ UseNative

cl::list<std::string> UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
static
llvm::Function
Definition: Function.h:61
MATH_PI
#define MATH_PI
Definition: AMDGPULibCalls.cpp:38
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:180
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
input
The initial backend is deliberately restricted to z10 We should add support for later architectures at some point If an asm ties an i32 r result to an i64 input
Definition: README.txt:10
Name
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Definition: AMDGPULibCalls.cpp:207
llvm::FunctionCallee::getCallee
Value * getCallee()
Definition: DerivedTypes.h:183
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
MATH_SQRT2
#define MATH_SQRT2
Definition: AMDGPULibCalls.cpp:40
MATH_E
#define MATH_E
Definition: AMDGPULibCalls.cpp:39
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
MATH_SQRT1_2
#define MATH_SQRT1_2
Definition: AMDGPULibCalls.cpp:41