LLVM 17.0.0git
Classes | Namespaces | Macros | Typedefs | Functions | Variables
AMDGPULibCalls.cpp File Reference

This file does AMD library function optimizations. More...

#include "AMDGPU.h"
#include "AMDGPULibFunc.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include <cmath>
Include dependency graph for AMDGPULibCalls.cpp:

Go to the source code of this file.

Classes

class  llvm::AMDGPULibCalls
 

Namespaces

namespace  llvm
 This is an optimization pass for GlobalISel generic memory operations.
 

Macros

#define DEBUG_TYPE   "amdgpu-simplifylib"
 
#define MATH_PI   numbers::pi
 
#define MATH_E   numbers::e
 
#define MATH_SQRT2   numbers::sqrt2
 
#define MATH_SQRT1_2   numbers::inv_sqrt2
 

Typedefs

using TableRef = ArrayRef< TableEntry >
 

Functions

 INITIALIZE_PASS_BEGIN (AMDGPUSimplifyLibCalls, "amdgpu-simplifylib", "Simplify well-known AMD library calls", false, false) INITIALIZE_PASS_END(AMDGPUSimplifyLibCalls
 
amdgpu Simplify well known AMD library false INITIALIZE_PASS (AMDGPUUseNativeCalls, "amdgpu-usenative", "Replace builtin math calls with that native versions.", false, false) template< typename IRB > static CallInst *CreateCallEx(IRB &B
 
static bool HasNative (AMDGPULibFunc::EFuncId id)
 
static TableRef getOptTable (AMDGPULibFunc::EFuncId id)
 
static int getVecSize (const AMDGPULibFunc &FInfo)
 
static AMDGPULibFunc::EType getArgType (const AMDGPULibFunc &FInfo)
 
static double llvm::log2 (double V)
 

Variables

static cl::opt< bool > EnablePreLink ("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden)
 
static cl::list< std::string > UseNative ("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden)
 
amdgpu simplifylib
 
amdgpu Simplify well known AMD library calls
 
amdgpu Simplify well known AMD library false
 
amdgpu Simplify well known AMD library false FunctionCallee Callee
 
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
 
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
 
static const TableEntry tbl_acos []
 
static const TableEntry tbl_acosh []
 
static const TableEntry tbl_acospi []
 
static const TableEntry tbl_asin []
 
static const TableEntry tbl_asinh []
 
static const TableEntry tbl_asinpi []
 
static const TableEntry tbl_atan []
 
static const TableEntry tbl_atanh []
 
static const TableEntry tbl_atanpi []
 
static const TableEntry tbl_cbrt []
 
static const TableEntry tbl_cos []
 
static const TableEntry tbl_cosh []
 
static const TableEntry tbl_cospi []
 
static const TableEntry tbl_erfc []
 
static const TableEntry tbl_erf []
 
static const TableEntry tbl_exp []
 
static const TableEntry tbl_exp2 []
 
static const TableEntry tbl_exp10 []
 
static const TableEntry tbl_expm1 []
 
static const TableEntry tbl_log []
 
static const TableEntry tbl_log2 []
 
static const TableEntry tbl_log10 []
 
static const TableEntry tbl_rsqrt []
 
static const TableEntry tbl_sin []
 
static const TableEntry tbl_sinh []
 
static const TableEntry tbl_sinpi []
 
static const TableEntry tbl_sqrt []
 
static const TableEntry tbl_tan []
 
static const TableEntry tbl_tanh []
 
static const TableEntry tbl_tanpi []
 
static const TableEntry tbl_tgamma []
 

Detailed Description

This file does AMD library function optimizations.

Definition in file AMDGPULibCalls.cpp.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "amdgpu-simplifylib"

Definition at line 26 of file AMDGPULibCalls.cpp.

◆ MATH_E

#define MATH_E   numbers::e

Definition at line 41 of file AMDGPULibCalls.cpp.

◆ MATH_PI

#define MATH_PI   numbers::pi

Definition at line 40 of file AMDGPULibCalls.cpp.

◆ MATH_SQRT1_2

#define MATH_SQRT1_2   numbers::inv_sqrt2

Definition at line 43 of file AMDGPULibCalls.cpp.

◆ MATH_SQRT2

#define MATH_SQRT2   numbers::sqrt2

Definition at line 42 of file AMDGPULibCalls.cpp.

Typedef Documentation

◆ TableRef

using TableRef = ArrayRef<TableEntry>

Definition at line 380 of file AMDGPULibCalls.cpp.

Function Documentation

◆ getArgType()

static AMDGPULibFunc::EType getArgType ( const AMDGPULibFunc & FInfo)
inlinestatic

◆ getOptTable()

static TableRef getOptTable ( AMDGPULibFunc::EFuncId  id)
static

◆ getVecSize()

static int getVecSize ( const AMDGPULibFunc & FInfo)
inlinestatic

◆ HasNative()

static bool HasNative ( AMDGPULibFunc::EFuncId  id)
static

Definition at line 357 of file AMDGPULibCalls.cpp.

Referenced by llvm::AMDGPULibCalls::useNative().

◆ INITIALIZE_PASS()

amdgpu Simplify well known AMD library false INITIALIZE_PASS ( AMDGPUUseNativeCalls  ,
"amdgpu-usenative"  ,
"Replace builtin math calls with that native versions."  ,
false  ,
false   
) &

◆ INITIALIZE_PASS_BEGIN()

INITIALIZE_PASS_BEGIN ( AMDGPUSimplifyLibCalls  ,
"amdgpu-simplifylib"  ,
"Simplify well-known AMD library calls"  ,
false  ,
false   
)

Variable Documentation

◆ Arg

amdgpu Simplify well known AMD library false FunctionCallee Value* Arg

Definition at line 187 of file AMDGPULibCalls.cpp.

Referenced by llvm::AAEvaluator::AAEvaluator(), llvm::SPIRVGeneralDuplicatesTracker::add(), llvm::SPIRVGlobalRegistry::add(), llvm::HashBuilderImpl< HasherT, Endianness >::add(), llvm::AliasSetTracker::add(), AddAliasScopeMetadata(), AddAlignmentAssumptions(), addArgumentReturnedAttrs(), addConditions(), llvm::AnalysisUsage::addPreserved(), addVCallToSet(), adjustByValArgAlignment(), llvm::SystemZTTIImpl::adjustInliningThreshold(), llvm::AMDGPUAAResult::alias(), allCallersPassValidPointerForArgument(), allocateSGPR32Input(), allocateSGPR64Input(), llvm::SITargetLowering::allocateSpecialInputVGPRs(), allocateVGPR32Input(), llvm::CCState::AnalyzeArgumentsSecondPass(), llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute(), appendArg(), appendString(), llvm::BitVector::apply(), llvm::X86TTIImpl::areInlineCompatible(), llvm::xray::CallArgRecord::arg(), llvm::MachineFunction::ArgRegPair::ArgRegPair(), llvm::IRPosition::argument(), llvm::CallLowering::ValueHandler::assignValueToAddress(), llvm::BTFDebug::beginFunctionImpl(), CalculateTailCallArgDest(), llvm::CallGraph::CallGraph(), checkFunctionMemoryAccess(), llvm::Record::checkUnusedTemplateArgs(), checkZExtBool(), llvm::CoroIdInst::clearPromise(), collectEscapedLocals(), combineFneg(), llvm::computeConstantRange(), llvm::computeExpressionSize(), computeKnownBitsFromAssume(), llvm::computeLTOCacheKey(), llvm::computeSignatureVTs(), llvm::DwarfUnit::constructSubprogramArguments(), convertArgumentInfo(), llvm::ArgDescriptor::createArg(), llvm::IRBuilderBase::CreateIsNeg(), llvm::IRBuilderBase::CreateIsNotNeg(), llvm::IRBuilderBase::CreateIsNotNull(), llvm::IRBuilderBase::CreateIsNull(), llvm::Attributor::createShallowWrapper(), llvm::orc::LocalCXXRuntimeOverridesBase::CXAAtExitOverride(), llvm::objcarc::Depends(), llvm::orc::shared::SPSArgList< SPSTagT, SPSTagTs... 
>::deserialize(), llvm::DivergenceInfo::DivergenceInfo(), llvm::dlltoolDriverMain(), doList(), llvm::DominatorTreeBase< NodeT, IsPostDom >::DominatorTreeBase(), doPromotion(), eliminateSwiftError(), eliminateSwiftErrorArgument(), llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitKernelArg(), llvm::AMDGPU::HSAMD::MetadataStreamerMsgPackV3::emitKernelArgs(), llvm::remarks::BitstreamRemarkSerializerHelper::emitRemarkBlock(), llvm::OpenMPIRBuilder::emitTargetKernel(), llvm::Evaluator::EvaluateFunction(), llvm::CodeExtractor::excludeArgFromAggregate(), llvm::VPWidenCallRecipe::execute(), ExpandBasePaths(), llvm::cl::ExpansionContext::expandResponseFiles(), llvm::SPIRVGeneralDuplicatesTracker::find(), findArgParts(), findArgumentCopyElisionCandidates(), findDependencies(), FindPredecessorAutoreleaseWithSafePath(), FindPredecessorRetainWithSafePath(), llvm::objcarc::findSingleDependency(), FindSingleUseIdentifiedObject(), fitArgInto64Bits(), flattenCommandLine(), llvm::VarDefInit::Fold(), llvm::FoldingSetBase::FoldingSetBase(), llvm::InstCombinerImpl::foldIntegerTypedPHI(), foldSqrt(), ForeachDagApply(), llvm::detail::frexp(), llvm::GlobalsAAResult::FunctionInfo::FunctionInfo(), llvm::DagInit::get(), llvm::getAllocSize(), llvm::DILocalVariable::getArg(), llvm::remarks::Remark::getArgsAsMsg(), getArgumentTypeAlign(), llvm::IRPosition::getAssociatedFunction(), llvm::VarDefInit::getAsString(), llvm::AbstractCallSite::getCallArgOperand(), llvm::AbstractCallSite::getCallArgOperandNo(), llvm::CoroSuspendInst::getCoroSave(), llvm::IRPosition::getCtxI(), llvm::AMDGPUSubtarget::getExplicitKernArgSize(), llvm::MemoryLocation::getForArgument(), llvm::cflaa::getGlobalOrArgAttrFromValue(), llvm::MDNodeKeyImpl< DILocalVariable >::getHashValue(), llvm::DenseMapInfo< MemoryLocOrCall >::getHashValue(), llvm::DenseMapInfo< PointerEmbeddedInt< IntT, Bits > >::getHashValue(), llvm::AAResults::getModRefInfo(), llvm::BasicAAResult::getModRefInfoMask(), 
llvm::DiagnosticInfoOptimizationBase::getMsg(), llvm::X86TargetLowering::getNegatedExpression(), getOriginalFunctionType(), llvm::PassRegistry::getPassInfo(), llvm::SIMachineFunctionInfo::getPreloadedReg(), llvm::CoroIdInst::getPromise(), getSearchPaths(), llvm::ARM_AM::getSOImmVal(), llvm::CoroIdAsyncInst::getStorageArgumentIndex(), llvm::CoroSuspendAsyncInst::getStorageArgumentIndex(), llvm::ARM_AM::getT2SOImmVal(), llvm::AArch64TargetLowering::getTgtMemIntrinsic(), llvm::getTotalArgumentsSizeInBytes(), getVectorCallCosts(), llvm::GlobalsAAResult::GlobalsAAResult(), HandleByValArgument(), handlePhiDef(), HandlePrefixedOrGroupedOption(), llvm::hash_value(), HasSafePathToPredecessorCall(), llvm::HexagonEvaluator::HexagonEvaluator(), llvm::Attributor::identifyDefaultAbstractAttributes(), llvm::GenericUniformityAnalysisImpl< ContextT >::initialize(), INITIALIZE_PASS(), initSlots2Values(), llvm::yaml::CustomMappingTraits< std::map< std::vector< uint64_t >, WholeProgramDevirtResolution::ByArg > >::inputOne(), llvm::SIInstrInfo::insertNoops(), insertNoopsInBundle(), insertParsePoints(), insertSinCosCall(), insertSpills(), llvm::ARMTTIImpl::instCombineIntrinsic(), llvm::X86TTIImpl::instCombineIntrinsic(), instCombineSVECmpNE(), llvm::remarks::StringTable::internalize(), llvm::Attributor::internalizeFunctions(), llvm::PreservedAnalyses::intersect(), llvm::invertCondition(), llvm::IRSimilarity::IRSimilarityCandidate::IRSimilarityCandidate(), llvm::SparcTargetLowering::IsEligibleForTailCallOptimization(), llvm::SITargetLowering::isEligibleForTailCallOptimization(), isIntExtFree(), llvm::InformationCache::isInvolvedInMustTailCall(), llvm::MDNodeKeyImpl< DILocalVariable >::isKeyOf(), isKnownNonNullFromDominatingCondition(), llvm::DILocalVariable::isParameter(), llvm::objcarc::IsPotentialRetainableObjPtr(), isPredicatedOnPHI(), llvm::NVPTXTTIImpl::isSourceOfDivergence(), llvm::Value::isSwiftError(), isSwiftError(), llvm::AMDGPUInstrInfo::isUniformMMO(), 
llvm::Attributor::isValidFunctionSignatureRewrite(), llvm::LazyValueInfo::LazyValueInfo(), llvm::AMDGPULegalizerInfo::legalizeWorkitemIDIntrinsic(), llvm::libDriverMain(), LLVMGetNextParam(), LLVMGetPreviousParam(), LLVMRemarkArgGetDebugLoc(), LLVMRemarkArgGetKey(), LLVMRemarkArgGetValue(), LLVMSetParamAlignment(), llvm::AMDGPULegalizerInfo::loadInputValue(), llvm::AMDGPUTargetLowering::loadInputValue(), LookupNearestOption(), llvm::Pass::lookupPassInfo(), llvm::LoopInfoBase< BlockT, LoopT >::LoopInfoBase(), lowerBALLOTIntrinsic(), llvm::SITargetLowering::LowerCall(), llvm::ARMCallLowering::lowerCall(), llvm::MipsCallLowering::lowerCall(), llvm::SPIRVCallLowering::lowerCall(), llvm::CallLowering::lowerCall(), llvm::HexagonTargetLowering::LowerCall(), llvm::LoongArchTargetLowering::LowerCall(), llvm::RISCVTargetLowering::LowerCall(), llvm::VETargetLowering::LowerCall(), llvm::SparcTargetLowering::LowerCall_32(), llvm::SparcTargetLowering::LowerCall_64(), llvm::FastISel::lowerCallTo(), llvm::TargetLowering::LowerCallTo(), llvm::SparcTargetLowering::LowerF128_LibCallArg(), llvm::AMDGPUCallLowering::lowerFormalArguments(), llvm::AArch64CallLowering::lowerFormalArguments(), llvm::ARMCallLowering::lowerFormalArguments(), llvm::M68kCallLowering::lowerFormalArguments(), llvm::MipsCallLowering::lowerFormalArguments(), llvm::PPCCallLowering::lowerFormalArguments(), llvm::SPIRVCallLowering::lowerFormalArguments(), llvm::X86CallLowering::lowerFormalArguments(), llvm::R600TargetLowering::LowerFormalArguments(), llvm::SITargetLowering::LowerFormalArguments(), llvm::NVPTXTargetLowering::LowerFormalArguments(), llvm::VETargetLowering::LowerFormalArguments(), llvm::SparcTargetLowering::LowerFormalArguments_32(), llvm::SparcTargetLowering::LowerFormalArguments_64(), llvm::AMDGPUCallLowering::lowerFormalArgumentsKernel(), llvm::AMDGPUTargetLowering::LowerFROUNDEVEN(), LowerFSINCOS(), LowerMemOpCallTo(), llvm::SITargetLowering::LowerReturn(), 
llvm::SparcTargetLowering::LowerReturn_32(), lowerStatepointMetaArgs(), makeStatepointExplicitImpl(), llvm::coro::LowererBase::makeSubFnCall(), llvm::X86TargetLowering::markLibCallAttributes(), markTails(), MatchingStackOffset(), needsFPFromSig(), onlySingleValue(), llvm::GVNExpression::BasicExpression::op_push_back(), std::hash< llvm::pdb::PDB_SymType >::operator()(), llvm::operator<<(), llvm::LazyValueInfo::operator=(), llvm::MCRegisterInfo::mc_difflist_iterator< SubT >::operator==(), llvm::MIBundleOperandIteratorBase< ValueT >::operator==(), llvm::BasicBlock::phi_iterator_impl< PHINodeT, BBIteratorT >::operator==(), llvm::LazyCallGraph::postorder_ref_scc_iterator::operator==(), llvm::BlotMapVector< KeyT, ValueT >::operator[](), llvm::yaml::CustomMappingTraits< std::map< std::vector< uint64_t >, WholeProgramDevirtResolution::ByArg > >::output(), llvm::remarks::HotnessThresholdParser::parse(), llvm::cl::parser< char >::parse(), llvm::cl::parser< std::string >::parse(), llvm::cl::parser< DataType >::parse(), llvm::parseCommandLineOptions(), parseDouble(), AbstractManglingParser< Derived, Alloc >::parseEncoding(), AbstractManglingParser< Derived, Alloc >::parseExpr(), ParseFunctionArgs(), llvm::remarks::parseHotnessThresholdOption(), llvm::GCNTargetMachine::parseMachineFunctionInfo(), llvm::remarks::YAMLRemarkParser::parseRemark(), AbstractManglingParser< Derived, Alloc >::parseSpecialName(), AbstractManglingParser< Derived, Alloc >::parseTemplateArg(), AbstractManglingParser< Derived, Alloc >::parseTemplateArgs(), passingValueIsAlwaysUndefined(), Passv64i1ArgInRegs(), llvm::R600TargetLowering::PerformDAGCombine(), llvm::SwiftErrorValueTracking::preassignVRegs(), llvm::LegacyDivergenceAnalysisImpl::print(), llvm::sys::printArg(), processArg(), processDbgDeclare(), processPSInputArgs(), ProfileDagInit(), programUndefinedIfUndefOrPoison(), llvm::promoteCall(), llvm::cl::ProvidePositionalOption(), llvm::Attributor::registerFunctionSignatureRewrite(), 
llvm::DIExpression::replaceArg(), replaceArgumentUses(), replaceConstants(), replaceSwiftErrorOps(), replaceWithCallToVeclib(), llvm::VarDefInit::resolveReferences(), llvm::DagInit::resolveReferences(), llvm::LoopAnalysisManagerFunctionProxy::Result::Result(), llvm::InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs >::Result::Result(), llvm::FunctionSpecializer::run(), llvm::PAEvalPass::run(), llvm::DivergenceAnalysisPrinterPass::run(), llvm::RequireAnalysisPass< AnalysisT, IRUnitT, AnalysisManagerT, ExtraArgTs >::run(), llvm::orc::SelfExecutorProcessControl::runAsIntFunction(), llvm::orc::SimpleRemoteEPC::runAsIntFunction(), llvm::orc::runAsIntFunction(), llvm::orc::rt_bootstrap::runAsIntFunctionWrapper(), llvm::orc::runAsMain(), llvm::runFuzzerOnInputs(), runIPSCCP(), llvm::IRTranslator::runOnMachineFunction(), llvm::coro::salvageDebugInfo(), llvm::ScalarEvolution::ScalarEvolution(), llvm::detail::scalbn(), llvm::FastISel::selectIntrinsicCall(), llvm::orc::shared::SPSArgList< SPSTagT, SPSTagTs... >::serialize(), llvm::CallLowering::setArgFlags(), llvm::SIMachineFunctionInfo::setWorkItemIDX(), llvm::SIMachineFunctionInfo::setWorkItemIDY(), llvm::SIMachineFunctionInfo::setWorkItemIDZ(), simplifyInvariantGroupIntrinsic(), simplifyX86movmsk(), llvm::orc::shared::SPSArgList< SPSTagT, SPSTagTs... >::size(), StoreTailCallArgumentsToStackSlot(), llvm::SubsumingPositionIterator::SubsumingPositionIterator(), llvm::Attributor::translateArgumentToCallSiteContent(), tryConstantFoldCall(), tryToElideArgumentCopy(), llvm::UpgradeARCRuntime(), llvm::UpgradeFunctionAttributes(), llvm::UpgradeIntrinsicCall(), llvm::IRPosition::value(), llvm::InstCombinerImpl::visitCallInst(), llvm::wouldInstructionBeTriviallyDead(), writeDIArgList(), and writeFunctionTypeMetadataRecords().

◆ Callee

amdgpu Simplify well known AMD library false FunctionCallee Callee

Definition at line 187 of file AMDGPULibCalls.cpp.

Referenced by llvm::AbstractCallSite::AbstractCallSite(), AddCalls(), addCallToCallGraph(), addNoRecurseAttrs(), adjustCallerSSPLevel(), adjustCallerStackProbes(), adjustCallerStackProbeSize(), llvm::SystemZTTIImpl::adjustInliningThreshold(), adjustMinLegalVectorWidth(), adjustNullPointerValidAttr(), allCallersPassValidPointerForArgument(), analyzeLoopUnrollCost(), llvm::AttributeFuncs::areInlineCompatible(), llvm::TargetTransformInfo::areInlineCompatible(), llvm::TargetTransformInfoImplBase::areInlineCompatible(), llvm::AArch64TTIImpl::areInlineCompatible(), llvm::GCNTTIImpl::areInlineCompatible(), llvm::X86TTIImpl::areInlineCompatible(), llvm::X86TTIImpl::areTypesABICompatible(), llvm::TargetTransformInfo::areTypesABICompatible(), llvm::TargetTransformInfoImplBase::areTypesABICompatible(), llvm::PPCTTIImpl::areTypesABICompatible(), AVRDAGToDAGISel::select< AVRISD::CALL >(), buildCallOperands(), CallExpr::CallExpr(), callsShareTOCBase(), canLongjmp(), canSinkInstructions(), computeFunctionSummary(), llvm::IRBuilderBase::CreateCall(), llvm::IRBuilderBase::CreateCallBr(), llvm::objcarc::createCallInstWithColors(), llvm::IRBuilderBase::CreateConstrainedFPCall(), llvm::IRBuilderBase::CreateInvoke(), doCallSiteSplitting(), emitBinaryFloatFnCallHelper(), EmitCall(), emitDirectiveRelocJalr(), llvm::emitInlinedInto(), llvm::emitInlinedIntoBasedOnCost(), emitLibCall(), emitTPIDR2Save(), emitUnaryFloatFnCallHelper(), findInitTrampoline(), llvm::sampleprof::FunctionSamples::findInlinedFunctions(), findUses(), llvm::AMDGPULibCalls::fold(), for(), llvm::FunctionCallee::FunctionCallee(), functionsHaveCompatibleAttributes(), llvm::MLInlineAdvisor::getAdviceImpl(), llvm::ReplayInlineAdvisor::getAdviceImpl(), getAllocationData(), getAllocationDataForFunction(), llvm::getAllocationFamily(), getAllocationSize(), llvm::IRPosition::getAssociatedArgument(), llvm::getAttributeBasedInliningDecision(), llvm::AbstractCallSite::getCallbackUses(), getCalledFunction(), getCallOpcode(), 
getDefaultInlineAdvice(), llvm::DOTGraphTraits< CallGraphDOTInfo * >::getEdgeAttributes(), getEdgeKind(), llvm::getFreedOperand(), getInlinableCS(), llvm::getInlineCost(), llvm::InlineAdvisor::getMandatoryKind(), llvm::AArch64InstrInfo::getOutliningType(), llvm::ARMBaseInstrInfo::getOutliningType(), llvm::MipsCCState::getSpecialCallingConvForCallee(), hasReturnsTwiceAttr(), llvm::Attributor::identifyDefaultAbstractAttributes(), INITIALIZE_PASS(), llvm::InformationCache::initializeModuleSlice(), inlineCallIfPossible(), inlineCallsImpl(), insertSinCosCall(), InstrBreaksNoFree(), InstrBreaksNonThrowing(), InstrBreaksNoSync(), isCalleeLoad(), llvm::CodeExtractor::isEligible(), llvm::HexagonTargetLowering::IsEligibleForTailCallOptimization(), llvm::SITargetLowering::isEligibleForTailCallOptimization(), isEmAsmCall(), isEqual(), isFunctionGlobalAddress(), isIndirectCall(), llvm::isInlineViable(), llvm::isLegalToPromote(), isReportingError(), isReturnNonNull(), llvm::isSafeToSpeculativelyExecuteWithOpcode(), llvm::SITargetLowering::LowerCall(), llvm::NVPTXTargetLowering::LowerCall(), llvm::SystemZTargetLowering::LowerCall(), llvm::HexagonTargetLowering::LowerCall(), llvm::LoongArchTargetLowering::LowerCall(), llvm::RISCVTargetLowering::LowerCall(), llvm::VETargetLowering::LowerCall(), llvm::SparcTargetLowering::LowerCall_32(), llvm::SparcTargetLowering::LowerCall_64(), llvm::SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(), llvm::SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(), llvm::SelectionDAGBuilder::LowerCallTo(), llvm::SelectionDAGBuilder::LowerDeoptimizeCall(), llvm::VETargetLowering::lowerDYNAMIC_STACKALLOC(), llvm::SparcTargetLowering::LowerF128Compare(), llvm::SparcTargetLowering::LowerF128Op(), LowerFSINCOS(), llvm::SparcTargetLowering::LowerGlobalTLSAddress(), llvm::IntrinsicLowering::LowerIntrinsicCall(), llvm::SelectionDAGBuilder::LowerStatepoint(), llvm::AMDGPUTargetLowering::lowerUnhandledCall(), llvm::SystemZTargetLowering::makeExternalCall(), 
llvm::TargetLowering::makeLibCall(), markAliveBlocks(), CallExpr::match(), llvm::SIMachineFunctionInfo::mayUseAGPRs(), llvm::AttributeFuncs::mergeAttributesForInlining(), llvm::MLInlineAdvisor::onSuccessfulInlining(), llvm::pdb::operator<<(), llvm::FortifiedLibCallSimplifier::optimizeCall(), llvm::LibCallSimplifier::optimizeCall(), AbstractManglingParser< Derived, Alloc >::parseExpr(), llvm::CallGraph::populateCallGraphNode(), llvm::SelectionDAGBuilder::populateCallLoweringInfo(), prepareDescriptorIndirectCall(), prepareIndirectCall(), CallExpr::printLeft(), llvm::sampleprof::ProfiledCallGraph::ProfiledCallGraph(), llvm::promoteCall(), llvm::promoteCallWithIfThenElse(), propagateCallAttrsFromCallee(), llvm::ImportedFunctionsInliningStatistics::recordInline(), llvm::CallGraphNode::removeAnyCallEdgeTo(), llvm::CallGraphNode::removeOneAbstractEdgeTo(), llvm::ReplayInlineAdvisor::ReplayInlineAdvisor(), llvm::SMEAttrs::requiresLazySave(), llvm::SMEAttrs::requiresSMChange(), llvm::AMDGPUSimplifyLibCallsPass::run(), llvm::AMDGPUUseNativeCallsPass::run(), llvm::InlinerPass::run(), llvm::ModuleInlinerPass::run(), runNVVMIntrRange(), llvm::FastISel::selectPatchpoint(), setAND(), setOR(), llvm::shouldInline(), llvm::simplifyCall(), simplifySuspendPoint(), llvm::SubsumingPositionIterator::SubsumingPositionIterator(), llvm::PPCTTIImpl::supportsTailCallFor(), llvm::thinLTOPropagateFunctionAttrs(), transformCallee(), llvm::tryPromoteCall(), UpdateCallGraphAfterInlining(), updateCallProfile(), updateCGAndAnalysisManagerForPass(), llvm::updateProfileCallee(), llvm::AMDGPULibCalls::useNative(), llvm::versionCallSite(), and llvm::sampleprof::SampleProfileWriterBinary::writeBody().

◆ calls

amdgpu Simplify well known AMD library calls

Definition at line 180 of file AMDGPULibCalls.cpp.

◆ EnablePreLink

cl::opt< bool > EnablePreLink("amdgpu-prelink", cl::desc("Enable pre-link mode optimizations"), cl::init(false), cl::Hidden) ( "amdgpu-prelink"  ,
cl::desc("Enable pre-link mode optimizations")  ,
cl::init(false)  ,
cl::Hidden   
)
static

◆ false

amdgpu Simplify well known AMD library false

Definition at line 180 of file AMDGPULibCalls.cpp.

◆ Name

amdgpu Simplify well known AMD library false FunctionCallee Value const Twine& Name
Initial value:
= "") {
CallInst *R = B.CreateCall(Callee, Arg, Name);
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
R->setCallingConv(F->getCallingConv());
return R;
}
template <typename IRB>
static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
Value *Arg2, const Twine &Name = "") {
CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
if (Function *F = dyn_cast<Function>(Callee.getCallee()))
R->setCallingConv(F->getCallingConv());
return R;
}
struct TableEntry {
double result;
double input;
}
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define F(x, y, z)
Definition: MD5.cpp:55
This class represents a function call, abstracting a target machine's calling convention.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
LLVM Value Representation.
Definition: Value.h:74

Definition at line 188 of file AMDGPULibCalls.cpp.

◆ simplifylib

amdgpu simplifylib

Definition at line 179 of file AMDGPULibCalls.cpp.

◆ tbl_acos

const TableEntry tbl_acos[]
static
Initial value:
= {
{MATH_PI / 2.0, 0.0},
{MATH_PI / 2.0, -0.0},
{0.0, 1.0},
{MATH_PI, -1.0}
}
#define MATH_PI

Definition at line 213 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_acosh

const TableEntry tbl_acosh[]
static
Initial value:
= {
{0.0, 1.0}
}

Definition at line 219 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_acospi

const TableEntry tbl_acospi[]
static
Initial value:
= {
{0.5, 0.0},
{0.5, -0.0},
{0.0, 1.0},
{1.0, -1.0}
}

Definition at line 222 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_asin

const TableEntry tbl_asin[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{MATH_PI / 2.0, 1.0},
{-MATH_PI / 2.0, -1.0}
}

Definition at line 228 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_asinh

const TableEntry tbl_asinh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 234 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_asinpi

const TableEntry tbl_asinpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{0.5, 1.0},
{-0.5, -1.0}
}

Definition at line 238 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_atan

const TableEntry tbl_atan[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{MATH_PI / 4.0, 1.0},
{-MATH_PI / 4.0, -1.0}
}

Definition at line 244 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_atanh

const TableEntry tbl_atanh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 250 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_atanpi

const TableEntry tbl_atanpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{0.25, 1.0},
{-0.25, -1.0}
}

Definition at line 254 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cbrt

const TableEntry tbl_cbrt[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0},
{1.0, 1.0},
{-1.0, -1.0},
}

Definition at line 260 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cos

const TableEntry tbl_cos[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 266 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cosh

const TableEntry tbl_cosh[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 270 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_cospi

const TableEntry tbl_cospi[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 274 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_erf

const TableEntry tbl_erf[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 282 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_erfc

const TableEntry tbl_erfc[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0}
}

Definition at line 278 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_exp

const TableEntry tbl_exp[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0},
{MATH_E, 1.0}
}
#define MATH_E

Definition at line 286 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_exp10

const TableEntry tbl_exp10[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0},
{10.0, 1.0}
}

Definition at line 296 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_exp2

const TableEntry tbl_exp2[]
static
Initial value:
= {
{1.0, 0.0},
{1.0, -0.0},
{2.0, 1.0}
}

Definition at line 291 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_expm1

const TableEntry tbl_expm1[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 301 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_log

const TableEntry tbl_log[]
static
Initial value:
= {
{0.0, 1.0},
{1.0, MATH_E}
}

Definition at line 305 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_log10

const TableEntry tbl_log10[]
static
Initial value:
= {
{0.0, 1.0},
{1.0, 10.0}
}

Definition at line 313 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_log2

const TableEntry tbl_log2[]
static
Initial value:
= {
{0.0, 1.0},
{1.0, 2.0}
}

Definition at line 309 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_rsqrt

const TableEntry tbl_rsqrt[]
static
Initial value:
= {
{1.0, 1.0},
{MATH_SQRT1_2, 2.0}
}
#define MATH_SQRT1_2

Definition at line 317 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sin

const TableEntry tbl_sin[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 321 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sinh

const TableEntry tbl_sinh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 325 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sinpi

const TableEntry tbl_sinpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 329 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_sqrt

const TableEntry tbl_sqrt[]
static
Initial value:
= {
{0.0, 0.0},
{1.0, 1.0},
{MATH_SQRT2, 2.0}
}
#define MATH_SQRT2

Definition at line 333 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tan

const TableEntry tbl_tan[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 338 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tanh

const TableEntry tbl_tanh[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 342 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tanpi

const TableEntry tbl_tanpi[]
static
Initial value:
= {
{0.0, 0.0},
{-0.0, -0.0}
}

Definition at line 346 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ tbl_tgamma

const TableEntry tbl_tgamma[]
static
Initial value:
= {
{1.0, 1.0},
{1.0, 2.0},
{2.0, 3.0},
{6.0, 4.0}
}

Definition at line 350 of file AMDGPULibCalls.cpp.

Referenced by getOptTable().

◆ UseNative

cl::list< std::string > UseNative("amdgpu-use-native", cl::desc("Comma separated list of functions to replace with native, or all"), cl::CommaSeparated, cl::ValueOptional, cl::Hidden) ( "amdgpu-use-native"  ,
cl::desc("Comma separated list of functions to replace with native, or all")  ,
cl::CommaSeparated  ,
cl::ValueOptional  ,
cl::Hidden   
)
static