LLVM
15.0.0git
|
#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Vectorize.h"
Go to the source code of this file.
Functions | |
LLVM_EXTERNAL_VISIBILITY void | LLVMInitializeAMDGPUTarget () |
static std::unique_ptr< TargetLoweringObjectFile > | createTLOF (const Triple &TT) |
static ScheduleDAGInstrs * | createSIMachineScheduler (MachineSchedContext *C) |
static ScheduleDAGInstrs * | createGCNMaxOccupancyMachineScheduler (MachineSchedContext *C) |
static ScheduleDAGInstrs * | createIterativeGCNMaxOccupancyMachineScheduler (MachineSchedContext *C) |
static ScheduleDAGInstrs * | createMinRegScheduler (MachineSchedContext *C) |
static ScheduleDAGInstrs * | createIterativeILPMachineScheduler (MachineSchedContext *C) |
static StringRef | computeDataLayout (const Triple &TT) |
static LLVM_READNONE StringRef | getGPUOrDefault (const Triple &TT, StringRef GPU) |
static Reloc::Model | getEffectiveRelocModel (Optional< Reloc::Model > RM) |
static bool | mustPreserveGV (const GlobalValue &GV) |
Predicate for the Internalize pass. More... | |
Variables | |
static cl::opt< bool > | EnableSROA ("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true)) |
static cl::opt< bool > | EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false)) |
static cl::opt< bool > | OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true)) |
static cl::opt< bool > | EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden) |
static cl::opt< bool > | EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden) |
static cl::opt< bool > | EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true)) |
static cl::opt< bool > | EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true)) |
static cl::opt< bool > | EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true)) |
static cl::opt< bool, true > | LateCFGStructurize ("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden) |
static cl::opt< bool > | EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | OptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableAtomicOptimizations ("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden) |
static cl::opt< bool > | EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableInsertDelayAlu ("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc")) |
static cl::opt< bool > | EnableSetWavePriority ("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden) |
static cl::opt< bool > | EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableStructurizerWorkarounds ("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnableLDSReplaceWithPointer ("amdgpu-enable-lds-replace-with-pointer", cl::desc("Enable LDS replace with pointer pass"), cl::init(false), cl::Hidden) |
static cl::opt< bool, true > | EnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden) |
static cl::opt< bool > | EnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true)) |
static MachineSchedRegistry | SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler) |
static MachineSchedRegistry | GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler) |
static MachineSchedRegistry | IterativeGCNMaxOccupancySchedRegistry ("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler) |
static MachineSchedRegistry | GCNMinRegSchedRegistry ("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler) |
static MachineSchedRegistry | GCNILPSchedRegistry ("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler) |
static const char | RegAllocOptNotSupportedMessage [] |
The AMDGPU target machine contains all of the hardware-specific information needed to emit code for SI+ GPUs.
Definition in file AMDGPUTargetMachine.cpp.
Definition at line 471 of file AMDGPUTargetMachine.cpp.
References llvm::Triple::r600.
|
static |
Definition at line 405 of file AMDGPUTargetMachine.cpp.
References llvm::createAMDGPUExportClusteringDAGMutation(), llvm::createAMDGPUMacroFusionDAGMutation(), llvm::createIGroupLPDAGMutation(), llvm::createLoadClusterDAGMutation(), llvm::createSchedBarrierDAGMutation(), llvm::createStoreClusterDAGMutation(), and llvm::ARM_MB::ST.
|
static |
Definition at line 420 of file AMDGPUTargetMachine.cpp.
References llvm::createLoadClusterDAGMutation(), llvm::createStoreClusterDAGMutation(), llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY, and llvm::ARM_MB::ST.
|
static |
Definition at line 436 of file AMDGPUTargetMachine.cpp.
References llvm::createAMDGPUMacroFusionDAGMutation(), llvm::createLoadClusterDAGMutation(), llvm::createStoreClusterDAGMutation(), llvm::GCNIterativeScheduler::SCHEDULE_ILP, and llvm::ARM_MB::ST.
|
static |
Definition at line 430 of file AMDGPUTargetMachine.cpp.
References llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED.
|
static |
Definition at line 400 of file AMDGPUTargetMachine.cpp.
|
static |
Definition at line 396 of file AMDGPUTargetMachine.cpp.
|
static |
Definition at line 498 of file AMDGPUTargetMachine.cpp.
References llvm::Reloc::PIC_.
|
static |
Definition at line 487 of file AMDGPUTargetMachine.cpp.
References llvm::Triple::amdgcn, llvm::Triple::AMDHSA, and llvm::StringRef::empty().
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget | ( | ) |
Definition at line 322 of file AMDGPUTargetMachine.cpp.
References llvm::PassRegistry::getPassRegistry(), llvm::getTheAMDGPUTarget(), llvm::getTheGCNTarget(), llvm::initializeAMDGPUAAWrapperPassPass(), llvm::initializeAMDGPUAlwaysInlinePass(), llvm::initializeAMDGPUAnnotateKernelFeaturesPass(), llvm::initializeAMDGPUAnnotateUniformValuesPass(), llvm::initializeAMDGPUArgumentUsageInfoPass(), llvm::initializeAMDGPUAtomicOptimizerPass(), llvm::initializeAMDGPUAttributorPass(), llvm::initializeAMDGPUCodeGenPreparePass(), llvm::initializeAMDGPUCtorDtorLoweringPass(), llvm::initializeAMDGPUDAGToDAGISelPass(), llvm::initializeAMDGPUExternalAAWrapperPass(), llvm::initializeAMDGPUInsertDelayAluPass(), llvm::initializeAMDGPULateCodeGenPreparePass(), llvm::initializeAMDGPULowerIntrinsicsPass(), llvm::initializeAMDGPULowerKernelArgumentsPass(), llvm::initializeAMDGPULowerKernelAttributesPass(), llvm::initializeAMDGPULowerModuleLDSPass(), llvm::initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(), llvm::initializeAMDGPUPostLegalizerCombinerPass(), llvm::initializeAMDGPUPreLegalizerCombinerPass(), llvm::initializeAMDGPUPrintfRuntimeBindingPass(), llvm::initializeAMDGPUPromoteAllocaPass(), llvm::initializeAMDGPUPromoteAllocaToVectorPass(), llvm::initializeAMDGPUPromoteKernelArgumentsPass(), llvm::initializeAMDGPUPropagateAttributesEarlyPass(), llvm::initializeAMDGPUPropagateAttributesLatePass(), llvm::initializeAMDGPURegBankCombinerPass(), llvm::initializeAMDGPUReplaceLDSUseWithPointerPass(), llvm::initializeAMDGPUResourceUsageAnalysisPass(), llvm::initializeAMDGPURewriteOutArgumentsPass(), llvm::initializeAMDGPUSimplifyLibCallsPass(), llvm::initializeAMDGPUUnifyDivergentExitNodesPass(), llvm::initializeAMDGPUUnifyMetadataPass(), llvm::initializeAMDGPUUseNativeCallsPass(), llvm::initializeGCNDPPCombinePass(), llvm::initializeGCNNSAReassignPass(), llvm::initializeGCNPreRAOptimizationsPass(), llvm::initializeGlobalISel(), llvm::initializeR600ClauseMergePassPass(), llvm::initializeR600ControlFlowFinalizerPass(), 
llvm::initializeR600ExpandSpecialInstrsPassPass(), llvm::initializeR600PacketizerPass(), llvm::initializeR600VectorRegMergerPass(), llvm::initializeSIAnnotateControlFlowPass(), llvm::initializeSIFixSGPRCopiesPass(), llvm::initializeSIFixVGPRCopiesPass(), llvm::initializeSIFoldOperandsPass(), llvm::initializeSIFormMemoryClausesPass(), llvm::initializeSIInsertHardClausesPass(), llvm::initializeSIInsertWaitcntsPass(), llvm::initializeSILateBranchLoweringPass(), llvm::initializeSILoadStoreOptimizerPass(), llvm::initializeSILowerControlFlowPass(), llvm::initializeSILowerI1CopiesPass(), llvm::initializeSILowerSGPRSpillsPass(), llvm::initializeSIMemoryLegalizerPass(), llvm::initializeSIModeRegisterPass(), llvm::initializeSIOptimizeExecMaskingPass(), llvm::initializeSIOptimizeExecMaskingPreRAPass(), llvm::initializeSIOptimizeVGPRLiveRangePass(), llvm::initializeSIPeepholeSDWAPass(), llvm::initializeSIPostRABundlerPass(), llvm::initializeSIPreAllocateWWMRegsPass(), llvm::initializeSIPreEmitPeepholePass(), llvm::initializeSIShrinkInstructionsPass(), llvm::initializeSIWholeQuadModePass(), X, and Y.
|
static |
Predicate for the Internalize pass.
Definition at line 542 of file AMDGPUTargetMachine.cpp.
References F, llvm::AMDGPU::isEntryFunctionCC(), llvm::Constant::removeDeadConstantUsers(), and llvm::Value::use_empty().
Referenced by llvm::AMDGPUTargetMachine::adjustPassManager(), and llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUTargetMachine::adjustPassManager().
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses().
|
static |
Referenced by llvm::AMDGPUPassConfig::addCodeGenPrepare().
|
static |
Referenced by llvm::AMDGPUPassConfig::addCodeGenPrepare().
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses().
|
static |
|
static |
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses().
|
static |
|
static |
|
static |
|
static |
Referenced by llvm::AMDGPUPassConfig::addIRPasses().
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
|
static |
Definition at line 1348 of file AMDGPUTargetMachine.cpp.
|
static |
Referenced by llvm::GCNTargetMachine::getSubtargetImpl().
|
static |