This file contains both the AMDGPU target machine and the CodeGen pass builder. More...

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUAliasAnalysis.h"
#include "AMDGPUBarrierLatency.h"
#include "AMDGPUCoExecSchedStrategy.h"
#include "AMDGPUCtorDtorLowering.h"
#include "AMDGPUExportClustering.h"
#include "AMDGPUExportKernelRuntimeHandles.h"
#include "AMDGPUHazardLatency.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUISelDAGToDAG.h"
#include "AMDGPULowerVGPREncoding.h"
#include "AMDGPUMacroFusion.h"
#include "AMDGPUNextUseAnalysis.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUPreloadKernArgProlog.h"
#include "AMDGPUPrepareAGPRAlloc.h"
#include "AMDGPURemoveIncompatibleFunctions.h"
#include "AMDGPUReserveWWMRegs.h"
#include "AMDGPUResourceUsageAnalysis.h"
#include "AMDGPUSplitModule.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "AMDGPUWaitSGPRHazards.h"
#include "GCNDPPCombine.h"
#include "GCNIterativeScheduler.h"
#include "GCNNSAReassign.h"
#include "GCNPreRALongBranchReg.h"
#include "GCNPreRAOptimizations.h"
#include "GCNRewritePartialRegUses.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
#include "R600.h"
#include "R600TargetMachine.h"
#include "SIFixSGPRCopies.h"
#include "SIFixVGPRCopies.h"
#include "SIFoldOperands.h"
#include "SIFormMemoryClauses.h"
#include "SILoadStoreOptimizer.h"
#include "SILowerControlFlow.h"
#include "SILowerSGPRSpills.h"
#include "SILowerWWMCopies.h"
#include "SIMachineFunctionInfo.h"
#include "SIMachineScheduler.h"
#include "SIOptimizeExecMasking.h"
#include "SIOptimizeExecMaskingPreRA.h"
#include "SIOptimizeVGPRLiveRange.h"
#include "SIPeepholeSDWA.h"
#include "SIPostRABundler.h"
#include "SIPreAllocateWWMRegs.h"
#include "SIShrinkInstructions.h"
#include "SIWholeQuadMode.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/KernelInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/AtomicExpand.h"
#include "llvm/CodeGen/BranchRelaxation.h"
#include "llvm/CodeGen/DeadMachineInstructionElim.h"
#include "llvm/CodeGen/EarlyIfConversion.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/CodeGen/MachineLICM.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/PostRAHazardRecognizer.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/CodeGenPassBuilder.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ExpandVariadics.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/Transforms/Scalar/FlattenCFG.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h"
#include "llvm/Transforms/Scalar/Sink.h"
#include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h"
#include "llvm/Transforms/Scalar/StructurizeCFG.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/LCSSA.h"
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/Transforms/Utils/UnifyLoopExits.h"
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>
#include "llvm/Passes/TargetPassRegistry.inc"

Macros
#define	GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
#define	ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)
#define	ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)
#define	MODULE_ANALYSIS(NAME, CREATE_PASS)
#define	MODULE_PASS(NAME, CREATE_PASS)
#define	MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define	FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#define	FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS)
#define	FUNCTION_PASS(NAME, CREATE_PASS)
#define	FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define	LOOP_ANALYSIS(NAME, CREATE_PASS)
#define	LOOP_PASS(NAME, CREATE_PASS)
#define	MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#define	MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#define	MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define	ADD_PASS(NAME, CREATE_PASS)
#define	ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
#define	ADD_ANALYSIS_PASS(NAME, CREATE_PASS)
#define	MODULE_PASS(NAME, CREATE_PASS)
#define	MODULE_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define	FUNCTION_PASS(NAME, CREATE_PASS)
#define	FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define	LOOP_PASS(NAME, CREATE_PASS)
#define	MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
#define	MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, PARAMS)
#define	MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#define	MODULE_ANALYSIS(NAME, CREATE_PASS)
#define	FUNCTION_ANALYSIS(NAME, CREATE_PASS)
#define	FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS)
#define	LOOP_ANALYSIS(NAME, CREATE_PASS)
#define	MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS)

Functions
static void	diagnoseUnsupportedCoExecSchedulerSelection (const Function &F, const GCNSubtarget &ST)
static bool	useNoopPostScheduler (const Function &F)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void	LLVMInitializeAMDGPUTarget ()
static std::unique_ptr< TargetLoweringObjectFile >	createTLOF (const Triple &TT)
static ScheduleDAGInstrs *	createSIMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs *	createGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs *	createGCNMaxILPMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs *	createGCNMaxMemoryClauseMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs *	createIterativeGCNMaxOccupancyMachineScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs *	createMinRegScheduler (MachineSchedContext *C)
static ScheduleDAGInstrs *	createIterativeILPMachineScheduler (MachineSchedContext *C)
static LLVM_READNONE StringRef	getGPUOrDefault (const Triple &TT, StringRef GPU)
static Reloc::Model	getEffectiveRelocModel ()
static bool	mustPreserveGV (const GlobalValue &GV)
	Predicate for the Internalize pass.
static Expected< ScanOptions >	parseAMDGPUAtomicOptimizerStrategy (StringRef Params)
Expected< AMDGPUAttributorOptions >	parseAMDGPUAttributorPassOptions (StringRef Params)

Variables
static cl::opt< bool >	EnableEarlyIfConversion ("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
static cl::opt< bool >	OptExecMaskPreRA ("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
static cl::opt< bool >	LowerCtorDtor ("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableLoadStoreVectorizer ("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
static cl::opt< bool >	ScalarizeGlobal ("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
static cl::opt< bool >	InternalizeSymbols ("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
static cl::opt< bool >	EarlyInlineAll ("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
static cl::opt< bool >	RemoveIncompatibleFunctions ("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))
static cl::opt< bool >	EnableSDWAPeephole ("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
static cl::opt< bool >	EnableDPPCombine ("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
static cl::opt< bool >	EnableAMDGPUAliasAnalysis ("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
static cl::opt< bool >	EnableLibCallSimplify ("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableLowerKernelArguments ("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableRegReassign ("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
static cl::opt< bool >	OptVGPRLiveRange ("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)
static cl::opt< ScanOptions >	AMDGPUAtomicOptimizerStrategy ("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values(clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))
static cl::opt< bool >	EnableSIModeRegisterPass ("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableInsertDelayAlu ("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableVOPD ("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableDCEInRA ("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
static cl::opt< bool >	EnableSetWavePriority ("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)
static cl::opt< bool >	EnableScalarIRPasses ("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableLowerExecSync ("amdgpu-enable-lower-exec-sync", cl::desc("Enable lowering of execution synchronization."), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableSwLowerLDS ("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden)
static cl::opt< bool, true >	EnableObjectLinking ("amdgpu-enable-object-linking", cl::desc("Enable object linking for cross-TU LDS and ABI support"), cl::location(AMDGPUTargetMachine::EnableObjectLinking), cl::init(false), cl::Hidden)
static cl::opt< bool, true >	EnableLowerModuleLDS ("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnablePreRAOptimizations ("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnablePromoteKernelArguments ("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))
static cl::opt< bool >	EnableImageIntrinsicOptimizer ("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableLoopPrefetch ("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))
static cl::opt< std::string >	AMDGPUSchedStrategy ("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init(""))
static cl::opt< bool >	EnableRewritePartialRegUses ("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)
static cl::opt< bool >	EnableHipStdPar ("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)
static cl::opt< bool >	EnableAMDGPUAttributor ("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)
static cl::opt< bool >	NewRegBankSelect ("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden)
static cl::opt< bool >	HasClosedWorldAssumption ("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden)
static cl::opt< bool >	EnableUniformIntrinsicCombine ("amdgpu-enable-uniform-intrinsic-combine", cl::desc("Enable/Disable the Uniform Intrinsic Combine Pass"), cl::init(true), cl::Hidden)
static MachineSchedRegistry	SISchedRegistry ("si", "Run SI's custom scheduler", createSIMachineScheduler)
static MachineSchedRegistry	GCNMaxOccupancySchedRegistry ("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
static MachineSchedRegistry	GCNMaxILPSchedRegistry ("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)
static MachineSchedRegistry	GCNMaxMemoryClauseSchedRegistry ("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler)
static MachineSchedRegistry	IterativeGCNMaxOccupancySchedRegistry ("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
static MachineSchedRegistry	GCNMinRegSchedRegistry ("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
static MachineSchedRegistry	GCNILPSchedRegistry ("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
static const char	RegAllocOptNotSupportedMessage []

Detailed Description

This file contains both the AMDGPU target machine and the CodeGen pass builder.

The AMDGPU target machine contains all of the hardware-specific information needed to emit code for SI+ GPUs in the legacy pass manager pipeline. The CodeGen pass builder handles the pass pipeline for the new pass manager.

Definition in file AMDGPUTargetMachine.cpp.

Macro Definition Documentation

◆ ADD_ANALYSIS_PASS

#define ADD_ANALYSIS_PASS	(		NAME,
			CREATE_PASS )

Value:

  if (Name == "invalidate<" NAME ">") {                                        \
    PM.addPass(InvalidateAnalysisPass<                                         \
               std::remove_reference_t<decltype(CREATE_PASS)>>());             \
    return true;                                                               \
  }                                                                            \
  if (Name == "require<" NAME ">") {                                           \
    PM.addPass(                                                                \
        RequireAnalysisPass<std::remove_reference_t<decltype(CREATE_PASS)>,    \
                            MachineFunction>());                               \
    return true;                                                               \
  }

◆ ADD_CLASS_PASS_TO_PASS_NAME

#define ADD_CLASS_PASS_TO_PASS_NAME	(		NAME,
			CREATE_PASS )

Value:

PIC->addClassToPassName(decltype(CREATE_PASS)::name(), NAME);

PIC

PassInstrumentationCallbacks PIC

Definition PassBuilderBindings.cpp:57

name

static const char * name

Definition SVEIntrinsicOpts.cpp:73

◆ ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS

#define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS	(		NAME,
			CLASS )

Value:

PIC->addClassToPassName(CLASS, NAME);

◆ ADD_PASS

#define ADD_PASS	(		NAME,
			CREATE_PASS )

Value:

  if (Name == NAME) {                                                          \
    PM.addPass(CREATE_PASS);                                                   \
    return true;                                                               \
  }

◆ ADD_PASS_WITH_PARAMS

#define ADD_PASS_WITH_PARAMS	(	NAME,
		CREATE_PASS,
		PARSER )

Value:

  if (PassBuilder::checkParametrizedPassName(Name, NAME)) {                    \
    auto Params = PassBuilder::parsePassParameters(PARSER, Name, NAME);        \
    if (!Params) {                                                             \
      errs() << NAME ": " << toString(Params.takeError()) << '\n';             \
      return false;                                                            \
    }                                                                          \
    PM.addPass(CREATE_PASS(Params.get()));                                     \
    return true;                                                               \
  }

◆ FUNCTION_ALIAS_ANALYSIS [1/2]

#define FUNCTION_ALIAS_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

ADD_CLASS_PASS_TO_PASS_NAME

#define ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ FUNCTION_ALIAS_ANALYSIS [2/2]

#define FUNCTION_ALIAS_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

  if (Name == NAME) {                                                          \
    AM.registerFunctionAnalysis<                                               \
        std::remove_reference_t<decltype(CREATE_PASS)>>();                     \
    return true;                                                               \
  }

◆ FUNCTION_ANALYSIS [1/2]

#define FUNCTION_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ FUNCTION_ANALYSIS [2/2]

#define FUNCTION_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

AM.registerPass([&] { return CREATE_PASS; });

◆ FUNCTION_PASS [1/2]

#define FUNCTION_PASS	(		NAME,
			CREATE_PASS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ FUNCTION_PASS [2/2]

#define FUNCTION_PASS	(		NAME,
			CREATE_PASS )

Value:

ADD_PASS(NAME, CREATE_PASS)

ADD_PASS

#define ADD_PASS(NAME, CREATE_PASS)

◆ FUNCTION_PASS_WITH_PARAMS [1/2]

#define FUNCTION_PASS_WITH_PARAMS	(	NAME,
		CLASS,
		CREATE_PASS,
		PARSER,
		PARAMS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)

ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS

#define ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)

◆ FUNCTION_PASS_WITH_PARAMS [2/2]

#define FUNCTION_PASS_WITH_PARAMS	(	NAME,
		CLASS,
		CREATE_PASS,
		PARSER,
		PARAMS )

Value:

ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)

ADD_PASS_WITH_PARAMS

#define ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)

◆ GET_PASS_REGISTRY

#define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"

◆ LOOP_ANALYSIS [1/2]

#define LOOP_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ LOOP_ANALYSIS [2/2]

#define LOOP_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

AM.registerPass([&] { return CREATE_PASS; });

◆ LOOP_PASS [1/2]

#define LOOP_PASS	(		NAME,
			CREATE_PASS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ LOOP_PASS [2/2]

#define LOOP_PASS	(		NAME,
			CREATE_PASS )

Value:

ADD_PASS(NAME, CREATE_PASS)

◆ MACHINE_FUNCTION_ANALYSIS [1/3]

#define MACHINE_FUNCTION_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ MACHINE_FUNCTION_ANALYSIS [2/3]

#define MACHINE_FUNCTION_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

ADD_ANALYSIS_PASS(NAME, CREATE_PASS)

ADD_ANALYSIS_PASS

#define ADD_ANALYSIS_PASS(NAME, CREATE_PASS)

◆ MACHINE_FUNCTION_ANALYSIS [3/3]

#define MACHINE_FUNCTION_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

AM.registerPass([&] { return CREATE_PASS; });

◆ MACHINE_FUNCTION_PASS [1/2]

#define MACHINE_FUNCTION_PASS	(		NAME,
			CREATE_PASS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ MACHINE_FUNCTION_PASS [2/2]

#define MACHINE_FUNCTION_PASS	(		NAME,
			CREATE_PASS )

Value:

ADD_PASS(NAME, CREATE_PASS)

◆ MACHINE_FUNCTION_PASS_WITH_PARAMS [1/2]

#define MACHINE_FUNCTION_PASS_WITH_PARAMS	(	NAME,
		CLASS,
		CREATE_PASS,
		PARSER,
		PARAMS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)

◆ MACHINE_FUNCTION_PASS_WITH_PARAMS [2/2]

#define MACHINE_FUNCTION_PASS_WITH_PARAMS	(	NAME,
		CLASS,
		CREATE_PASS,
		PARSER,
		PARAMS )

Value:

ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)

◆ MODULE_ANALYSIS [1/2]

#define MODULE_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ MODULE_ANALYSIS [2/2]

#define MODULE_ANALYSIS	(		NAME,
			CREATE_PASS )

Value:

AM.registerPass([&] { return CREATE_PASS; });

◆ MODULE_PASS [1/2]

#define MODULE_PASS	(		NAME,
			CREATE_PASS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME(NAME, CREATE_PASS)

◆ MODULE_PASS [2/2]

#define MODULE_PASS	(		NAME,
			CREATE_PASS )

Value:

ADD_PASS(NAME, CREATE_PASS)

◆ MODULE_PASS_WITH_PARAMS [1/2]

#define MODULE_PASS_WITH_PARAMS	(	NAME,
		CLASS,
		CREATE_PASS,
		PARSER,
		PARAMS )

Value:

ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS(NAME, CLASS)

◆ MODULE_PASS_WITH_PARAMS [2/2]

#define MODULE_PASS_WITH_PARAMS	(	NAME,
		CLASS,
		CREATE_PASS,
		PARSER,
		PARAMS )

Value:

ADD_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)

Function Documentation

◆ createGCNMaxILPMachineScheduler()

ScheduleDAGInstrs * createGCNMaxILPMachineScheduler ( MachineSchedContext * C )

static

Definition at line 766 of file AMDGPUTargetMachine.cpp.

References llvm::ScheduleDAGMI::addMutation(), llvm::CallingConv::C, llvm::createIGroupLPDAGMutation(), and llvm::AMDGPU::Initial.

Referenced by llvm::GCNTargetMachine::createMachineScheduler().

◆ createGCNMaxMemoryClauseMachineScheduler()

ScheduleDAGInstrs * createGCNMaxMemoryClauseMachineScheduler ( MachineSchedContext * C )

static

Definition at line 774 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, llvm::createAMDGPUBarrierLatencyDAGMutation(), llvm::createAMDGPUExportClusteringDAGMutation(), llvm::createAMDGPUHazardLatencyDAGMutation(), llvm::createLoadClusterDAGMutation(), and llvm::createStoreClusterDAGMutation().

Referenced by llvm::GCNTargetMachine::createMachineScheduler().

◆ createGCNMaxOccupancyMachineScheduler()

ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C )

static

Definition at line 750 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, llvm::createAMDGPUBarrierLatencyDAGMutation(), llvm::createAMDGPUExportClusteringDAGMutation(), llvm::createAMDGPUHazardLatencyDAGMutation(), llvm::createAMDGPUMacroFusionDAGMutation(), llvm::createIGroupLPDAGMutation(), llvm::createLoadClusterDAGMutation(), llvm::createStoreClusterDAGMutation(), and llvm::AMDGPU::Initial.

Referenced by llvm::GCNTargetMachine::createMachineScheduler().

◆ createIterativeGCNMaxOccupancyMachineScheduler()

ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler ( MachineSchedContext * C )

static

Definition at line 788 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, llvm::createIGroupLPDAGMutation(), llvm::createLoadClusterDAGMutation(), llvm::createStoreClusterDAGMutation(), llvm::AMDGPU::Initial, and llvm::GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY.

Referenced by llvm::GCNTargetMachine::createMachineScheduler().

◆ createIterativeILPMachineScheduler()

ScheduleDAGInstrs * createIterativeILPMachineScheduler ( MachineSchedContext * C )

static

Definition at line 807 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, llvm::createAMDGPUMacroFusionDAGMutation(), llvm::createIGroupLPDAGMutation(), llvm::createLoadClusterDAGMutation(), llvm::createStoreClusterDAGMutation(), llvm::AMDGPU::Initial, and llvm::GCNIterativeScheduler::SCHEDULE_ILP.

Referenced by llvm::GCNTargetMachine::createMachineScheduler().

◆ createMinRegScheduler()

ScheduleDAGInstrs * createMinRegScheduler ( MachineSchedContext * C )

static

Definition at line 799 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C, llvm::createIGroupLPDAGMutation(), llvm::AMDGPU::Initial, and llvm::GCNIterativeScheduler::SCHEDULE_MINREGFORCED.

Referenced by llvm::GCNTargetMachine::createMachineScheduler().

◆ createSIMachineScheduler()

ScheduleDAGInstrs * createSIMachineScheduler ( MachineSchedContext * C )

static

Definition at line 745 of file AMDGPUTargetMachine.cpp.

References llvm::CallingConv::C.

Referenced by llvm::GCNTargetMachine::createMachineScheduler().

◆ createTLOF()

std::unique_ptr< TargetLoweringObjectFile > createTLOF ( const Triple & TT )

static

Definition at line 741 of file AMDGPUTargetMachine.cpp.

◆ diagnoseUnsupportedCoExecSchedulerSelection()

void diagnoseUnsupportedCoExecSchedulerSelection	(	const Function &	F,
		const GCNSubtarget &	ST )

static

Definition at line 601 of file AMDGPUTargetMachine.cpp.

References llvm::DS_Warning, and F.

Referenced by llvm::GCNTargetMachine::createMachineScheduler().

◆ getEffectiveRelocModel()

Reloc::Model getEffectiveRelocModel ( )

static

Definition at line 862 of file AMDGPUTargetMachine.cpp.

References llvm::Reloc::PIC_.

◆ getGPUOrDefault()

LLVM_READNONE StringRef getGPUOrDefault	(	const Triple &	TT,
		StringRef	GPU )

static

Definition at line 851 of file AMDGPUTargetMachine.cpp.

References llvm::Triple::AMDHSA, and llvm::StringRef::empty().

Referenced by llvm::AMDGPUTargetMachine::AMDGPUTargetMachine().

◆ LLVMInitializeAMDGPUTarget()

LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget ( )

Definition at line 649 of file AMDGPUTargetMachine.cpp.

◆ mustPreserveGV()

bool mustPreserveGV ( const GlobalValue & GV )

static

Predicate for the Internalize pass.

Definition at line 917 of file AMDGPUTargetMachine.cpp.

References llvm::dyn_cast(), F, llvm::AMDGPU::isEntryFunctionCC(), llvm::Constant::removeDeadConstantUsers(), and llvm::Value::use_empty().

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ parseAMDGPUAtomicOptimizerStrategy()

Expected< ScanOptions > parseAMDGPUAtomicOptimizerStrategy ( StringRef Params )

static

Definition at line 933 of file AMDGPUTargetMachine.cpp.

References llvm::StringSwitch< T, R >::Cases(), llvm::StringRef::consume_front(), llvm::StringSwitch< T, R >::Default(), llvm::DPP, llvm::StringRef::empty(), llvm::inconvertibleErrorCode(), llvm::Iterative, llvm::make_error(), and llvm::None.

◆ parseAMDGPUAttributorPassOptions()

Expected< AMDGPUAttributorOptions > parseAMDGPUAttributorPassOptions ( StringRef Params )

Definition at line 948 of file AMDGPUTargetMachine.cpp.

References llvm::StringRef::empty(), llvm::formatv(), llvm::inconvertibleErrorCode(), llvm::make_error(), and llvm::StringRef::split().

◆ useNoopPostScheduler()

bool useNoopPostScheduler ( const Function & F )

static

Definition at line 611 of file AMDGPUTargetMachine.cpp.

References F, llvm::Attribute::getValueAsString(), and llvm::Attribute::isValid().

Referenced by llvm::GCNTargetMachine::createPostMachineScheduler().

Variable Documentation

◆ AMDGPUAtomicOptimizerStrategy

cl::opt< ScanOptions > AMDGPUAtomicOptimizerStrategy("amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), cl::values( clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"), clEnumValN(ScanOptions::Iterative, "Iterative", "Use Iterative approach for scan"), clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")))

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses().

◆ AMDGPUSchedStrategy

cl::opt< std::string > AMDGPUSchedStrategy("amdgpu-sched-strategy", cl::desc("Select custom AMDGPU scheduling strategy."), cl::Hidden, cl::init(""))

static

Referenced by llvm::AMDGPU::getSchedStrategy().

◆ EarlyInlineAll

cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableAMDGPUAliasAnalysis

cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUTargetMachine::registerDefaultAliasAnalyses().

◆ EnableAMDGPUAttributor

cl::opt< bool > EnableAMDGPUAttributor("amdgpu-attributor-enable", cl::desc("Enable AMDGPUAttributorPass"), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableDCEInRA

cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))

static

◆ EnableDPPCombine

cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))

static

◆ EnableEarlyIfConversion

cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))

static

◆ EnableHipStdPar

cl::opt< bool > EnableHipStdPar("amdgpu-enable-hipstdpar", cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), cl::Hidden)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableImageIntrinsicOptimizer

cl::opt< bool > EnableImageIntrinsicOptimizer("amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses().

◆ EnableInsertDelayAlu

cl::opt< bool > EnableInsertDelayAlu("amdgpu-enable-delay-alu", cl::desc("Enable s_delay_alu insertion"), cl::init(true), cl::Hidden)

static

◆ EnableLibCallSimplify

cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableLoadStoreVectorizer

cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUPassConfig::addCodeGenPrepare().

◆ EnableLoopPrefetch

cl::opt< bool > EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false))

static

Referenced by llvm::AMDGPUPassConfig::addStraightLineScalarOptimizationPasses().

◆ EnableLowerExecSync

cl::opt< bool > EnableLowerExecSync("amdgpu-enable-lower-exec-sync", cl::desc("Enable lowering of execution synchronization."), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableLowerKernelArguments

cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUPassConfig::addCodeGenPrepare().

◆ EnableLowerModuleLDS

cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses().

◆ EnableObjectLinking

cl::opt< bool, true > EnableObjectLinking("amdgpu-enable-object-linking", cl::desc("Enable object linking for cross-TU LDS and ABI support"), cl::location(AMDGPUTargetMachine::EnableObjectLinking), cl::init(false), cl::Hidden)

static

◆ EnablePreRAOptimizations

cl::opt< bool > EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), cl::Hidden)

static

◆ EnablePromoteKernelArguments

cl::opt< bool > EnablePromoteKernelArguments("amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true))

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableRegReassign

cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)

static

◆ EnableRewritePartialRegUses

cl::opt< bool > EnableRewritePartialRegUses("amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden)

static

◆ EnableScalarIRPasses

cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses().

◆ EnableSDWAPeephole

cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))

static

◆ EnableSetWavePriority

cl::opt< bool > EnableSetWavePriority("amdgpu-set-wave-priority", cl::desc("Adjust wave priority"), cl::init(false), cl::Hidden)

static

◆ EnableSIModeRegisterPass

cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)

static

◆ EnableSwLowerLDS

cl::opt< bool > EnableSwLowerLDS("amdgpu-enable-sw-lower-lds", cl::desc("Enable lowering of lds to global memory pass " "and asan instrument resulting IR."), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableUniformIntrinsicCombine

cl::opt< bool > EnableUniformIntrinsicCombine("amdgpu-enable-uniform-intrinsic-combine", cl::desc("Enable/Disable the Uniform Intrinsic Combine Pass"), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses(), and llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ EnableVOPD

cl::opt< bool > EnableVOPD("amdgpu-enable-vopd", cl::desc("Enable VOPD, dual issue of VALU in wave32"), cl::init(true), cl::Hidden)

static

Referenced by llvm::GCNTargetMachine::createPostMachineScheduler().

◆ GCNILPSchedRegistry

MachineSchedRegistry GCNILPSchedRegistry("gcn-iterative-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)

static

◆ GCNMaxILPSchedRegistry

MachineSchedRegistry GCNMaxILPSchedRegistry("gcn-max-ilp", "Run GCN scheduler to maximize ilp", createGCNMaxILPMachineScheduler)

static

◆ GCNMaxMemoryClauseSchedRegistry

MachineSchedRegistry GCNMaxMemoryClauseSchedRegistry("gcn-max-memory-clause", "Run GCN scheduler to maximize memory clause", createGCNMaxMemoryClauseMachineScheduler)

static

◆ GCNMaxOccupancySchedRegistry

MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)

static

◆ GCNMinRegSchedRegistry

MachineSchedRegistry GCNMinRegSchedRegistry("gcn-iterative-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)

static

◆ HasClosedWorldAssumption

cl::opt< bool > HasClosedWorldAssumption("amdgpu-link-time-closed-world", cl::desc("Whether has closed-world assumption at link time"), cl::init(false), cl::Hidden)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ InternalizeSymbols

cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)

static

Referenced by llvm::AMDGPUTargetMachine::registerPassBuilderCallbacks().

◆ IterativeGCNMaxOccupancySchedRegistry

MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-iterative-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)

static

◆ LowerCtorDtor

cl::opt< bool > LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the device."), cl::init(true), cl::Hidden)

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses().

◆ NewRegBankSelect

cl::opt< bool > NewRegBankSelect("new-reg-bank-select", cl::desc("Run amdgpu-regbankselect and amdgpu-regbanklegalize instead of " "regbankselect"), cl::init(false), cl::Hidden)

static

◆ OptExecMaskPreRA

cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))

static

◆ OptVGPRLiveRange

cl::opt< bool > OptVGPRLiveRange("amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden)

static

◆ RegAllocOptNotSupportedMessage

const char RegAllocOptNotSupportedMessage[]

static

Initial value:

= "-regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, "
  "and -vgpr-regalloc"

Definition at line 1810 of file AMDGPUTargetMachine.cpp.

◆ RemoveIncompatibleFunctions

cl::opt< bool > RemoveIncompatibleFunctions("amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true))

static

Referenced by llvm::AMDGPUPassConfig::addIRPasses().

◆ ScalarizeGlobal

cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)

static

Referenced by llvm::GCNTargetMachine::getSubtargetImpl().

◆ SISchedRegistry

MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)

static

Macros

Functions

Variables

Detailed Description

Macro Definition Documentation

◆ ADD_ANALYSIS_PASS

◆ ADD_CLASS_PASS_TO_PASS_NAME

◆ ADD_CLASS_PASS_TO_PASS_NAME_WITH_PARAMS

◆ ADD_PASS

◆ ADD_PASS_WITH_PARAMS

◆ FUNCTION_ALIAS_ANALYSIS [1/2]

◆ FUNCTION_ALIAS_ANALYSIS [2/2]

◆ FUNCTION_ANALYSIS [1/2]

◆ FUNCTION_ANALYSIS [2/2]

◆ FUNCTION_PASS [1/2]

◆ FUNCTION_PASS [2/2]

◆ FUNCTION_PASS_WITH_PARAMS [1/2]

◆ FUNCTION_PASS_WITH_PARAMS [2/2]

◆ GET_PASS_REGISTRY

◆ LOOP_ANALYSIS [1/2]

◆ LOOP_ANALYSIS [2/2]

◆ LOOP_PASS [1/2]

◆ LOOP_PASS [2/2]

◆ MACHINE_FUNCTION_ANALYSIS [1/3]

◆ MACHINE_FUNCTION_ANALYSIS [2/3]

◆ MACHINE_FUNCTION_ANALYSIS [3/3]

◆ MACHINE_FUNCTION_PASS [1/2]

◆ MACHINE_FUNCTION_PASS [2/2]

◆ MACHINE_FUNCTION_PASS_WITH_PARAMS [1/2]

◆ MACHINE_FUNCTION_PASS_WITH_PARAMS [2/2]

◆ MODULE_ANALYSIS [1/2]

◆ MODULE_ANALYSIS [2/2]

◆ MODULE_PASS [1/2]

◆ MODULE_PASS [2/2]

◆ MODULE_PASS_WITH_PARAMS [1/2]

◆ MODULE_PASS_WITH_PARAMS [2/2]

Function Documentation

◆ createGCNMaxILPMachineScheduler()

◆ createGCNMaxMemoryClauseMachineScheduler()

◆ createGCNMaxOccupancyMachineScheduler()

◆ createIterativeGCNMaxOccupancyMachineScheduler()

◆ createIterativeILPMachineScheduler()

◆ createMinRegScheduler()

◆ createSIMachineScheduler()

◆ createTLOF()

◆ diagnoseUnsupportedCoExecSchedulerSelection()

◆ getEffectiveRelocModel()

◆ getGPUOrDefault()

◆ LLVMInitializeAMDGPUTarget()

◆ mustPreserveGV()

◆ parseAMDGPUAtomicOptimizerStrategy()

◆ parseAMDGPUAttributorPassOptions()

◆ useNoopPostScheduler()

Variable Documentation

◆ AMDGPUAtomicOptimizerStrategy

◆ AMDGPUSchedStrategy

◆ EarlyInlineAll

◆ EnableAMDGPUAliasAnalysis

◆ EnableAMDGPUAttributor

◆ EnableDCEInRA

◆ EnableDPPCombine

◆ EnableEarlyIfConversion

◆ EnableHipStdPar

◆ EnableImageIntrinsicOptimizer

◆ EnableInsertDelayAlu

◆ EnableLibCallSimplify

◆ EnableLoadStoreVectorizer

◆ EnableLoopPrefetch

◆ EnableLowerExecSync

◆ EnableLowerKernelArguments

◆ EnableLowerModuleLDS

◆ EnableObjectLinking

◆ EnablePreRAOptimizations

◆ EnablePromoteKernelArguments

◆ EnableRegReassign

◆ EnableRewritePartialRegUses

◆ EnableScalarIRPasses

◆ EnableSDWAPeephole

◆ EnableSetWavePriority

◆ EnableSIModeRegisterPass