Go to the documentation of this file.
56 "r600-ir-structurize",
57 cl::desc(
"Use StructurizeCFG IR pass"),
62 cl::desc(
"Run SROA after promote alloca pass"),
73 cl::desc(
"Run pre-RA exec mask optimizations"),
84 "amdgpu-load-store-vectorizer",
85 cl::desc(
"Enable load store vectorizer"),
91 "amdgpu-scalarize-global-loads",
92 cl::desc(
"Enable global load scalarization"),
98 "amdgpu-internalize-symbols",
99 cl::desc(
"Enable elimination of non-kernel functions and unused globals"),
105 "amdgpu-early-inline-all",
106 cl::desc(
"Inline all functions early"),
111 "amdgpu-sdwa-peephole",
116 "amdgpu-dpp-combine",
122 cl::desc(
"Enable AMDGPU Alias Analysis"),
127 "amdgpu-late-structurize",
128 cl::desc(
"Enable late CFG structurization"),
133 "amdgpu-function-calls",
134 cl::desc(
"Enable AMDGPU function call support"),
140 "amdgpu-fixed-function-abi",
141 cl::desc(
"Enable all implicit function arguments"),
148 "amdgpu-simplify-libcall",
149 cl::desc(
"Enable amdgpu library simplifications"),
154 "amdgpu-ir-lower-kernel-arguments",
155 cl::desc(
"Lower kernel argument loads in IR pass"),
160 "amdgpu-reassign-regs",
161 cl::desc(
"Enable register reassign optimizations on gfx10+"),
167 "amdgpu-atomic-optimizations",
168 cl::desc(
"Enable atomic optimizations"),
174 "amdgpu-mode-register",
175 cl::desc(
"Enable mode register pass"),
183 cl::desc(
"Enable machine DCE inside regalloc"));
186 "amdgpu-scalar-ir-passes",
187 cl::desc(
"Enable scalar IR passes"),
192 "amdgpu-enable-structurizer-workarounds",
197 "amdgpu-enable-lower-module-lds",
cl::desc(
"Enable lower module lds pass"),
270 return std::make_unique<AMDGPUTargetObjectFile>();
323 "Run GCN scheduler to maximize occupancy",
328 "Run GCN scheduler to maximize occupancy (experimental)",
333 "Run GCN iterative scheduler for minimal register usage (experimental)",
338 "Run GCN iterative scheduler for ILP scheduling (experimental)",
344 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
345 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
350 return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
351 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
352 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
401 Attribute GPUAttr =
F.getFnAttribute(
"target-cpu");
406 Attribute FSAttr =
F.getFnAttribute(
"target-features");
414 if (
const Function *
F = dyn_cast<Function>(&GV))
421 Builder.DivergentTarget =
true;
490 bool DebugPassManager) {
494 if (
PassName ==
"amdgpu-propagate-attributes-late") {
498 if (
PassName ==
"amdgpu-unify-metadata") {
502 if (
PassName ==
"amdgpu-printf-runtime-binding") {
506 if (
PassName ==
"amdgpu-always-inline") {
510 if (
PassName ==
"amdgpu-lower-module-lds") {
519 if (
PassName ==
"amdgpu-simplifylib") {
523 if (
PassName ==
"amdgpu-usenative") {
527 if (
PassName ==
"amdgpu-promote-alloca") {
531 if (
PassName ==
"amdgpu-promote-alloca-to-vector") {
535 if (
PassName ==
"amdgpu-lower-kernel-attributes") {
539 if (
PassName ==
"amdgpu-propagate-attributes-early") {
551 if (AAName ==
"amdgpu-aa") {
645 auto &
I = SubtargetMap[SubtargetKey];
666 unsigned DestAS)
const {
672 const auto *
LD = dyn_cast<LoadInst>(V);
680 const auto *Ptr =
LD->getPointerOperand();
714 auto &
I = SubtargetMap[SubtargetKey];
750 return getTM<AMDGPUTargetMachine>();
760 void addEarlyCSEOrGVNPass();
761 void addStraightLineScalarOptimizationPasses();
762 void addIRPasses()
override;
763 void addCodeGenPrepare()
override;
764 bool addPreISel()
override;
765 bool addInstSelector()
override;
766 bool addGCPasses()
override;
768 std::unique_ptr<CSEConfigBase> getCSEConfig()
const override;
771 std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig()
const {
775 class R600PassConfig final :
public AMDGPUPassConfig {
778 : AMDGPUPassConfig(
TM, PM) {}
785 bool addPreISel()
override;
786 bool addInstSelector()
override;
787 void addPreRegAlloc()
override;
788 void addPreSched2()
override;
789 void addPreEmitPass()
override;
792 class GCNPassConfig final :
public AMDGPUPassConfig {
795 : AMDGPUPassConfig(
TM, PM) {
799 setRequiresCodeGenSCCOrder(
true);
803 return getTM<GCNTargetMachine>();
809 bool addPreISel()
override;
810 void addMachineSSAOptimization()
override;
811 bool addILPOpts()
override;
812 bool addInstSelector()
override;
813 bool addIRTranslator()
override;
814 void addPreLegalizeMachineIR()
override;
815 bool addLegalizeMachineIR()
override;
816 void addPreRegBankSelect()
override;
817 bool addRegBankSelect()
override;
818 void addPreGlobalInstructionSelect()
override;
819 bool addGlobalInstructionSelect()
override;
820 void addFastRegAlloc()
override;
821 void addOptimizedRegAlloc()
override;
822 void addPreRegAlloc()
override;
823 bool addPreRewrite()
override;
824 void addPostRegAlloc()
override;
825 void addPreSched2()
override;
826 void addPreEmitPass()
override;
831 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
838 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
847 addEarlyCSEOrGVNPass();
855 void AMDGPUPassConfig::addIRPasses() {
906 addStraightLineScalarOptimizationPasses();
938 addEarlyCSEOrGVNPass();
941 void AMDGPUPassConfig::addCodeGenPrepare() {
963 bool AMDGPUPassConfig::addPreISel() {
968 bool AMDGPUPassConfig::addInstSelector() {
974 bool AMDGPUPassConfig::addGCPasses() {
983 bool R600PassConfig::addPreISel() {
984 AMDGPUPassConfig::addPreISel();
991 bool R600PassConfig::addInstSelector() {
996 void R600PassConfig::addPreRegAlloc() {
1000 void R600PassConfig::addPreSched2() {
1007 void R600PassConfig::addPreEmitPass() {
1016 return new R600PassConfig(*
this, PM);
1026 if (
ST.enableSIScheduler())
1031 bool GCNPassConfig::addPreISel() {
1032 AMDGPUPassConfig::addPreISel();
1062 void GCNPassConfig::addMachineSSAOptimization() {
1087 bool GCNPassConfig::addILPOpts() {
1095 bool GCNPassConfig::addInstSelector() {
1096 AMDGPUPassConfig::addInstSelector();
1102 bool GCNPassConfig::addIRTranslator() {
1107 void GCNPassConfig::addPreLegalizeMachineIR() {
1113 bool GCNPassConfig::addLegalizeMachineIR() {
1118 void GCNPassConfig::addPreRegBankSelect() {
1123 bool GCNPassConfig::addRegBankSelect() {
1128 void GCNPassConfig::addPreGlobalInstructionSelect() {
1133 bool GCNPassConfig::addGlobalInstructionSelect() {
1138 void GCNPassConfig::addPreRegAlloc() {
1144 void GCNPassConfig::addFastRegAlloc() {
1159 void GCNPassConfig::addOptimizedRegAlloc() {
1180 bool GCNPassConfig::addPreRewrite() {
1188 void GCNPassConfig::addPostRegAlloc() {
1198 void GCNPassConfig::addPreSched2() {
1202 void GCNPassConfig::addPreEmitPass() {
1227 return new GCNPassConfig(*
this, PM);
1251 if (MFI->Occupancy == 0) {
1260 SourceRange =
RegName.SourceRange;
1274 "incorrect register class for field",
RegName.Value,
1276 SourceRange =
RegName.SourceRange;
1280 if (parseRegister(YamlMFI.
ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1285 if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1286 !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1290 if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1291 !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1295 if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1296 !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1303 unsigned SystemSGPRs) {
1308 if (A->IsRegister) {
1311 SourceRange = A->RegisterName.SourceRange;
1314 if (!RC.contains(
Reg))
1315 return diagnoseRegisterClass(A->RegisterName);
1323 MFI->NumUserSGPRs += UserSGPRs;
1324 MFI->NumSystemSGPRs += SystemSGPRs;
1329 (parseAndCheckArgument(YamlMFI.
ArgInfo->PrivateSegmentBuffer,
1330 AMDGPU::SGPR_128RegClass,
1332 parseAndCheckArgument(YamlMFI.
ArgInfo->DispatchPtr,
1333 AMDGPU::SReg_64RegClass, MFI->ArgInfo.
DispatchPtr,
1335 parseAndCheckArgument(YamlMFI.
ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1337 parseAndCheckArgument(YamlMFI.
ArgInfo->KernargSegmentPtr,
1338 AMDGPU::SReg_64RegClass,
1340 parseAndCheckArgument(YamlMFI.
ArgInfo->DispatchID,
1341 AMDGPU::SReg_64RegClass, MFI->ArgInfo.
DispatchID,
1343 parseAndCheckArgument(YamlMFI.
ArgInfo->FlatScratchInit,
1344 AMDGPU::SReg_64RegClass,
1346 parseAndCheckArgument(YamlMFI.
ArgInfo->PrivateSegmentSize,
1347 AMDGPU::SGPR_32RegClass,
1349 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupIDX,
1352 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupIDY,
1355 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupIDZ,
1358 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkGroupInfo,
1359 AMDGPU::SGPR_32RegClass,
1361 parseAndCheckArgument(YamlMFI.
ArgInfo->PrivateSegmentWaveByteOffset,
1362 AMDGPU::SGPR_32RegClass,
1364 parseAndCheckArgument(YamlMFI.
ArgInfo->ImplicitArgPtr,
1365 AMDGPU::SReg_64RegClass,
1367 parseAndCheckArgument(YamlMFI.
ArgInfo->ImplicitBufferPtr,
1368 AMDGPU::SReg_64RegClass,
1370 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkItemIDX,
1371 AMDGPU::VGPR_32RegClass,
1373 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkItemIDY,
1374 AMDGPU::VGPR_32RegClass,
1376 parseAndCheckArgument(YamlMFI.
ArgInfo->WorkItemIDZ,
1377 AMDGPU::VGPR_32RegClass,
void addAAResult(AAResultT &AAResult)
Register a specific AA result.
void initializeR600ControlFlowFinalizerPass(PassRegistry &)
virtual void addPostRegAlloc()
This method may be implemented by targets that want to run passes after register allocation pass pipe...
FunctionPass * createR600ExpandSpecialInstrsPass()
static cl::opt< bool > EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, cl::desc("Enable machine DCE inside regalloc"))
CodeGenOpt::Level getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
A manager for alias analyses.
Analysis pass providing a never-invalidated alias analysis result.
static constexpr ArgDescriptor createStack(unsigned Offset, unsigned Mask=~0u)
static bool EnableFixedFunctionABI
static cl::opt< bool, true > EnableLowerModuleLDS("amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden)
void initializeR600PacketizerPass(PassRegistry &)
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget()
static cl::opt< bool > EnableSIModeRegisterPass("amdgpu-mode-register", cl::desc("Enable mode register pass"), cl::init(true), cl::Hidden)
bool isValid() const
Return true if the attribute is any kind of attribute.
void registerDefaultAliasAnalyses(AAManager &) override
Allow the target to register alias analyses with the AAManager for use with the new pass manager.
static bool mustPreserveGV(const GlobalValue &GV)
Predicate for Internalize pass.
FunctionPass * createSeparateConstOffsetFromGEPPass(bool LowerGEP=false)
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
yaml::MachineFunctionInfo * convertFuncInfoToYAML(const MachineFunction &MF) const override
Allocate and initialize an instance of the YAML representation of the MachineFunctionInfo.
void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &)
void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaPass(PassRegistry &)
FunctionPass * createSIMemoryLegalizerPass()
char & SILowerSGPRSpillsID
void registerPipelineStartEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
bool isPointerTy() const
True if this is an instance of PointerType.
Optional< SIArgumentInfo > ArgInfo
static constexpr ArgDescriptor createArg(const ArgDescriptor &Arg, unsigned Mask)
static ScheduleDAGInstrs * createMinRegScheduler(MachineSchedContext *C)
LocationClass< Ty > location(Ty &L)
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
void initializeAMDGPUAlwaysInlinePass(PassRegistry &)
Targets should override this in a way that mirrors the implementation of llvm::MachineFunctionInfo.
char & PHIEliminationID
PHIElimination - This pass eliminates machine instruction PHI nodes by inserting copy instructions.
void initializeSIInsertHardClausesPass(PassRegistry &)
void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &)
void initializeSIPreAllocateWWMRegsPass(PassRegistry &)
void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &)
bool IEEE
Floating point opcodes that support exception flag gathering quiet and propagate signaling NaN inputs...
Pass * createAlwaysInlinerLegacyPass(bool InsertLifetime=true)
Create a legacy pass manager instance of a pass to inline and remove functions marked as "always_inli...
static LLVM_READNONE StringRef getGPUOrDefault(const Triple &TT, StringRef GPU)
Target - Wrapper for Target specific information.
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &)
FunctionPass * createFixIrreduciblePass()
MachineSchedRegistry provides a selection of available machine instruction schedulers.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
FunctionPass * createAMDGPULateCodeGenPreparePass()
FunctionPass * createSILowerI1CopiesPass()
void initializeR600ClauseMergePassPass(PassRegistry &)
@ SCHEDULE_LEGACYMAXOCCUPANCY
FunctionPass * createFlattenCFGPass()
A pass that internalizes all functions and variables other than those that must be preserved accordin...
void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry &)
ArgDescriptor FlatScratchInit
Triple - Helper class for working with autoconf configuration names.
FunctionPass * createEarlyCSEPass(bool UseMemorySSA=false)
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
void initializeSILowerI1CopiesPass(PassRegistry &)
char & SIPreEmitPeepholeID
FunctionPass * createAMDGPUPostLegalizeCombiner(bool IsOptNone)
void initializeAMDGPUDAGToDAGISelPass(PassRegistry &)
void initializeSIPeepholeSDWAPass(PassRegistry &)
char & SILowerControlFlowID
unsigned getLDSSize() const
ModulePass * createAMDGPUUnifyMetadataPass()
static cl::opt< bool > EnableStructurizerWorkarounds("amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), cl::Hidden)
Legacy wrapper pass to provide the AMDGPUAAResult object.
CGSCCToFunctionPassAdaptor createCGSCCToFunctionPassAdaptor(FunctionPassT Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
static cl::opt< bool > EnableAtomicOptimizations("amdgpu-atomic-optimizations", cl::desc("Enable atomic optimizations"), cl::init(false), cl::Hidden)
static ScheduleDAGInstrs * createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
void initializeSIFoldOperandsPass(PassRegistry &)
ModulePass * createBarrierNoopPass()
createBarrierNoopPass - This pass is purely a module pass barrier in a pass manager.
FunctionPass * createAMDGPUISelDag(TargetMachine *TM=nullptr, CodeGenOpt::Level OptLevel=CodeGenOpt::Default)
This pass converts a legalized DAG into an AMDGPU-specific DAG.
static cl::opt< bool > InternalizeSymbols("amdgpu-internalize-symbols", cl::desc("Enable elimination of non-kernel functions and unused globals"), cl::init(false), cl::Hidden)
void initializeGlobalISel(PassRegistry &)
Initialize all passes linked into the GlobalISel library.
bool FP32InputDenormals
If this is set, neither input nor output denormals are flushed for most f32 instructions.
void registerAnalysisRegistrationCallback(const std::function< void(CGSCCAnalysisManager &)> &C)
Register callbacks for analysis registration with this PassBuilder instance.
FunctionPass * createGVNPass(bool NoMemDepAnalysis=false)
Create a legacy GVN pass.
ArgDescriptor PrivateSegmentSize
ModulePass * createR600OpenCLImageTypeLoweringPass()
FunctionPass * createR600ClauseMergePass()
ArgDescriptor DispatchPtr
void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &)
char & SIPreAllocateWWMRegsID
void initializeSIShrinkInstructionsPass(PassRegistry &)
char & TwoAddressInstructionPassID
TwoAddressInstruction - This pass reduces two-address instructions to use two operands.
bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, PerFunctionMIParsingState &PFS, SMDiagnostic &Error, SMRange &SourceRange) const override
Parse out the target's MachineFunctionInfo from the YAML representation.
void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &)
static ScheduleDAGInstrs * createSIMachineScheduler(MachineSchedContext *C)
This interface provides simple read-only access to a block of memory, and provides simple methods for...
AMDGPU::SIModeRegisterDefaults Mode
ImmutablePass * createAMDGPUExternalAAWrapperPass()
void initializeAMDGPULowerIntrinsicsPass(PassRegistry &)
void initializeGCNDPPCombinePass(PassRegistry &)
static const OptimizationLevel O0
Disable as many optimizations as possible.
ArgDescriptor ImplicitArgPtr
static cl::opt< bool > EnableSDWAPeephole("amdgpu-sdwa-peephole", cl::desc("Enable SDWA peepholer"), cl::init(true))
char & SIOptimizeExecMaskingID
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
void initializeAMDGPUUnifyMetadataPass(PassRegistry &)
StringValue FrameOffsetReg
void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &)
static MachineSchedRegistry SISchedRegistry("si", "Run SI's custom scheduler", createSIMachineScheduler)
ArgDescriptor WorkGroupIDX
GCNTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
FunctionPass * createInferAddressSpacesPass(unsigned AddressSpace=~0u)
void initializeSILateBranchLoweringPass(PassRegistry &)
FunctionPass * createAMDGPUUseNativeCallsPass()
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Represents a location in source code.
FunctionPass * createR600Packetizer()
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
void registerParseAACallback(const std::function< bool(StringRef Name, AAManager &AA)> &C)
Register a callback for parsing an AliasAnalysis Name to populate the given AAManager AA.
static cl::opt< bool > ScalarizeGlobal("amdgpu-scalarize-global-loads", cl::desc("Enable global load scalarization"), cl::init(true), cl::Hidden)
FunctionPass * createNaryReassociatePass()
char & PostRAHazardRecognizerID
PostRAHazardRecognizer - This pass runs the post-ra hazard recognizer.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &)
void initializeSIWholeQuadModePass(PassRegistry &)
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &)
Target & getTheAMDGPUTarget()
The target which supports all AMD GPUs.
ArgDescriptor WorkItemIDX
(vector float) vec_cmpeq(*A, *B) C
static cl::opt< bool > EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, cl::desc("Enable AMDGPU Alias Analysis"), cl::init(true))
static cl::opt< bool > EnableLowerKernelArguments("amdgpu-ir-lower-kernel-arguments", cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", cl::desc("Enable load store vectorizer"), cl::init(true), cl::Hidden)
ModulePass * createAMDGPULowerModuleLDSPass()
char & FuncletLayoutID
This pass lays out funclets contiguously.
void initializeAMDGPUUseNativeCallsPass(PassRegistry &)
FunctionPass * createSIInsertWaitcntsPass()
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
This class provides access to building LLVM's passes.
static cl::opt< bool > EnableRegReassign("amdgpu-reassign-regs", cl::desc("Enable register reassign optimizations on gfx10+"), cl::init(true), cl::Hidden)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
bool FP64FP16InputDenormals
FunctionPass * createAMDGPUAnnotateUniformValues()
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
FunctionPass * createR600EmitClauseMarkers()
void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry &)
char & EarlyIfConverterID
EarlyIfConverter - This pass performs if-conversion on SSA form by inserting cmov instructions.
FunctionPass * createAtomicExpandPass()
This pass is responsible for selecting generic machine instructions to target-specific instructions.
void initializeGCNRegBankReassignPass(PassRegistry &)
static int64_t getNullPointerValue(unsigned AddrSpace)
Get the integer value of a null pointer in the given address space.
RegisterTargetMachine - Helper template for registering a target machine implementation,...
void addMutation(std::unique_ptr< ScheduleDAGMutation > Mutation)
Add a postprocessing step to the DAG builder.
PassRegistry - This class manages the registration and initialization of the pass subsystem as appli...
FunctionPass * createUnifyLoopExitsPass()
static std::unique_ptr< TargetLoweringObjectFile > createTLOF(const Triple &TT)
unsigned getMainFileID() const
ModulePass * createAMDGPUPropagateAttributesLatePass(const TargetMachine *)
void initializeSIMemoryLegalizerPass(PassRegistry &)
static cl::opt< bool > EnableDPPCombine("amdgpu-dpp-combine", cl::desc("Enable DPP combiner"), cl::init(true))
ModulePass * createAMDGPULowerIntrinsicsPass()
char & StackMapLivenessID
StackMapLiveness - This pass analyses the register live-out set of stackmap/patchpoint intrinsics and...
Pass * createAMDGPUAnnotateKernelFeaturesPass()
~AMDGPUTargetMachine() override
const TargetSubtargetInfo * getSubtargetImpl() const
FunctionPass * createSinkingPass()
@ REGION_ADDRESS
Address space for region memory. (GDS)
FunctionPass * createSpeculativeExecutionPass()
char & SILoadStoreOptimizerID
StringRef getValueAsString() const
Return the attribute's value as a string.
std::unique_ptr< ScheduleDAGMutation > createAMDGPUExportClusteringDAGMutation()
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
void resetTargetOptions(const Function &F) const
Reset the target options based on the function's attributes.
bool isEntryFunctionCC(CallingConv::ID CC)
const MemoryBuffer * getMemoryBuffer(unsigned i) const
Pass * createFunctionInliningPass()
createFunctionInliningPass - Return a new pass object that uses a heuristic to inline direct function...
virtual void add(Pass *P)=0
Add a pass to the queue of passes to run.
@ PRIVATE_ADDRESS
Address space for private memory.
virtual StringRef getBufferIdentifier() const
Return an identifier for this buffer, typically the filename it was read from.
ImmutablePass * createAMDGPUAAWrapperPass()
PassManagerBuilder - This class is used to set up a standard optimization sequence for languages like...
FunctionPass * createLowerSwitchPass()
ModulePass * createAMDGPUPrintfRuntimeBinding()
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Target-Independent Code Generator Pass Configuration Options.
void append(StringRef RHS)
Append from a StringRef.
void initializeSILowerSGPRSpillsPass(PassRegistry &)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void registerPipelineEarlySimplificationEPCallback(const std::function< void(ModulePassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
StringRef getFeatureString(const Function &F) const
static MachineSchedRegistry R600SchedRegistry("r600", "Run R600's custom scheduler", createR600MachineScheduler)
static cl::opt< bool > EnableR600StructurizeCFG("r600-ir-structurize", cl::desc("Use StructurizeCFG IR pass"), cl::init(true))
ModuleToFunctionPassAdaptor createModuleToFunctionPassAdaptor(FunctionPassT Pass)
A function to deduce a function pass type and wrap it in the templated adaptor.
Triple TargetTriple
Triple string, CPU name, and target feature strings the TargetMachine instance is created with.
static cl::opt< bool > OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, cl::desc("Run pre-RA exec mask optimizations"), cl::init(true))
StringValue ScratchRSrcReg
char & AMDGPUUnifyDivergentExitNodesID
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
void initializeSIInsertWaitcntsPass(PassRegistry &)
void setRequiresStructuredCFG(bool Value)
void initializeSIAnnotateControlFlowPass(PassRegistry &)
ScheduleDAGMILive * createGenericSchedLive(MachineSchedContext *C)
Create the standard converging machine scheduler.
ArgDescriptor WorkGroupIDZ
#define LLVM_EXTERNAL_VISIBILITY
char & DetectDeadLanesID
This pass adds dead/undef flags after analyzing subregister lanes.
const MCSubtargetInfo * getMCSubtargetInfo() const
ArgDescriptor PrivateSegmentBuffer
FunctionPass * createAMDGPUAtomicOptimizerPass()
void initializeR600VectorRegMergerPass(PassRegistry &)
ModulePass * createGlobalDCEPass()
createGlobalDCEPass - This transform is designed to eliminate unreachable internal globals (functions...
char & FinalizeMachineBundlesID
FinalizeMachineBundles - This pass finalize machine instruction bundles (created earlier,...
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &)
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
static ScheduleDAGInstrs * createR600MachineScheduler(MachineSchedContext *C)
virtual void addIRPasses()
Add common target configurable passes that perform LLVM IR to IR transforms following machine indepen...
initializer< Ty > init(const Ty &Val)
virtual void addOptimizedRegAlloc()
addOptimizedRegAlloc - Add passes related to register allocation.
ArgDescriptor PrivateSegmentWaveByteOffset
char & SIFormMemoryClausesID
static cl::opt< bool, true > LateCFGStructurize("amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden)
ImmutablePass * createExternalAAWrapperPass(std::function< void(Pass &, Function &, AAResults &)> Callback)
A wrapper pass around a callback which can be used to populate the AAResults in the AAResultsWrapperP...
ArgDescriptor WorkGroupIDY
char & MachineCSEID
MachineCSE - This pass performs global CSE on machine instructions.
virtual void addCodeGenPrepare()
Add pass to prepare the LLVM IR for code generation.
bool DX10Clamp
Used by the vector ALU to force DX10-style treatment of NaNs: when set, clamp NaN to zero; otherwise,...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
char & SIInsertHardClausesID
static MachineSchedRegistry GCNMinRegSchedRegistry("gcn-minreg", "Run GCN iterative scheduler for minimal register usage (experimental)", createMinRegScheduler)
bool isFlatGlobalAddrSpace(unsigned AS)
bool FP64FP16InputDenormals
If this is set, neither input nor output denormals are flushed for both f64 and f16/v2f16 instructions...
Target & getTheGCNTarget()
The target for GCN GPUs.
void initializeSIOptimizeExecMaskingPass(PassRegistry &)
void initializeSIPostRABundlerPass(PassRegistry &)
void registerPipelineParsingCallback(const std::function< bool(StringRef Name, CGSCCPassManager &, ArrayRef< PipelineElement >)> &C)
Register pipeline parsing callbacks with this pass builder instance.
void initializeAMDGPUAAWrapperPassPass(PassRegistry &)
void initializeAMDGPUCodeGenPreparePass(PassRegistry &)
ModulePass * createAMDGPUOpenCLEnqueuedBlockLoweringPass()
void initializeGCNNSAReassignPass(PassRegistry &)
static bool EnableLowerModuleLDS
A wrapper around std::string which contains a source range that's being set during parsing.
Pass to remove unused function declarations.
char & PatchableFunctionID
This pass implements the "patchable-function" attribute.
static MachineSchedRegistry IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental", "Run GCN scheduler to maximize occupancy (experimental)", createIterativeGCNMaxOccupancyMachineScheduler)
ArgDescriptor WorkItemIDZ
FunctionPass * createSIShrinkInstructionsPass()
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
static cl::opt< bool, true > EnableAMDGPUFunctionCallsOpt("amdgpu-function-calls", cl::desc("Enable AMDGPU function call support"), cl::location(AMDGPUTargetMachine::EnableFunctionCalls), cl::init(true), cl::Hidden)
const TargetRegisterInfo * TRI
Target processor register info.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
void initializeSIFormMemoryClausesPass(PassRegistry &)
static StringRef computeDataLayout(const Triple &TT)
StringRef - Represent a constant reference to a string, i.e.
void initializeAMDGPUExternalAAWrapperPass(PassRegistry &)
TargetTransformInfo getTargetTransformInfo(const Function &F) override
Get a TargetTransformInfo implementation for the target.
StringValue StackPtrOffsetReg
Type * getType() const
All values are typed, get the type of this value.
std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOpt::Level Level)
FunctionPass * createAMDGPURegBankCombiner(bool IsOptNone)
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI)
StringRef getTargetFeatureString() const
static cl::opt< bool > EarlyInlineAll("amdgpu-early-inline-all", cl::desc("Inline all functions early"), cl::init(false), cl::Hidden)
ArgDescriptor ImplicitBufferPtr
static Reloc::Model getEffectiveRelocModel(Optional< Reloc::Model > RM)
static cl::opt< bool > EnableSROA("amdgpu-sroa", cl::desc("Run SROA after promote alloca pass"), cl::ReallyHidden, cl::init(true))
void initializeAMDGPULowerKernelAttributesPass(PassRegistry &)
CodeModel::Model getEffectiveCodeModel(Optional< CodeModel::Model > CM, CodeModel::Model Default)
Helper method for getting the code model, returning Default if CM does not have a value.
void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry &)
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
Wrapper class representing virtual and physical registers.
Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structurizer will not structurize regions that only contain uniform...
bool FP64FP16OutputDenormals
TargetPassConfig * createPassConfig(PassManagerBase &PM) override
Create a pass configuration object to be used by addPassToEmitX methods for generating a pipeline of ...
ModulePass * createAMDGPULowerKernelAttributesPass()
void initializeSIFixSGPRCopiesPass(PassRegistry &)
ArgDescriptor WorkGroupInfo
FunctionPass * createAMDGPUPromoteAllocaToVector()
R600TargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL, bool JIT)
FunctionPass * createR600VectorRegMerger()
void initializeAMDGPULowerModuleLDSPass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeILPMachineScheduler(MachineSchedContext *C)
bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, Register &Reg, StringRef Src, SMDiagnostic &Error)
static cl::opt< bool > EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, cl::desc("Run early if-conversion"), cl::init(false))
void initializeSIFixVGPRCopiesPass(PassRegistry &)
void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry &)
static cl::opt< bool > EnableScalarIRPasses("amdgpu-scalar-ir-passes", cl::desc("Enable scalar IR passes"), cl::init(true), cl::Hidden)
static cl::opt< bool > EnableR600IfConvert("r600-if-convert", cl::desc("Use if conversion pass"), cl::ReallyHidden, cl::init(true))
void initializeSIPreEmitPeepholePass(PassRegistry &)
static ScheduleDAGInstrs * createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C)
LLVM-provided high-level optimization levels.
Function & getFunction()
Return the LLVM function that this machine code represents.
FunctionPass * createR600ControlFlowFinalizer()
virtual void addMachineSSAOptimization()
addMachineSSAOptimization - Add standard passes that optimize machine instructions in SSA form.
FunctionPass * createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *)
void initializeSIModeRegisterPass(PassRegistry &)
Lightweight error class with error context and mandatory checking.
@ LOCAL_ADDRESS
Address space for local memory.
std::unique_ptr< ScheduleDAGMutation > createLoadClusterDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
const TargetInstrInfo * TII
Target instruction information.
static MachineSchedRegistry GCNMaxOccupancySchedRegistry("gcn-max-occupancy", "Run GCN scheduler to maximize occupancy", createGCNMaxOccupancyMachineScheduler)
FunctionPass * createAMDGPULowerKernelArgumentsPass()
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override
Returns true if a cast between SrcAS and DestAS is a noop.
@ EP_ModuleOptimizerEarly
EP_ModuleOptimizerEarly - This extension point allows adding passes just before the main module-level...
FunctionPass * createSIModeRegisterPass()
static constexpr ArgDescriptor createRegister(Register Reg, unsigned Mask=~0u)
ModulePass * createInternalizePass(std::function< bool(const GlobalValue &)> MustPreserveGV)
createInternalizePass - This pass loops over all of the functions in the input module,...
void adjustPassManager(PassManagerBuilder &) override
Allow the target to modify the pass manager, e.g. by calling PassManagerBuilder::addExtension.
This class describes a target machine that is implemented with the LLVM target-independent code gener...
char & DeadMachineInstructionElimID
DeadMachineInstructionElim - This pass removes dead machine instructions.
static MachineSchedRegistry GCNILPSchedRegistry("gcn-ilp", "Run GCN iterative scheduler for ILP scheduling (experimental)", createIterativeILPMachineScheduler)
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
ArgDescriptor KernargSegmentPtr
FunctionPass * createAMDGPUPromoteAlloca()
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
void registerFunctionAnalysis()
Register a specific AA result.
char & BranchRelaxationPassID
BranchRelaxation - This pass replaces branches that need to jump further than is supported by a branc...
void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &)
FunctionPass * createAMDGPUCodeGenPreparePass()
char & MachineSchedulerID
MachineScheduler - This pass schedules machine instructions.
static bool EnableFunctionCalls
Pass interface - Implemented by all 'passes'.
ModulePass * createAMDGPUFixFunctionBitcastsPass()
StringRef getTargetCPU() const
@ EP_EarlyAsPossible
EP_EarlyAsPossible - This extension point allows adding passes before any other transformations,...
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &)
ArgDescriptor WorkItemIDY
FunctionPass * createAMDGPUPreLegalizeCombiner(bool IsOptNone)
unsigned getAssumedAddrSpace(const Value *V) const override
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Represents a range in source code.
FunctionPass * createStraightLineStrengthReducePass()
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &)
void initializeSILoadStoreOptimizerPass(PassRegistry &)
PassManagerBase - An abstract interface to allow code to add passes to a pass manager without having ...
void registerCGSCCOptimizerLateEPCallback(const std::function< void(CGSCCPassManager &, OptimizationLevel)> &C)
Register a callback for a default optimizer pipeline extension point.
void initializeAMDGPURegBankCombinerPass(PassRegistry &)
FunctionPass * createSIAnnotateControlFlowPass()
Create the annotation pass.
A container for analyses that lazily runs them and caches their results.
This pass implements the reg bank selector pass used in the GlobalISel pipeline.
@ FLAT_ADDRESS
Address space for flat memory.
char & EarlyMachineLICMID
This pass performs loop invariant code motion on machine instructions.
StringRef getGPUName(const Function &F) const
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
ScheduleDAGMILive is an implementation of ScheduleDAGInstrs that schedules machine instructions while...
A ScheduleDAG for scheduling lists of MachineInstr.
@ EP_CGSCCOptimizerLate
EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC passes at the end of the main...
FunctionPass * createAMDGPUCFGStructurizerPass()
FunctionPass * createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel)
This pass converts a legalized DAG into an R600-specific DAG, ready for instruction scheduling.
char & IfConverterID
IfConverter - This pass performs machine code if conversion.
AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, TargetOptions Options, Optional< Reloc::Model > RM, Optional< CodeModel::Model > CM, CodeGenOpt::Level OL)
virtual void addFastRegAlloc()
addFastRegAlloc - Add the minimum set of target-independent passes that are required for fast registe...
char & AMDGPUPerfHintAnalysisID
FunctionPass * createSROAPass()
static cl::opt< bool > EnableLibCallSimplify("amdgpu-simplify-libcall", cl::desc("Enable amdgpu library simplifications"), cl::init(true), cl::Hidden)
bool FP64FP16OutputDenormals
char & SIOptimizeExecMaskingPreRAID
MCRegisterInfo * createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour)
std::unique_ptr< const MCRegisterInfo > MRI
LLVM Value Representation.
static cl::opt< bool, true > EnableAMDGPUFixedFunctionABIOpt("amdgpu-fixed-function-abi", cl::desc("Enable all implicit function arguments"), cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI), cl::init(false), cl::Hidden)
MachineFunctionPass * createGCNRegBankReassignPass(AMDGPU::RegBankReassignMode Mode)
static bool EnableLateStructurizeCFG
void registerPassBuilderCallbacks(PassBuilder &PB, bool DebugPassManager) override
Allow the target to modify the pass pipeline with New Pass Manager (similar to adjustPassManager for ...
virtual bool addILPOpts()
Add passes that optimize instruction level parallelism for out-of-order targets.
yaml::MachineFunctionInfo * createDefaultFuncInfoYAML() const override
Allocate and return a default initialized instance of the YAML representation for the MachineFunction...
std::enable_if_t<!std::is_same< PassT, PassManager >::value > addPass(PassT Pass)
static const char PassName[]
void initializeSILowerControlFlowPass(PassRegistry &)
char & SILateBranchLoweringPassID
FunctionPass * createAMDGPUSimplifyLibCallsPass(const TargetMachine *)
This pass implements the localization mechanism described at the top of this file.
std::unique_ptr< ScheduleDAGMutation > createAMDGPUMacroFusionDAGMutation()
Note that you have to add: DAG.addMutation(createAMDGPUMacroFusionDAGMutation()); to AMDGPUPassConfig...