|
LLVM 23.0.0git
|
Insert wait instructions for memory reads and writes. More...
#include "AMDGPU.h"
#include "AMDGPUHWEvents.h"
#include "AMDGPUWaitcntUtils.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/IR/Dominators.h"
#include "llvm/InitializePasses.h"
#include "llvm/TargetParser/AMDGPUTargetParser.h"
Go to the source code of this file.
Macros | |
| #define | DEBUG_TYPE "si-insert-waitcnts" |
Typedefs | |
| using | HWEventSet = AMDGPU::HWEventSet |
| using | HWEvent = AMDGPU::HWEvent |
Functions | |
| INITIALIZE_PASS_BEGIN (SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts", false, false) INITIALIZE_PASS_END(SIInsertWaitcntsLegacy | |
| static bool | updateOperandIfDifferent (MachineInstr &MI, AMDGPU::OpName OpName, unsigned NewEnc) |
| static bool | isWaitInstr (MachineInstr &Inst) |
Variables | |
| static cl::opt< bool > | ForceEmitZeroFlag ("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as " "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(false), cl::Hidden) |
| static cl::opt< bool > | ForceEmitZeroLoadFlag ("amdgpu-waitcnt-load-forcezero", cl::desc("Force all waitcnt load counters to wait until 0"), cl::init(false), cl::Hidden) |
| static cl::opt< bool > | ExpertSchedulingModeFlag ("amdgpu-expert-scheduling-mode", cl::desc("Enable expert scheduling mode 2 for all functions (GFX12+ only)"), cl::init(false), cl::Hidden) |
| DEBUG_TYPE | |
| SI Insert | Waitcnts |
| SI Insert | false |
Insert wait instructions for memory reads and writes.
Memory reads and writes are issued asynchronously, so we need to insert S_WAITCNT instructions when we want to access any of their results or overwrite any register that's used asynchronously.
TODO: This pass currently keeps one timeline per hardware counter. A finer-grained approach that keeps one timeline per event type could sometimes get away with generating weaker s_waitcnt instructions. For example, when both SMEM and LDS are in flight and we need to wait for the i-th most recent LDS instruction, an lgkmcnt(i) is actually sufficient, but the pass will currently generate a conservative lgkmcnt(0) because multiple event types are in flight.
Definition in file SIInsertWaitcnts.cpp.
| #define DEBUG_TYPE "si-insert-waitcnts" |
Definition at line 50 of file SIInsertWaitcnts.cpp.
| using HWEvent = AMDGPU::HWEvent |
Definition at line 48 of file SIInsertWaitcnts.cpp.
| using HWEventSet = AMDGPU::HWEventSet |
Definition at line 47 of file SIInsertWaitcnts.cpp.
| INITIALIZE_PASS_BEGIN | ( | SIInsertWaitcntsLegacy | , |
| DEBUG_TYPE | , | ||
| "SI Insert Waitcnts" | , | ||
| false | , | ||
| false | ) |
References DEBUG_TYPE, and INITIALIZE_PASS_DEPENDENCY.
|
static |
Definition at line 2866 of file SIInsertWaitcnts.cpp.
References llvm::AMDGPU::counterTypeForInstr(), llvm::SIInstrInfo::getNonSoftWaitcntOpcode(), llvm::MachineInstr::getOpcode(), llvm::MachineInstr::getOperand(), llvm::MachineOperand::getReg(), and isReg().
|
static |
Definition at line 1577 of file SIInsertWaitcnts.cpp.
References assert(), llvm::MachineOperand::getImm(), MI, OpIdx, and llvm::MachineOperand::setImm().
| DEBUG_TYPE |
Definition at line 1566 of file SIInsertWaitcnts.cpp.
|
static |
| SI Insert false |
Definition at line 1567 of file SIInsertWaitcnts.cpp.
|
static |
|
static |
| SI Insert Waitcnts |
Definition at line 1566 of file SIInsertWaitcnts.cpp.