LLVM 23.0.0git
SIInsertWaitcnts.cpp File Reference

Insert wait instructions for memory reads and writes. More...

Go to the source code of this file.

Macros

#define DEBUG_TYPE   "si-insert-waitcnts"

Typedefs

using HWEventSet = AMDGPU::HWEventSet
using HWEvent = AMDGPU::HWEvent

Functions

 INITIALIZE_PASS_BEGIN (SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts", false, false) INITIALIZE_PASS_END(SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts", false, false)
static bool updateOperandIfDifferent (MachineInstr &MI, AMDGPU::OpName OpName, unsigned NewEnc)
static bool isWaitInstr (MachineInstr &Inst)

Variables

static cl::opt< bool > ForceEmitZeroFlag ("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as " "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(false), cl::Hidden)
static cl::opt< bool > ForceEmitZeroLoadFlag ("amdgpu-waitcnt-load-forcezero", cl::desc("Force all waitcnt load counters to wait until 0"), cl::init(false), cl::Hidden)
static cl::opt< bool > ExpertSchedulingModeFlag ("amdgpu-expert-scheduling-mode", cl::desc("Enable expert scheduling mode 2 for all functions (GFX12+ only)"), cl::init(false), cl::Hidden)
 DEBUG_TYPE
SI Insert Waitcnts
SI Insert false

Detailed Description

Insert wait instructions for memory reads and writes.

Memory reads and writes are issued asynchronously, so we need to insert S_WAITCNT instructions when we want to access any of their results or overwrite any register that's used asynchronously.

TODO: This pass currently keeps one timeline per hardware counter. A more finely-grained approach that keeps one timeline per event type could sometimes get away with generating weaker s_waitcnt instructions. For example, when both SMEM and LDS are in flight and we need to wait for the i-th-last LDS instruction, then an lgkmcnt(i) is actually sufficient, but the pass will currently generate a conservative lgkmcnt(0) because multiple event types are in flight.

Definition in file SIInsertWaitcnts.cpp.

Macro Definition Documentation

◆ DEBUG_TYPE

#define DEBUG_TYPE   "si-insert-waitcnts"

Definition at line 50 of file SIInsertWaitcnts.cpp.

Typedef Documentation

◆ HWEvent

Definition at line 48 of file SIInsertWaitcnts.cpp.

◆ HWEventSet

Definition at line 47 of file SIInsertWaitcnts.cpp.

Function Documentation

◆ INITIALIZE_PASS_BEGIN()

INITIALIZE_PASS_BEGIN ( SIInsertWaitcntsLegacy ,
DEBUG_TYPE ,
"SI Insert Waitcnts" ,
false ,
false  )

◆ isWaitInstr()

◆ updateOperandIfDifferent()

bool updateOperandIfDifferent ( MachineInstr & MI,
AMDGPU::OpName OpName,
unsigned NewEnc )
static

Variable Documentation

◆ DEBUG_TYPE

DEBUG_TYPE

Definition at line 1566 of file SIInsertWaitcnts.cpp.

◆ ExpertSchedulingModeFlag

cl::opt< bool > ExpertSchedulingModeFlag ( "amdgpu-expert-scheduling-mode" ,
cl::desc("Enable expert scheduling mode 2 for all functions (GFX12+ only)") ,
cl::init(false) ,
cl::Hidden  )
static

◆ false

SI Insert false

Definition at line 1567 of file SIInsertWaitcnts.cpp.

◆ ForceEmitZeroFlag

cl::opt< bool > ForceEmitZeroFlag ( "amdgpu-waitcnt-forcezero" ,
cl::desc("Force all waitcnt instrs to be emitted as " "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)") ,
cl::init(false) ,
cl::Hidden  )
static

◆ ForceEmitZeroLoadFlag

cl::opt< bool > ForceEmitZeroLoadFlag ( "amdgpu-waitcnt-load-forcezero" ,
cl::desc("Force all waitcnt load counters to wait until 0") ,
cl::init(false) ,
cl::Hidden  )
static

◆ Waitcnts

SI Insert Waitcnts

Definition at line 1566 of file SIInsertWaitcnts.cpp.