LLVM 22.0.0git
SIInsertWaitcnts.cpp File Reference

Insert wait instructions for memory reads and writes. More...

Go to the source code of this file.

Classes

struct  llvm::enum_iteration_traits< InstCounterType >

Namespaces

namespace  llvm
 This is an optimization pass for GlobalISel generic memory operations.

Macros

#define DEBUG_TYPE   "si-insert-waitcnts"
#define AMDGPU_DECLARE_WAIT_EVENTS(DECL)
#define AMDGPU_EVENT_ENUM(Name)
#define AMDGPU_EVENT_NAME(Name)

Functions

 DEBUG_COUNTER (ForceExpCounter, DEBUG_TYPE "-forceexp", "Force emit s_waitcnt expcnt(0) instrs")
 DEBUG_COUNTER (ForceLgkmCounter, DEBUG_TYPE "-forcelgkm", "Force emit s_waitcnt lgkmcnt(0) instrs")
 DEBUG_COUNTER (ForceVMCounter, DEBUG_TYPE "-forcevm", "Force emit s_waitcnt vmcnt(0) instrs")
 INITIALIZE_PASS_BEGIN (SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts", false, false) INITIALIZE_PASS_END (SIInsertWaitcntsLegacy, DEBUG_TYPE, "SI Insert Waitcnts", false, false)
static bool updateOperandIfDifferent (MachineInstr &MI, AMDGPU::OpName OpName, unsigned NewEnc)
static std::optional< InstCounterType > counterTypeForInstr (unsigned Opcode)
 Determine if Opcode identifies a gfx12+ single-counter S_WAIT_*CNT instruction, and if so, which counter it is waiting on.
static bool readsVCCZ (const MachineInstr &MI)
static bool callWaitsOnFunctionEntry (const MachineInstr &MI)
static bool callWaitsOnFunctionReturn (const MachineInstr &MI)
static bool isGFX12CacheInvOrWBInst (MachineInstr &Inst)
static bool isWaitInstr (MachineInstr &Inst)

Variables

static cl::opt< bool > ForceEmitZeroFlag ("amdgpu-waitcnt-forcezero", cl::desc("Force all waitcnt instrs to be emitted as " "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)"), cl::init(false), cl::Hidden)
static cl::opt< bool > ForceEmitZeroLoadFlag ("amdgpu-waitcnt-load-forcezero", cl::desc("Force all waitcnt load counters to wait until 0"), cl::init(false), cl::Hidden)
 DEBUG_TYPE
SI Insert Waitcnts
SI Insert false

Detailed Description

Insert wait instructions for memory reads and writes.

Memory reads and writes are issued asynchronously, so we need to insert S_WAITCNT instructions when we want to access any of their results or overwrite any register that's used asynchronously.

TODO: This pass currently keeps one timeline per hardware counter. A more finely-grained approach that keeps one timeline per event type could sometimes get away with generating weaker s_waitcnt instructions. For example, when both SMEM and LDS are in flight and we need to wait for the i-th-last LDS instruction, then an lgkmcnt(i) is actually sufficient, but the pass will currently generate a conservative lgkmcnt(0) because multiple event types are in flight.

Definition in file SIInsertWaitcnts.cpp.

Macro Definition Documentation

◆ AMDGPU_DECLARE_WAIT_EVENTS

#define AMDGPU_DECLARE_WAIT_EVENTS ( DECL)
Value:
DECL(VMEM_ACCESS) /* vmem read & write */ \
DECL(VMEM_READ_ACCESS) /* vmem read */ \
DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
DECL(SCRATCH_WRITE_ACCESS) /* vmem write that may be scratch */ \
DECL(VMEM_GROUP) /* vmem group */ \
DECL(LDS_ACCESS) /* lds read & write */ \
DECL(GDS_ACCESS) /* gds read & write */ \
DECL(SQ_MESSAGE) /* send message */ \
DECL(SMEM_ACCESS) /* scalar-memory read & write */ \
DECL(SMEM_GROUP) /* scalar-memory group */ \
DECL(EXP_GPR_LOCK) /* export holding on its data src */ \
DECL(GDS_GPR_LOCK) /* GDS holding on its data and addr src */ \
DECL(EXP_POS_ACCESS) /* write to export position */ \
DECL(EXP_PARAM_ACCESS) /* write to export parameter */ \
DECL(VMW_GPR_LOCK) /* vmem write holding on its data src */ \
DECL(EXP_LDS_ACCESS) /* read by ldsdir counting as export */

Definition at line 113 of file SIInsertWaitcnts.cpp.

◆ AMDGPU_EVENT_ENUM

#define AMDGPU_EVENT_ENUM ( Name)
Value:
Name,

Definition at line 134 of file SIInsertWaitcnts.cpp.

◆ AMDGPU_EVENT_NAME

#define AMDGPU_EVENT_NAME ( Name)
Value:
#Name,

Definition at line 141 of file SIInsertWaitcnts.cpp.

◆ DEBUG_TYPE

#define DEBUG_TYPE   "si-insert-waitcnts"

Definition at line 46 of file SIInsertWaitcnts.cpp.

Function Documentation

◆ callWaitsOnFunctionEntry()

bool callWaitsOnFunctionEntry ( const MachineInstr & MI)
static
Returns
true if the callee inserts an s_waitcnt 0 on function entry.

Definition at line 1785 of file SIInsertWaitcnts.cpp.

References MI.

◆ callWaitsOnFunctionReturn()

bool callWaitsOnFunctionReturn ( const MachineInstr & MI)
static
Returns
true if the callee is expected to wait for any outstanding waits before returning.

Definition at line 1795 of file SIInsertWaitcnts.cpp.

References MI.

◆ counterTypeForInstr()

std::optional< InstCounterType > counterTypeForInstr ( unsigned Opcode)
static

Determine if Opcode identifies a gfx12+ single-counter S_WAIT_*CNT instruction, and if so, which counter it is waiting on.

Definition at line 1305 of file SIInsertWaitcnts.cpp.

Referenced by isWaitInstr().

◆ DEBUG_COUNTER() [1/3]

DEBUG_COUNTER ( ForceExpCounter ,
DEBUG_TYPE "-forceexp" ,
"Force emit s_waitcnt expcnt(0) instrs"  )

References DEBUG_TYPE.

◆ DEBUG_COUNTER() [2/3]

DEBUG_COUNTER ( ForceLgkmCounter ,
DEBUG_TYPE "-forcelgkm" ,
"Force emit s_waitcnt lgkmcnt(0) instrs"  )

References DEBUG_TYPE.

◆ DEBUG_COUNTER() [3/3]

DEBUG_COUNTER ( ForceVMCounter ,
DEBUG_TYPE "-forcevm" ,
"Force emit s_waitcnt vmcnt(0) instrs"  )

References DEBUG_TYPE.

◆ INITIALIZE_PASS_BEGIN()

INITIALIZE_PASS_BEGIN ( SIInsertWaitcntsLegacy ,
DEBUG_TYPE ,
"SI Insert Waitcnts" ,
false ,
false  )

◆ isGFX12CacheInvOrWBInst()

bool isGFX12CacheInvOrWBInst ( MachineInstr & Inst)
static

Definition at line 2197 of file SIInsertWaitcnts.cpp.

References llvm::MachineInstr::getOpcode(), and Opc.

◆ isWaitInstr()

◆ readsVCCZ()

bool readsVCCZ ( const MachineInstr & MI)
static

Definition at line 1778 of file SIInsertWaitcnts.cpp.

References MI, and Opc.

◆ updateOperandIfDifferent()

bool updateOperandIfDifferent ( MachineInstr & MI,
AMDGPU::OpName OpName,
unsigned NewEnc )
static

Variable Documentation

◆ DEBUG_TYPE

DEBUG_TYPE

Definition at line 1278 of file SIInsertWaitcnts.cpp.

◆ false

SI Insert false

Definition at line 1279 of file SIInsertWaitcnts.cpp.

◆ ForceEmitZeroFlag

cl::opt< bool > ForceEmitZeroFlag ( "amdgpu-waitcnt-forcezero" ,
cl::desc("Force all waitcnt instrs to be emitted as " "s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)") ,
cl::init(false) ,
cl::Hidden  )
static

◆ ForceEmitZeroLoadFlag

cl::opt< bool > ForceEmitZeroLoadFlag ( "amdgpu-waitcnt-load-forcezero" ,
cl::desc("Force all waitcnt load counters to wait until 0") ,
cl::init(false) ,
cl::Hidden  )
static

◆ Waitcnts

SI Insert Waitcnts

Definition at line 1278 of file SIInsertWaitcnts.cpp.