LLVM 23.0.0git
GCNHazardRecognizer.h
Go to the documentation of this file.
1//===-- GCNHazardRecognizer.h - GCN Hazard Recognizers ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines hazard recognizers for scheduling on GCN processors.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
14#define LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
15
16#include "llvm/ADT/BitVector.h"
17#include "llvm/ADT/STLExtras.h"
21#include <list>
22
23namespace llvm {
24
25class MachineFunction;
26class MachineInstr;
27class MachineOperand;
29class SIInstrInfo;
30class SIRegisterInfo;
31class GCNSubtarget;
32
34public:
36 typedef function_ref<bool(const MachineInstr &, int WaitStates)> IsExpiredFn;
37 typedef function_ref<unsigned int(const MachineInstr &)> GetNumWaitStatesFn;
38
39private:
40 // Distinguish if we are called from scheduler or hazard recognizer
41 bool IsHazardRecognizerMode;
42
43 // This variable stores the instruction that has been emitted this cycle. It
44 // will be added to EmittedInstrs, when AdvanceCycle() or RecedeCycle() is
45 // called.
46 MachineInstr *CurrCycleInstr;
47 std::list<MachineInstr*> EmittedInstrs;
48
49 // WMMA co-execution hazards are only resolved by VALU-pipe activity (VALU
50 // ops or V_NOPs), never by S_NOPs, so track those instructions separately
51 // from EmittedInstrs.
52 std::list<MachineInstr *> EmittedVALUInstrs;
53 // Lookahead bound for EmittedVALUInstrs. It must be at least as large as the
54 // largest WMMA co-execution wait-state requirement (the maximum value in
55 // WMMAWaitStates/VALUWaitStates in checkWMMACoexecutionHazards). Increase
56 // this if those wait-state limits grow.
57 static constexpr unsigned MaxVALULookAhead = 18;
58 // When true, an unresolved WMMA co-execution hazard is pending, so stall
59 // cycles are optimistically recorded in EmittedVALUInstrs (they will become
60 // V_NOPs unless a non-VALU instruction is scheduled into them).
61 bool HasPendingWMMACoexecHazard = false;
62
63 const MachineFunction &MF;
64 const GCNSubtarget &ST;
65 const SIInstrInfo &TII;
66 const SIRegisterInfo &TRI;
67 const TargetSchedModel &TSchedModel;
68
69 // Loop info for V_NOP hoisting, passed from the pass manager.
70 MachineLoopInfo *MLI = nullptr;
71
72 bool RunLdsBranchVmemWARHazardFixup;
73
74 /// RegUnits of uses in the current soft memory clause.
75 mutable BitVector ClauseUses;
76
77 /// RegUnits of defs in the current soft memory clause.
78 mutable BitVector ClauseDefs;
79
/// Clear the register-unit sets recorded for the current soft memory clause
/// by resetting every bit in both ClauseUses and ClauseDefs. Both bit vectors
/// are declared mutable, which is why this can be a const member function.
80 void resetClause() const {
81 ClauseUses.reset();
82 ClauseDefs.reset();
83 }
84
85 void addClauseInst(const MachineInstr &MI) const;
86
87 /// \returns the number of wait states before another MFMA instruction can be
88 /// issued after \p MI.
89 unsigned getMFMAPipelineWaitStates(const MachineInstr &MI) const;
90
91 // Advance over a MachineInstr bundle. Look for hazards in the bundled
92 // instructions.
93 void processBundle();
94
95 // Run on an individual instruction in hazard recognizer mode. This can be
96 // used on a newly inserted instruction before returning from PreEmitNoops.
97 void runOnInstruction(MachineInstr *MI);
98
99 int getWaitStatesSince(IsHazardFn IsHazard, int Limit,
100 GetNumWaitStatesFn GetNumWaitStates) const;
101 int getWaitStatesSince(IsHazardFn IsHazard, int Limit) const;
102 int getWaitStatesSinceVALU(IsHazardFn IsHazard, int Limit) const;
103 int getWaitStatesSinceDef(unsigned Reg, IsHazardFn IsHazardDef,
104 int Limit) const;
105 int getWaitStatesSinceSetReg(IsHazardFn IsHazard, int Limit) const;
106
107 int checkSoftClauseHazards(MachineInstr *SMEM) const;
108 int checkSMRDHazards(MachineInstr *SMRD) const;
109 int checkVMEMHazards(MachineInstr *VMEM) const;
110 int checkDPPHazards(MachineInstr *DPP) const;
111 int checkDivFMasHazards(MachineInstr *DivFMas) const;
112 int checkGetRegHazards(MachineInstr *GetRegInstr) const;
113 int checkSetRegHazards(MachineInstr *SetRegInstr) const;
114 int createsVALUHazard(const MachineInstr &MI) const;
115 int checkVALUHazards(MachineInstr *VALU) const;
116 int checkVALUHazardsHelper(const MachineOperand &Def,
117 const MachineRegisterInfo &MRI) const;
118 int checkUniformWindowVALUHazardsHelper(Register Reg) const;
119 int checkSOFFSETWindowVALUHazardsHelper(Register Reg) const;
120 int checkRWLaneHazards(MachineInstr *RWLane) const;
121 int checkRFEHazards(MachineInstr *RFE) const;
122 int checkInlineAsmHazards(MachineInstr *IA) const;
123 int checkReadM0Hazards(MachineInstr *SMovRel) const;
124 int checkNSAtoVMEMHazard(MachineInstr *MI) const;
125 int checkFPAtomicToDenormModeHazard(MachineInstr *MI) const;
126 // Emit \p WaitStatesNeeded V_NOP instructions before \p InsertPt.
127 // If IsHoisting is true, uses empty DebugLoc for compiler-inserted NOPs.
128 void emitVNops(MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
129 int WaitStatesNeeded, bool IsHoisting = false);
130 void fixHazards(MachineInstr *MI);
131 bool fixVcmpxPermlaneHazards(MachineInstr *MI);
132 bool fixVMEMtoScalarWriteHazards(MachineInstr *MI);
133 bool fixSMEMtoVectorWriteHazards(MachineInstr *MI);
134 bool fixVcmpxExecWARHazard(MachineInstr *MI);
135 bool fixLdsBranchVmemWARHazard(MachineInstr *MI);
136 bool fixLdsDirectVALUHazard(MachineInstr *MI);
137 bool fixLdsDirectVMEMHazard(MachineInstr *MI);
138 bool fixVALUPartialForwardingHazard(MachineInstr *MI);
139 bool fixVALUTransUseHazard(MachineInstr *MI);
140 bool fixVALUTransCoexecutionHazards(MachineInstr *MI);
141 bool fixWMMAHazards(MachineInstr *MI);
142 int checkWMMACoexecutionHazards(MachineInstr *MI) const;
143 bool fixWMMACoexecutionHazards(MachineInstr *MI);
144 bool tryHoistWMMAVnopsFromLoop(MachineInstr *MI, int WaitStatesNeeded);
145 bool hasWMMAHazardInLoop(MachineLoop *L, MachineInstr *MI,
146 bool IncludeSubloops = true);
147 bool hasWMMAToWMMARegOverlap(const MachineInstr &WMMA,
148 const MachineInstr &MI) const;
149 bool hasWMMAToVALURegOverlap(const MachineInstr &WMMA,
150 const MachineInstr &MI) const;
151 bool isCoexecutionHazardFor(const MachineInstr &I,
152 const MachineInstr &MI) const;
153 bool fixShift64HighRegBug(MachineInstr *MI);
154 bool fixVALUMaskWriteHazard(MachineInstr *MI);
155 bool fixRequiredExportPriority(MachineInstr *MI);
156 bool fixGetRegWaitIdle(MachineInstr *MI);
157 bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
158 bool fixScratchBaseForwardingHazard(MachineInstr *MI);
159 bool fixSetRegMode(MachineInstr *MI);
160
161 int checkMAIHazards(MachineInstr *MI) const;
162 int checkMAIHazards908(MachineInstr *MI) const;
163 int checkMAIHazards90A(MachineInstr *MI) const;
164 /// Pad the latency between neighboring MFMA instructions with s_nops. The
165 /// percentage of wait states to fill with s_nops is specified by the command
166 /// line option '-amdgpu-mfma-padding-ratio'.
167 ///
168 /// For example, with '-amdgpu-mfma-padding-ratio=100':
169 ///
170 /// 2 pass MFMA instructions have a latency of 2 wait states. Therefore, a
171 /// 'S_NOP 1' will be added between sequential MFMA instructions.
172 ///
173 /// V_MFMA_F32_4X4X1F32
174 /// V_MFMA_F32_4X4X1F32
175 ///-->
176 /// V_MFMA_F32_4X4X1F32
177 /// S_NOP 1
178 /// V_MFMA_F32_4X4X1F32
179 int checkMFMAPadding(MachineInstr *MI) const;
180 int checkMAIVALUHazards(MachineInstr *MI) const;
181 int checkMAILdStHazards(MachineInstr *MI) const;
182 int checkPermlaneHazards(MachineInstr *MI) const;
183
184public:
186 MachineLoopInfo *MLI = nullptr);
187 // We can only issue one instruction per cycle, so always report that no more instructions may be issued in the current cycle.
188 bool atIssueLimit() const override { return true; }
189 void EmitInstruction(SUnit *SU) override;
190 void EmitInstruction(MachineInstr *MI) override;
191 HazardType getHazardType(SUnit *SU, int Stalls) override;
192
193 /// Returns the number of wait states until all hazards for \p MI are
194 /// resolved. This is useful for scheduling heuristics that want
195 /// cycle-accurate hazard information rather than just a boolean. Unlike
196 /// PreEmitNoops, this does not modify state or fix hazards.
197 unsigned getHazardWaitStates(MachineInstr *MI) const;
198 void EmitNoop() override;
199 unsigned PreEmitNoops(MachineInstr *) override;
200 unsigned PreEmitNoopsCommon(MachineInstr *) const;
201 void AdvanceCycle() override;
202 void RecedeCycle() override;
203 bool ShouldPreferAnother(SUnit *SU) const override;
204 void Reset() override;
205};
206
207} // end namespace llvm
208
209#endif //LLVM_LIB_TARGET_AMDGPUHAZARDRECOGNIZERS_H
MachineBasicBlock & MBB
This file implements the BitVector class.
static int getWaitStatesSince(GCNHazardRecognizer::IsHazardFn IsHazard, const MachineBasicBlock *MBB, MachineBasicBlock::const_reverse_instr_iterator I, int WaitStates, GCNHazardRecognizer::IsExpiredFn IsExpired, DenseSet< const MachineBasicBlock * > &Visited, GCNHazardRecognizer::GetNumWaitStatesFn GetNumWaitStates=SIInstrInfo::getNumWaitStates)
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
This file contains some templates that are useful if you are working with the STL at all.
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409
unsigned getHazardWaitStates(MachineInstr *MI) const
Returns the number of wait states until all hazards for MI are resolved.
unsigned PreEmitNoopsCommon(MachineInstr *) const
void EmitNoop() override
EmitNoop - This callback is invoked when a noop was added to the instruction stream.
void Reset() override
Reset - This callback is invoked when a new block of instructions is about to be scheduled.
unsigned PreEmitNoops(MachineInstr *) override
This overload will be used when the hazard recognizer is being used by a non-scheduling pass,...
void EmitInstruction(SUnit *SU) override
EmitInstruction - This callback is invoked when an instruction is emitted, to advance the hazard stat...
function_ref< bool(const MachineInstr &)> IsHazardFn
void AdvanceCycle() override
AdvanceCycle - This callback is invoked whenever the next top-down instruction to be scheduled cannot...
function_ref< unsigned int(const MachineInstr &)> GetNumWaitStatesFn
bool ShouldPreferAnother(SUnit *SU) const override
ShouldPreferAnother - This callback may be invoked if getHazardType returns NoHazard.
function_ref< bool(const MachineInstr &, int WaitStates)> IsExpiredFn
GCNHazardRecognizer(const MachineFunction &MF, MachineLoopInfo *MLI=nullptr)
HazardType getHazardType(SUnit *SU, int Stalls) override
getHazardType - Return the hazard type of emitting this node.
void RecedeCycle() override
RecedeCycle - This callback is invoked whenever the next bottom-up instruction to be scheduled cannot...
bool atIssueLimit() const override
atIssueLimit - Return true if no more instructions may be issued in this cycle.
MachineInstrBundleIterator< MachineInstr > iterator
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Scheduling unit. This is a node in the scheduling DAG.
Provide an instruction scheduling machine model to CodeGen passes.
An efficient, type-erasing, non-owning reference to a callable.
This is an optimization pass for GlobalISel generic memory operations.