LLVM 17.0.0git
SIInsertHardClauses.cpp
Go to the documentation of this file.
1//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Insert s_clause instructions to form hard clauses.
11///
12/// Clausing load instructions can give cache coherency benefits. Before gfx10,
13/// the hardware automatically detected "soft clauses", which were sequences of
14/// memory instructions of the same type. In gfx10 this detection was removed,
15/// and the s_clause instruction was introduced to explicitly mark "hard
16/// clauses".
17///
18/// It's the scheduler's job to form the clauses by putting similar memory
19/// instructions next to each other. Our job is just to insert an s_clause
20/// instruction to mark the start of each clause.
21///
22/// Note that hard clauses are very similar to, but logically distinct from, the
23/// groups of instructions that have to be restartable when XNACK is enabled.
24/// The rules are slightly different in each case. For example an s_nop
25/// instruction breaks a restartable group, but can appear in the middle of a
26/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27/// "soft clauses" or just "clauses".)
28///
29/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30/// groups, not hard clauses.
31//
32//===----------------------------------------------------------------------===//
33
34#include "AMDGPU.h"
35#include "GCNSubtarget.h"
36#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37#include "llvm/ADT/SmallVector.h"
38#include "llvm/CodeGen/MachineFunctionPass.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "si-insert-hard-clauses"
43
44namespace {
45
46// A clause length of 64 instructions could be encoded in the s_clause
47// instruction, but the hardware documentation (at least for GFX11) says that
48// 63 is the maximum allowed.
// A clause under construction is closed as soon as its length reaches this
// value, and emitClause asserts that no longer clause is ever emitted.
49constexpr unsigned MaxInstructionsInClause = 63;
50
// Categories used to decide which instructions may share a hard clause.
// Enumerator order is significant: every value up to and including
// LAST_REAL_HARDCLAUSE_TYPE names a clausable instruction kind, and the pass
// relies on an ordered comparison against that marker.
51enum HardClauseType {
52 // For GFX10:
53
54 // Texture, buffer, global or scratch memory instructions.
55 HARDCLAUSE_VMEM,
56 // Flat (not global or scratch) memory instructions.
57 HARDCLAUSE_FLAT,
58
59 // For GFX11 (the classifier asserts the generation is GFX11 or later):
60
61 // Texture memory instructions.
62 HARDCLAUSE_MIMG_LOAD,
63 HARDCLAUSE_MIMG_STORE,
64 HARDCLAUSE_MIMG_ATOMIC,
65 HARDCLAUSE_MIMG_SAMPLE,
66 // Buffer, global or scratch memory instructions.
67 HARDCLAUSE_VMEM_LOAD,
68 HARDCLAUSE_VMEM_STORE,
69 HARDCLAUSE_VMEM_ATOMIC,
70 // Flat (not global or scratch) memory instructions.
71 HARDCLAUSE_FLAT_LOAD,
72 HARDCLAUSE_FLAT_STORE,
73 HARDCLAUSE_FLAT_ATOMIC,
74 // BVH instructions.
75 HARDCLAUSE_BVH,
76
77 // Common:
78
79 // Instructions that access LDS.
 // (Classification of LDS instructions is still a TODO in getHardClauseType.)
80 HARDCLAUSE_LDS,
81 // Scalar memory instructions.
82 HARDCLAUSE_SMEM,
83 // VALU instructions.
 // (getHardClauseType deliberately does not form VALU clauses.)
84 HARDCLAUSE_VALU,
 // Marker for the last clausable category; not a category of its own.
85 LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,
86
87 // Internal instructions, which are allowed in the middle of a hard clause,
88 // except for s_waitcnt.
89 HARDCLAUSE_INTERNAL,
90 // Meta instructions, such as KILL, that do not result in any ISA.
91 HARDCLAUSE_IGNORE,
92 // Instructions that are not allowed in a hard clause: SALU, export, branch,
93 // message, GDS, s_waitcnt and anything else not mentioned above.
94 HARDCLAUSE_ILLEGAL,
95};
96
97class SIInsertHardClauses : public MachineFunctionPass {
98public:
// Pass identification; the storage is defined after the class.
99 static char ID;
// Subtarget of the function being processed. Assigned at the start of
// runOnMachineFunction and read by getHardClauseType.
100 const GCNSubtarget *ST = nullptr;
101
// Hands the pass ID to the MachineFunctionPass base class.
102 SIInsertHardClauses() : MachineFunctionPass(ID) {}
103
104 void getAnalysisUsage(AnalysisUsage &AU) const override {
105 AU.setPreservesCFG();
107 }
108
109 HardClauseType getHardClauseType(const MachineInstr &MI) {
110 if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
111 if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
113 if (ST->hasNSAClauseBug()) {
114 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
115 if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
116 return HARDCLAUSE_ILLEGAL;
117 }
118 return HARDCLAUSE_VMEM;
119 }
121 return HARDCLAUSE_FLAT;
122 } else {
123 assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
124 if (SIInstrInfo::isMIMG(MI)) {
125 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
126 const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
128 if (BaseInfo->BVH)
129 return HARDCLAUSE_BVH;
130 if (BaseInfo->Sampler)
131 return HARDCLAUSE_MIMG_SAMPLE;
132 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
133 : HARDCLAUSE_MIMG_LOAD
134 : HARDCLAUSE_MIMG_STORE;
135 }
137 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
138 : HARDCLAUSE_VMEM_LOAD
139 : HARDCLAUSE_VMEM_STORE;
140 }
141 if (SIInstrInfo::isFLAT(MI)) {
142 return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
143 : HARDCLAUSE_FLAT_LOAD
144 : HARDCLAUSE_FLAT_STORE;
145 }
146 }
147 // TODO: LDS
149 return HARDCLAUSE_SMEM;
150 }
151
152 // Don't form VALU clauses. It's not clear what benefit they give, if any.
153
154 // In practice s_nop is the only internal instruction we're likely to see.
155 // It's safe to treat the rest as illegal.
156 if (MI.getOpcode() == AMDGPU::S_NOP)
157 return HARDCLAUSE_INTERNAL;
158 if (MI.isMetaInstruction())
159 return HARDCLAUSE_IGNORE;
160 return HARDCLAUSE_ILLEGAL;
161 }
162
163 // Track information about a clause as we discover it.
164 struct ClauseInfo {
165 // The type of all (non-internal) instructions in the clause.
166 HardClauseType Type = HARDCLAUSE_ILLEGAL;
167 // The first (necessarily non-internal) instruction in the clause.
168 MachineInstr *First = nullptr;
169 // The last non-internal instruction in the clause.
170 MachineInstr *Last = nullptr;
171 // The length of the clause including any internal instructions in the
172 // middle (but not at the end) of the clause.
173 unsigned Length = 0;
174 // Internal instructions at the and of a clause should not be included in
175 // the clause. Count them in TrailingInternalLength until a new memory
176 // instruction is added.
177 unsigned TrailingInternalLength = 0;
178 // The base operands of *Last.
180 };
181
182 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
183 if (CI.First == CI.Last)
184 return false;
185 assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");
186
187 auto &MBB = *CI.First->getParent();
188 auto ClauseMI =
189 BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
190 .addImm(CI.Length - 1);
191 finalizeBundle(MBB, ClauseMI->getIterator(),
192 std::next(CI.Last->getIterator()));
193 return true;
194 }
195
196 bool runOnMachineFunction(MachineFunction &MF) override {
197 if (skipFunction(MF.getFunction()))
198 return false;
199
200 ST = &MF.getSubtarget<GCNSubtarget>();
201 if (!ST->hasHardClauses())
202 return false;
203
204 const SIInstrInfo *SII = ST->getInstrInfo();
205 const TargetRegisterInfo *TRI = ST->getRegisterInfo();
206
207 bool Changed = false;
208 for (auto &MBB : MF) {
209 ClauseInfo CI;
210 for (auto &MI : MBB) {
211 HardClauseType Type = getHardClauseType(MI);
212
213 int64_t Dummy1;
214 bool Dummy2;
215 unsigned Dummy3;
217 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
218 if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
219 Dummy3, TRI)) {
220 // We failed to get the base operands, so we'll never clause this
221 // instruction with any other, so pretend it's illegal.
222 Type = HARDCLAUSE_ILLEGAL;
223 }
224 }
225
226 if (CI.Length == MaxInstructionsInClause ||
227 (CI.Length && Type != HARDCLAUSE_INTERNAL &&
228 Type != HARDCLAUSE_IGNORE &&
229 (Type != CI.Type ||
230 // Note that we lie to shouldClusterMemOps about the size of the
231 // cluster. When shouldClusterMemOps is called from the machine
232 // scheduler it limits the size of the cluster to avoid increasing
233 // register pressure too much, but this pass runs after register
234 // allocation so there is no need for that kind of limit.
235 !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
236 // Finish the current clause.
237 Changed |= emitClause(CI, SII);
238 CI = ClauseInfo();
239 }
240
241 if (CI.Length) {
242 // Extend the current clause.
243 if (Type != HARDCLAUSE_IGNORE) {
244 if (Type == HARDCLAUSE_INTERNAL) {
245 ++CI.TrailingInternalLength;
246 } else {
247 ++CI.Length;
248 CI.Length += CI.TrailingInternalLength;
249 CI.TrailingInternalLength = 0;
250 CI.Last = &MI;
251 CI.BaseOps = std::move(BaseOps);
252 }
253 }
254 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
255 // Start a new clause.
256 CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
257 }
258 }
259
260 // Finish the last clause in the basic block if any.
261 if (CI.Length)
262 Changed |= emitClause(CI, SII);
263 }
264
265 return Changed;
266 }
267};
268
269} // namespace
270
// Storage for the pass ID declared inside the class.
271char SIInsertHardClauses::ID = 0;
272
// Publishes the pass ID in the llvm namespace as a reference to the same
// object.
273char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
274
// Register the pass under the DEBUG_TYPE argument string with a human-readable
// name; the two trailing arguments are the macro's `cfg` and `analysis`
// parameters.
275INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
276 false, false)
MachineBasicBlock & MBB
Provides AMDGPU specific target descriptions.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
AMD GCN specific subclass of TargetSubtarget.
IRTranslator LLVM IR MI
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
#define DEBUG_TYPE
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
A debug info location.
Definition: DebugLoc.h:33
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:174
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
static bool isVMEM(const MachineInstr &MI)
Definition: SIInstrInfo.h:370
static bool isSMRD(const MachineInstr &MI)
Definition: SIInstrInfo.h:482
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, const TargetRegisterInfo *TRI) const final
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:524
static bool isMIMG(const MachineInstr &MI)
Definition: SIInstrInfo.h:502
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2, unsigned NumLoads, unsigned NumBytes) const override
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:518
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Type(LLVMContext &C, TypeID tid)
Definition: Type.h:94
self_iterator getIterator()
Definition: ilist_node.h:82
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
LLVM_READONLY const MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:406
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
char & SIInsertHardClausesID