LLVM  16.0.0git
SIInsertHardClauses.cpp
Go to the documentation of this file.
1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Insert s_clause instructions to form hard clauses.
11 ///
12 /// Clausing load instructions can give cache coherency benefits. Before gfx10,
13 /// the hardware automatically detected "soft clauses", which were sequences of
14 /// memory instructions of the same type. In gfx10 this detection was removed,
15 /// and the s_clause instruction was introduced to explicitly mark "hard
16 /// clauses".
17 ///
18 /// It's the scheduler's job to form the clauses by putting similar memory
19 /// instructions next to each other. Our job is just to insert an s_clause
20 /// instruction to mark the start of each clause.
21 ///
22 /// Note that hard clauses are very similar to, but logically distinct from, the
23 /// groups of instructions that have to be restartable when XNACK is enabled.
24 /// The rules are slightly different in each case. For example an s_nop
25 /// instruction breaks a restartable group, but can appear in the middle of a
26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27 /// "soft clauses" or just "clauses".)
28 ///
29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30 /// groups, not hard clauses.
31 //
32 //===----------------------------------------------------------------------===//
33 
34 #include "AMDGPU.h"
35 #include "GCNSubtarget.h"
37 #include "llvm/ADT/SmallVector.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "si-insert-hard-clauses"
43 
44 namespace {
45 
46 // A clause length of 64 instructions could be encoded in the s_clause
47 // instruction, but the hardware documentation (at least for GFX11) says that
48 // 63 is the maximum allowed.
// This limit is enforced in two places in this file: runOnMachineFunction
// finishes the current clause as soon as its length reaches this value, and
// emitClause asserts that no longer clause is ever emitted.
49 constexpr unsigned MaxInstructionsInClause = 63;
50 
// Classification of a machine instruction for the purpose of hard clause
// formation, as computed by getHardClauseType().
//
// The order of the enumerators is significant: the pass tests
// `Type <= LAST_REAL_HARDCLAUSE_TYPE` to decide whether an instruction may
// start or extend a clause, so every "real" clause type must be declared
// before HARDCLAUSE_INTERNAL / HARDCLAUSE_IGNORE / HARDCLAUSE_ILLEGAL.
51 enum HardClauseType {
52  // For GFX10:
53 
54  // Texture, buffer, global or scratch memory instructions.
55  HARDCLAUSE_VMEM,
56  // Flat (not global or scratch) memory instructions.
57  HARDCLAUSE_FLAT,
58 
59  // For GFX11:
60 
61  // Texture memory instructions.
62  HARDCLAUSE_MIMG_LOAD,
63  HARDCLAUSE_MIMG_STORE,
64  HARDCLAUSE_MIMG_ATOMIC,
65  HARDCLAUSE_MIMG_SAMPLE,
66  // Buffer, global or scratch memory instructions.
67  HARDCLAUSE_VMEM_LOAD,
68  HARDCLAUSE_VMEM_STORE,
69  HARDCLAUSE_VMEM_ATOMIC,
70  // Flat (not global or scratch) memory instructions.
71  HARDCLAUSE_FLAT_LOAD,
72  HARDCLAUSE_FLAT_STORE,
73  HARDCLAUSE_FLAT_ATOMIC,
74  // BVH instructions.
75  HARDCLAUSE_BVH,
76 
77  // Common:
78 
79  // Instructions that access LDS.
80  HARDCLAUSE_LDS,
81  // Scalar memory instructions.
82  HARDCLAUSE_SMEM,
83  // VALU instructions.
84  HARDCLAUSE_VALU,
    // Marker for the last type that can actually form a clause; everything
    // declared after it is a bookkeeping category, not a clause type.
85  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,
86 
87  // Internal instructions, which are allowed in the middle of a hard clause,
88  // except for s_waitcnt.
89  HARDCLAUSE_INTERNAL,
90  // Meta instructions that do not result in any ISA like KILL.
91  HARDCLAUSE_IGNORE,
92  // Instructions that are not allowed in a hard clause: SALU, export, branch,
93  // message, GDS, s_waitcnt and anything else not mentioned above.
94  HARDCLAUSE_ILLEGAL,
95 };
96 
// Machine function pass that scans each basic block, groups consecutive
// instructions of the same hard clause type into a clause, and marks every
// clause of more than one instruction by inserting an S_CLAUSE instruction in
// front of it and bundling the S_CLAUSE together with the clause body.
97 class SIInsertHardClauses : public MachineFunctionPass {
98 public:
99  static char ID;
    // Subtarget of the function currently being processed. Assigned at the
    // start of runOnMachineFunction and read by getHardClauseType.
100  const GCNSubtarget *ST = nullptr;
101 
102  SIInsertHardClauses() : MachineFunctionPass(ID) {}
103 
     // Declares that this pass does not change the CFG.
     // NOTE(review): the embedded listing numbering jumps from 105 to 107, so
     // one statement of this method is missing from this copy (presumably the
     // call to the base class getAnalysisUsage) -- confirm against upstream.
104  void getAnalysisUsage(AnalysisUsage &AU) const override {
105  AU.setPreservesCFG();
107  }
108 
     // Classify MI into one of the HardClauseType categories.
     //
     // Memory classification is only attempted for instructions that may load,
     // or that may store when the subtarget reports shouldClusterStores().
     // GFX10 uses the coarse VMEM / FLAT types; any other generation is
     // asserted to be GFX11 or later and uses the finer-grained types, where an
     // instruction that may both load and store is treated as an atomic.
     // Everything that is not classified as a memory clause type falls through
     // to the S_NOP / meta-instruction / illegal checks at the bottom.
109  HardClauseType getHardClauseType(const MachineInstr &MI) {
110  if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
111  if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
     // NOTE(review): listing line 112 is missing from this copy. It must open
     // the scope that is closed at listing line 119 (presumably a check that
     // MI is a VMEM or segment-specific FLAT instruction) -- confirm upstream.
     // On subtargets with the NSA clause bug, MIMG instructions using the
     // GFX10 NSA encoding must not be placed in a clause.
113  if (ST->hasNSAClauseBug()) {
114  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
115  if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
116  return HARDCLAUSE_ILLEGAL;
117  }
118  return HARDCLAUSE_VMEM;
119  }
120  if (SIInstrInfo::isFLAT(MI))
121  return HARDCLAUSE_FLAT;
122  } else {
123  assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
124  if (SIInstrInfo::isMIMG(MI)) {
125  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
126  const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
127  AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
     // BVH and sampler opcodes get their own clause types; the remaining MIMG
     // opcodes are split into load / store / atomic.
128  if (BaseInfo->BVH)
129  return HARDCLAUSE_BVH;
130  if (BaseInfo->Sampler)
131  return HARDCLAUSE_MIMG_SAMPLE;
132  return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
133  : HARDCLAUSE_MIMG_LOAD
134  : HARDCLAUSE_MIMG_STORE;
135  }
     // NOTE(review): listing line 136 is missing from this copy. It must open
     // the scope that is closed at listing line 140 (presumably the same VMEM /
     // segment-specific FLAT check as in the GFX10 branch) -- confirm upstream.
137  return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
138  : HARDCLAUSE_VMEM_LOAD
139  : HARDCLAUSE_VMEM_STORE;
140  }
141  if (SIInstrInfo::isFLAT(MI)) {
142  return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
143  : HARDCLAUSE_FLAT_LOAD
144  : HARDCLAUSE_FLAT_STORE;
145  }
146  }
147  // TODO: LDS
148  if (SIInstrInfo::isSMRD(MI))
149  return HARDCLAUSE_SMEM;
150  }
151 
152  // Don't form VALU clauses. It's not clear what benefit they give, if any.
153 
154  // In practice s_nop is the only internal instruction we're likely to see.
155  // It's safe to treat the rest as illegal.
156  if (MI.getOpcode() == AMDGPU::S_NOP)
157  return HARDCLAUSE_INTERNAL;
158  if (MI.isMetaInstruction())
159  return HARDCLAUSE_IGNORE;
160  return HARDCLAUSE_ILLEGAL;
161  }
162 
163  // Track information about a clause as we discover it.
164  struct ClauseInfo {
165  // The type of all (non-internal) instructions in the clause.
166  HardClauseType Type = HARDCLAUSE_ILLEGAL;
167  // The first (necessarily non-internal) instruction in the clause.
168  MachineInstr *First = nullptr;
169  // The last non-internal instruction in the clause.
170  MachineInstr *Last = nullptr;
171  // The length of the clause including any internal instructions in the
172  // middle (but not at the end) of the clause.
173  unsigned Length = 0;
174  // Internal instructions at the end of a clause should not be included in
175  // the clause. Count them in TrailingInternalLength until a new memory
176  // instruction is added.
177  unsigned TrailingInternalLength = 0;
178  // The base operands of *Last.
     // NOTE(review): listing line 179, which should declare the BaseOps member
     // described above (it is used as CI.BaseOps in runOnMachineFunction), is
     // missing from this copy -- confirm against upstream.
180  };
181 
     // Materialise the clause described by CI.
     //
     // Inserts an S_CLAUSE instruction (with an empty DebugLoc) immediately
     // before CI.First, with an immediate operand of CI.Length - 1, and then
     // bundles everything from the S_CLAUSE up to and including CI.Last.
     // A clause consisting of a single instruction (First == Last) is not worth
     // marking, so nothing is emitted for it.
     //
     // Returns true if an S_CLAUSE was inserted, false otherwise.
182  bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
183  if (CI.First == CI.Last)
184  return false;
185  assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");
186 
187  auto &MBB = *CI.First->getParent();
188  auto ClauseMI =
189  BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
190  .addImm(CI.Length - 1);
191  finalizeBundle(MBB, ClauseMI->getIterator(),
192  std::next(CI.Last->getIterator()));
193  return true;
194  }
195 
     // Pass entry point. Does nothing when the function is skipped or the
     // subtarget has no hard clauses. Otherwise walks every basic block once,
     // tracking the clause under construction in a ClauseInfo; clauses never
     // span basic blocks because the tracker is re-created per block.
     //
     // Returns true if at least one S_CLAUSE was inserted.
196  bool runOnMachineFunction(MachineFunction &MF) override {
197  if (skipFunction(MF.getFunction()))
198  return false;
199 
200  ST = &MF.getSubtarget<GCNSubtarget>();
201  if (!ST->hasHardClauses())
202  return false;
203 
204  const SIInstrInfo *SII = ST->getInstrInfo();
205  const TargetRegisterInfo *TRI = ST->getRegisterInfo();
206 
207  bool Changed = false;
208  for (auto &MBB : MF) {
209  ClauseInfo CI;
210  for (auto &MI : MBB) {
211  HardClauseType Type = getHardClauseType(MI);
212 
     // Only the base operands are of interest here; the offset, scalable flag
     // and width outputs of getMemOperandsWithOffsetWidth are discarded.
213  int64_t Dummy1;
214  bool Dummy2;
215  unsigned Dummy3;
     // NOTE(review): listing line 216, which should declare the local BaseOps
     // container used below, is missing from this copy -- confirm upstream.
217  if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
218  if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
219  Dummy3, TRI)) {
220  // We failed to get the base operands, so we'll never clause this
221  // instruction with any other, so pretend it's illegal.
222  Type = HARDCLAUSE_ILLEGAL;
223  }
224  }
225 
     // The current clause ends when it is already at the maximum length, or
     // when a non-internal, non-ignored instruction either has a different
     // type or is rejected by shouldClusterMemOps.
226  if (CI.Length == MaxInstructionsInClause ||
227  (CI.Length && Type != HARDCLAUSE_INTERNAL &&
228  Type != HARDCLAUSE_IGNORE &&
229  (Type != CI.Type ||
230  // Note that we lie to shouldClusterMemOps about the size of the
231  // cluster. When shouldClusterMemOps is called from the machine
232  // scheduler it limits the size of the cluster to avoid increasing
233  // register pressure too much, but this pass runs after register
234  // allocation so there is no need for that kind of limit.
235  !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
236  // Finish the current clause.
237  Changed |= emitClause(CI, SII);
238  CI = ClauseInfo();
239  }
240 
     // After a reset above CI.Length is 0, so the same instruction that ended
     // the previous clause may start a new one in the else-branch below.
241  if (CI.Length) {
242  // Extend the current clause.
243  if (Type != HARDCLAUSE_IGNORE) {
244  if (Type == HARDCLAUSE_INTERNAL) {
     // Internal instructions are only counted provisionally; they become part
     // of the clause once another clause instruction follows them.
245  ++CI.TrailingInternalLength;
246  } else {
247  ++CI.Length;
248  CI.Length += CI.TrailingInternalLength;
249  CI.TrailingInternalLength = 0;
250  CI.Last = &MI;
251  CI.BaseOps = std::move(BaseOps);
252  }
253  }
254  } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
255  // Start a new clause.
256  CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
257  }
258  }
259 
260  // Finish the last clause in the basic block if any.
261  if (CI.Length)
262  Changed |= emitClause(CI, SII);
263  }
264 
265  return Changed;
266  }
267 };
268 
269 } // namespace
270 
// Storage for the pass identifier passed to the MachineFunctionPass base
// class by the SIInsertHardClauses constructor.
271 char SIInsertHardClauses::ID = 0;
272 
// NOTE(review): listing line 273 is missing from this copy; the cross-reference
// data for this page places the definition of llvm::SIInsertHardClausesID
// there -- confirm against upstream.
274 
// Register the pass under DEBUG_TYPE ("si-insert-hard-clauses"); the two
// trailing `false` arguments are the macro's `cfg` and `analysis` parameters.
275 INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
276  false, false)
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::PseudoProbeReservedId::Last
@ Last
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:236
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::GCNSubtarget
Definition: GCNSubtarget.h:31
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1628
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition: AMDGPUSubtarget.h:42
llvm::finalizeBundle
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
Definition: MachineInstrBundle.cpp:124
llvm::SIInstrInfo::isMIMG
static bool isMIMG(const MachineInstr &MI)
Definition: SIInstrInfo.h:501
GCNSubtarget.h
INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:37
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition: AMDGPUSubtarget.h:41
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:660
DEBUG_TYPE
#define DEBUG_TYPE
Definition: SIInsertHardClauses.cpp:42
AMDGPUMCTargetDesc.h
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::SIInstrInfo::isSMRD
static bool isSMRD(const MachineInstr &MI)
Definition: SIInstrInfo.h:481
MachineFunctionPass.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SIInsertHardClausesID
char & SIInsertHardClausesID
Definition: SIInsertHardClauses.cpp:273
llvm::AMDGPU::getMIMGInfo
const LLVM_READONLY MIMGInfo * getMIMGInfo(unsigned Opc)
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::AMDGPU::getMIMGBaseOpcodeInfo
const LLVM_READONLY MIMGBaseOpcodeInfo * getMIMGBaseOpcodeInfo(unsigned BaseOpcode)
llvm::SIInstrInfo::shouldClusterMemOps
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, ArrayRef< const MachineOperand * > BaseOps2, unsigned NumLoads, unsigned NumBytes) const override
Definition: SIInstrInfo.cpp:481
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
AMDGPU.h
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::SIInstrInfo::getMemOperandsWithOffsetWidth
bool getMemOperandsWithOffsetWidth(const MachineInstr &LdSt, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, unsigned &Width, const TargetRegisterInfo *TRI) const final
Definition: SIInstrInfo.cpp:294
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:626
llvm::SIInstrInfo::isSegmentSpecificFLAT
static bool isSegmentSpecificFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:523
llvm::SIInstrInfo::isVMEM
static bool isVMEM(const MachineInstr &MI)
Definition: SIInstrInfo.h:369
llvm::SIInstrInfo
Definition: SIInstrInfo.h:44
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:357
SmallVector.h
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::SIInstrInfo::isFLAT
static bool isFLAT(const MachineInstr &MI)
Definition: SIInstrInfo.h:517