LLVM 20.0.0git
SIPostRABundler.cpp
Go to the documentation of this file.
1//===-- SIPostRABundler.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This pass creates bundles of memory instructions to protect adjacent loads
11/// and stores from being rescheduled apart from each other post-RA.
12///
13//===----------------------------------------------------------------------===//
14
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE "si-post-ra-bundler"
23
24namespace {
25
26class SIPostRABundler : public MachineFunctionPass {
27public:
28 static char ID;
29
30public:
31 SIPostRABundler() : MachineFunctionPass(ID) {
33 }
34
35 bool runOnMachineFunction(MachineFunction &MF) override;
36
37 StringRef getPassName() const override {
38 return "SI post-RA bundler";
39 }
40
41 void getAnalysisUsage(AnalysisUsage &AU) const override {
42 AU.setPreservesAll();
44 }
45
46private:
47 const SIRegisterInfo *TRI;
48
50
51 void collectUsedRegUnits(const MachineInstr &MI,
52 BitVector &UsedRegUnits) const;
53
54 bool isBundleCandidate(const MachineInstr &MI) const;
55 bool isDependentLoad(const MachineInstr &MI) const;
56 bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
57};
58
62
63} // End anonymous namespace.
64
65INITIALIZE_PASS(SIPostRABundler, DEBUG_TYPE, "SI post-RA bundler", false, false)
66
67char SIPostRABundler::ID = 0;
68
69char &llvm::SIPostRABundlerID = SIPostRABundler::ID;
70
72 return new SIPostRABundler();
73}
74
75bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
76 if (!MI.mayLoad())
77 return false;
78
79 for (const MachineOperand &Op : MI.explicit_operands()) {
80 if (!Op.isReg())
81 continue;
82 Register Reg = Op.getReg();
83 for (Register Def : Defs)
84 if (TRI->regsOverlap(Reg, Def))
85 return true;
86 }
87
88 return false;
89}
90
91void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
92 BitVector &UsedRegUnits) const {
93 if (MI.isDebugInstr())
94 return;
95
96 for (const MachineOperand &Op : MI.operands()) {
97 if (!Op.isReg() || !Op.readsReg())
98 continue;
99
100 Register Reg = Op.getReg();
101 assert(!Op.getSubReg() &&
102 "subregister indexes should not be present after RA");
103
104 for (MCRegUnit Unit : TRI->regunits(Reg))
105 UsedRegUnits.set(Unit);
106 }
107}
108
109bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
110 const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
111 return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
112}
113
114bool SIPostRABundler::canBundle(const MachineInstr &MI,
115 const MachineInstr &NextMI) const {
116 const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
117
118 return (IMemFlags != 0 && MI.mayLoadOrStore() && !NextMI.isBundled() &&
119 NextMI.mayLoad() == MI.mayLoad() && NextMI.mayStore() == MI.mayStore() &&
120 ((NextMI.getDesc().TSFlags & MemFlags) == IMemFlags) &&
121 !isDependentLoad(NextMI));
122}
123
124bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
125 if (skipFunction(MF.getFunction()))
126 return false;
127
128 TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
129 BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
130 BitVector KillUsedRegUnits(TRI->getNumRegUnits());
131
132 bool Changed = false;
133 for (MachineBasicBlock &MBB : MF) {
134 bool HasIGLPInstrs = llvm::any_of(MBB.instrs(), [](MachineInstr &MI) {
135 unsigned Opc = MI.getOpcode();
136 return Opc == AMDGPU::SCHED_GROUP_BARRIER || Opc == AMDGPU::IGLP_OPT;
137 });
138
139 // Don't cluster with IGLP instructions.
140 if (HasIGLPInstrs)
141 continue;
142
146
147 for (auto I = B; I != E; I = Next) {
148 Next = std::next(I);
149 if (!isBundleCandidate(*I))
150 continue;
151
152 assert(Defs.empty());
153
154 if (I->getNumExplicitDefs() != 0)
155 Defs.insert(I->defs().begin()->getReg());
156
159 unsigned ClauseLength = 1;
160 for (I = Next; I != E; I = Next) {
161 Next = std::next(I);
162
163 assert(BundleEnd != I);
164 if (canBundle(*BundleEnd, *I)) {
165 BundleEnd = I;
166 if (I->getNumExplicitDefs() != 0)
167 Defs.insert(I->defs().begin()->getReg());
168 ++ClauseLength;
169 } else if (!I->isMetaInstruction()) {
170 // Allow meta instructions in between bundle candidates, but do not
171 // start or end a bundle on one.
172 //
173 // TODO: It may be better to move meta instructions like dbg_value
174 // after the bundle. We're relying on the memory legalizer to unbundle
175 // these.
176 break;
177 }
178 }
179
180 Next = std::next(BundleEnd);
181 if (ClauseLength > 1) {
182 Changed = true;
183
184 // Before register allocation, kills are inserted after potential soft
185 // clauses to hint register allocation. Look for kills that look like
186 // this, and erase them.
187 if (Next != E && Next->isKill()) {
188
189 // TODO: Should maybe back-propagate kill flags to the bundle.
190 for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
191 collectUsedRegUnits(BundleMI, BundleUsedRegUnits);
192
193 BundleUsedRegUnits.flip();
194
195 while (Next != E && Next->isKill()) {
196 MachineInstr &Kill = *Next;
197 collectUsedRegUnits(Kill, KillUsedRegUnits);
198
199 KillUsedRegUnits &= BundleUsedRegUnits;
200
201 // Erase the kill if it's a subset of the used registers.
202 //
203 // TODO: Should we just remove all kills? Is there any real reason to
204 // keep them after RA?
205 if (KillUsedRegUnits.none()) {
206 ++Next;
207 Kill.eraseFromParent();
208 } else
209 break;
210
211 KillUsedRegUnits.reset();
212 }
213
214 BundleUsedRegUnits.reset();
215 }
216
217 finalizeBundle(MBB, BundleStart, Next);
218 }
219
220 Defs.clear();
221 }
222 }
223
224 return Changed;
225}
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
#define DEBUG_TYPE
This file defines the SmallSet class.
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
instr_iterator instr_begin()
Instructions::iterator instr_iterator
instr_iterator instr_end()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:566
bool mayStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly modify memory.
bool isBundled() const
Return true if this instruction part of a bundle.
Definition: MachineInstr.h:471
MachineOperand class - Representation of each machine instruction operand.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
char & SIPostRABundlerID
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
void initializeSIPostRABundlerPass(PassRegistry &)
FunctionPass * createSIPostRABundlerPass()