LLVM 23.0.0git
HexagonAggressiveRDFCopy.cpp
Go to the documentation of this file.
1//===--- HexagonAggressiveRDFCopy.cpp -------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// RDF-based aggressive copy propagation.
10//
11// This optimization extends the standard RDF copy propagation with support for
12// super-register and sub-register copy propagation. It determines candidates
13// for copy propagation by verifying that both the copy instruction and all
14// reached uses have the same reaching definitions for the source register(s).
15//
16// Key differences:
17// 1. Super-register handling: Can propagate copies involving super-registers
18// and their sub-registers (e.g., double-register pairs on Hexagon).
19// 2. Sub-register coverage: Verifies that all sub-registers of the source
20// register have consistent reaching definitions before propagating.
21// 3. Combine instruction support: Handles A2_combinew instructions that
22// combine two 32-bit registers into a 64-bit register pair.
23//
24// Algorithm:
25// 1. Scan all basic blocks in dominator tree order, maintaining a stack of
26// reaching definitions for each register.
27// 2. For each copy instruction:
28// a. Record the copy and its source/destination register mapping.
29// b. Find the reaching definition for the source register at the copy.
30// c. For each use reached by the copy's destination register:
31// - Check if all sub-registers of the source have the same reaching
32// definition at both the copy and the use.
33// - If yes, mark the use as replaceable.
34// 3. Replace all marked uses with the source register of the copy.
35//
36// Example:
37// BB1:
38// R1 = ... // Def1
39// D0 = A2_combinew R1, R0 // Copy: D0 = {R1, R0}
40// ... = D0 // Use of D0
41//
42// If R1 and R0 have the same reaching definitions at both the copy and the
43// use, the use of D0 can be replaced with the original source registers.
44// D0 is super-register corresponding to R1:0.
45//
46//===----------------------------------------------------------------------===//
47
61#include "llvm/Support/Debug.h"
64
65#include <cassert>
66#include <cstdint>
67#include <utility>
68
69using namespace llvm;
70using namespace rdf;
71
72#ifndef NDEBUG
74static unsigned RDFCpCount = 0;
75#endif
76
77// Record destination and source registers in EqualityMap
78// if this is a copy instruction
80 EqualityMap &EM) {
81 unsigned Opc = MI->getOpcode();
82 switch (Opc) {
83 case TargetOpcode::COPY: {
84 const MachineOperand &Dst = MI->getOperand(0);
85 const MachineOperand &Src = MI->getOperand(1);
86 RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg());
87 RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg());
90 if (HRI.isFakeReg(DstR.Id) || HRI.isFakeReg(SrcR.Id))
91 return false;
92 if (TRI.getMinimalPhysRegClass(DstR.Id) !=
93 TRI.getMinimalPhysRegClass(SrcR.Id))
94 return false;
95 if (!DFG.isTracked(SrcR) || !DFG.isTracked(DstR))
96 return false;
97 EM.insert(std::make_pair(DstR, SrcR));
98 return true;
99 }
100 case TargetOpcode::REG_SEQUENCE:
101 llvm_unreachable("Unexpected REG_SEQUENCE");
102 }
103 return false;
104}
105
106// Track instructions determined to be copies along with their uses.
107// The register, sub-register copy pairs are given by EqualityMap
108// Find the reaching def from DefM stack for source (LHS) registers in each
109// copy. ReachedUseToCopyMap stores each reached use (UseNode) of a copy along
110// with the copy DefNode and source register
111void AggressiveCopyPropagation::recordCopy(NodeAddr<StmtNode *> SA,
112 EqualityMap &EM) {
113 if (trace())
114 CopyMap.insert(std::make_pair(SA.Id, EM));
115
116 // Find and store reaching def for each source register
117 // EqualityMap should also contain subregs
118 for (auto I : EM) {
119 if (PRI.equal_to(I.first, I.second))
120 continue;
121 NodeId RDefId = 0;
122 auto FS = DefM.find(I.second.Id);
123 if (FS != DefM.end() && !FS->second.empty()) {
124 auto Def = FS->second.top()->Addr->getRegRef(DFG);
125 // Avoid adding subreg as reaching def for superreg
127 TRI.isSuperRegister(Def.Id, I.second.Id))
128 continue;
129 RDefId = FS->second.top()->Id;
130 }
131 RDefMap[I.second][SA.Id] = RDefId;
132 }
133 for (NodeAddr<DefNode *> DA : SA.Addr->members_if(DFG.IsDef, DFG)) {
134 RegisterRef DR = DA.Addr->getRegRef(DFG);
135 auto FR = EM.find(DR);
136 if (FR == EM.end())
137 continue;
138 // Iterate over DR and its subregisters
139 // if present in EqualityMap, find its reached uses
140 for (MCPhysReg SubDReg : TRI.subregs_inclusive(DR.Id)) {
141 auto SubDR = DFG.makeRegRef(SubDReg, 0);
142 auto FR = EM.find(SubDR);
143 if (FR == EM.end())
144 continue;
145 RegisterRef SR = FR->second;
146 // Redundant copy
147 if (PRI.equal_to(SubDR, SR))
148 continue;
149 for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) {
150 auto UA = DFG.addr<UseNode *>(N);
151 NextN = UA.Addr->getSibling();
152 uint16_t F = UA.Addr->getFlags();
153 // Skip phi node uses
154 // Skip shadow uses. When shadow nodes are present, the register has
155 // multiple reaching defs.
156 if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed) ||
158 continue;
159 if (!PRI.equal_to(UA.Addr->getRegRef(DFG), SubDR))
160 continue;
161 MachineOperand &Op = UA.Addr->getOp();
162 // Skip operand if def and use of a register happens in same instruction
163 if (Op.isTied())
164 continue;
165 if (ReachedUseToCopyMap.find(UA.Id) != ReachedUseToCopyMap.end())
166 llvm_unreachable("Multiple copy instructions reach this use");
167 ReachedUseToCopyMap.insert(
168 std::make_pair(UA.Id, std::make_pair(DA, SR)));
169 }
170 }
171 }
172}
173
174// Now that we can obtain reaching def for uses from DefM,
175// check that reaching defs for source register and subregisters at the use
176// instruction, are the same as reaching defs for the copy. Uses that satisfy
177// this check can be replaced with the source registers of the copy.
178void AggressiveCopyPropagation::recordReplacableUses(NodeAddr<InstrNode *> IA) {
179 for (NodeAddr<UseNode *> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
180 // Check if any uses of the instruction are reached by a copy
181 auto CopyUseIt = ReachedUseToCopyMap.find(UA.Id);
182 if (CopyUseIt == ReachedUseToCopyMap.end())
183 continue;
184 [[maybe_unused]] auto UseReg = UA.Addr->getRegRef(DFG);
185 auto DA = CopyUseIt->second.first;
186 auto SR = CopyUseIt->second.second;
187 [[maybe_unused]] auto DefReg = DA.Addr->getRegRef(DFG);
188 assert(PRI.equal_to(DefReg, UseReg));
189 NodeAddr<InstrNode *> DefI = DA.Addr->getOwner(DFG);
190 // Aggr of subregs that have same reaching def (at IA) as DefI
191 RegisterAggr RRs(PRI);
192 // Registers that need to be added as use nodes in updated IA
193 SmallVector<RegisterRef, 4> UseRefs;
194 for (MCPhysReg S : TRI.subregs_inclusive(SR.Id)) {
195 auto SRef = DFG.makeRegRef(S, 0);
196 NodeId RDefId = 0;
197 // If there is no reaching def for SRef at DefI,
198 // do not check if SRef can be propagated
199 auto RDefIt = RDefMap.find(SRef);
200 if (RDefIt == RDefMap.end())
201 continue;
202 auto DefIIt = RDefIt->second.find(DefI.Id);
203 if (DefIIt == RDefIt->second.end())
204 continue;
205 // If we already have reaching def for SRef at IA,
206 // use it instead of searching DefM.
207 auto IAIt = RDefIt->second.find(IA.Id);
208 if (IAIt != RDefIt->second.end()) {
209 RDefId = IAIt->second;
210 } else {
211 auto F = DefM.find(S);
212 if (F != DefM.end() && !F->second.empty()) {
213 auto Def = F->second.top()->Addr->getRegRef(DFG);
214 // Avoid adding subreg as reaching def for superreg
216 TRI.isSuperRegister(Def.Id, S))
217 continue;
218 RDefId = F->second.top()->Id;
219 }
220 }
221 // If reaching def for SRef at DefI is not same as IA,
222 // SRef can not propagated to IA.
223 if (DefIIt->second != RDefId)
224 continue;
225 RRs.insert(SRef);
226 UseRefs.push_back(SRef);
227 RDefIt->second[IA.Id] = RDefId;
228 // If registers or sub-registers that can be propagated cover SR,
229 // the use node is a candidate for copy propagation
230 if (RRs.hasCoverOf(SR))
231 break;
232 }
233 // Use node can be replaced with new use nodes created from UseRefs
234 if (RRs.hasCoverOf(SR))
235 ReplacableUses.push_back(std::make_pair(UA, UseRefs));
236 }
237}
238
239// Recursively process all children in the dominator tree.
240// Find copy instructions and reached uses that are candidates for propagation
241void AggressiveCopyPropagation::scanBlock(MachineBasicBlock *B) {
242 NodeAddr<BlockNode *> BA = DFG.findBlock(B);
243 DFG.markBlock(BA.Id, DefM);
244
245 for (NodeAddr<InstrNode *> IA : BA.Addr->members(DFG)) {
246 if (DFG.IsCode<NodeAttrs::Stmt>(IA)) {
247 NodeAddr<StmtNode *> SA = IA;
248 EqualityMap EM(RegisterRefLess(DFG.getPRI()));
249 if (interpretAsCopy(SA.Addr->getCode(), EM))
250 recordCopy(SA, EM);
251 recordReplacableUses(IA);
252 }
253 DFG.pushAllDefs(IA, DefM);
254 }
255
256 MachineDomTreeNode *N = MDT.getNode(B);
257 for (auto *I : *N)
258 scanBlock(I->getBlock());
259
260 DFG.releaseBlock(BA.Id, DefM);
261 return;
262}
263
265 scanBlock(MDT.getRootNode()->getBlock());
266
267 if (trace()) {
268 dbgs() << "Copies:\n";
269 for (auto &C : CopyMap) {
270 dbgs() << "Instr: " << *DFG.addr<StmtNode *>(C.first).Addr->getCode();
271 dbgs() << " eq: {";
272 for (auto J : C.second)
273 dbgs() << ' ' << Print<RegisterRef>(J.first, DFG) << '='
274 << Print<RegisterRef>(J.second, DFG);
275 dbgs() << " }\n";
276 }
277 dbgs() << "\nCopy def-use:\n";
278 for (auto &U : ReachedUseToCopyMap) {
279 auto DA = U.second.first;
280 auto DefI = DA.Addr->getOwner(DFG);
281 auto UseI = DFG.addr<UseNode *>(U.first).Addr->getOwner(DFG);
282 dbgs() << "Copy def: " << *DFG.addr<StmtNode *>(DefI.Id).Addr->getCode();
283 dbgs() << "Copy use: " << *DFG.addr<StmtNode *>(UseI.Id).Addr->getCode();
284 }
285 dbgs() << "\nRDef map:\n";
286 for (auto R : RDefMap) {
287 dbgs() << Print<RegisterRef>(R.first, DFG) << " -> {";
288 for (auto &M : R.second)
289 dbgs() << ' ' << Print<NodeId>(M.first, DFG) << ':'
290 << Print<NodeId>(M.second, DFG);
291 dbgs() << " }\n";
292 }
293 }
294
295 bool Changed = false;
296#ifndef NDEBUG
297 bool HasLimit = RDFCpLimit.getNumOccurrences() > 0;
298#endif
299
300 auto MinPhysReg = [this](RegisterRef RR) -> unsigned {
301 const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Id);
302 if ((RC.LaneMask & RR.Mask) == RC.LaneMask)
303 return RR.Id;
304 for (MCSubRegIndexIterator S(RR.Id, &TRI); S.isValid(); ++S)
305 if (RR.Mask == TRI.getSubRegIndexLaneMask(S.getSubRegIndex()))
306 return S.getSubReg();
307 llvm_unreachable("Should have found a register");
308 return 0;
309 };
310
311 // Iterate over all candidate uses found and replace with source register of
312 // copy
313 for (auto P : ReplacableUses) {
314#ifndef NDEBUG
315 if (HasLimit && RDFCpCount >= RDFCpLimit)
316 break;
317#endif
318 NodeAddr<UseNode *> UA = P.first;
319 SmallVector<RegisterRef, 4> UseRefs = P.second;
320
321 // UseRefs should never be empty if RRs.hasCoverOf(SR) was true
322 assert(!UseRefs.empty() &&
323 "UseRefs should not be empty for replaceable use");
324
325 auto IA = UA.Addr->getOwner(DFG);
326 auto DR = UA.Addr->getRegRef(DFG);
327 auto SR = ReachedUseToCopyMap[UA.Id].second;
328 if (HRI.isFakeReg(SR.Id))
329 continue;
330
331 if (trace()) {
332 dbgs() << "Can replace " << Print<RegisterRef>(DR, DFG) << " with "
333 << Print<RegisterRef>(SR, DFG) << " in "
334 << *NodeAddr<StmtNode *>(IA).Addr->getCode();
335 }
336
337 // Update existing use node to use the source register
338 MachineOperand &Op = UA.Addr->getOp();
339 unsigned NewReg = MinPhysReg(SR);
340 Op.setReg(NewReg);
341 Op.setSubReg(0);
342 DFG.unlinkUse(UA, false);
343 bool firstUseNode = true;
344
345 for (auto UR : UseRefs) {
346 // If we have more than one use (such as multiple subregs),
347 // add a new shadow use node
348 if (!firstUseNode) {
349 UA.Addr->setFlags(UA.Addr->getFlags() | NodeAttrs::Shadow);
350 UA = DFG.getNextShadow(IA, UA, true);
351 }
352 if (RDefMap[UR][IA.Id] != 0) {
353 UA.Addr->linkToDef(UA.Id, DFG.addr<DefNode *>(RDefMap[UR][IA.Id]));
354 } else {
355 // No reaching def present
356 UA.Addr->setReachingDef(0);
357 UA.Addr->setSibling(0);
358 }
359 firstUseNode = false;
360 }
361
362 Changed = true;
363#ifndef NDEBUG
364 if (HasLimit && RDFCpCount >= RDFCpLimit)
365 break;
366 RDFCpCount++;
367#endif
368
369 } // for (UA in replacable uses)
370
371 return Changed;
372}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
cl::opt< unsigned > RDFCpLimit
static unsigned RDFCpCount
static Register UseReg(const MachineOperand &MO)
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
#define P(N)
Iterator that enumerates the sub-registers of a Reg and the associated sub-register indices.
bool isValid() const
Returns true if this iterator is not yet at the end.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition Register.h:60
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
Print(const T &, const DataFlowGraph &) -> Print< T >
uint32_t NodeId
Definition RDFGraph.h:262
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
#define N
virtual bool interpretAsCopy(const MachineInstr *MI, EqualityMap &EM)
std::map< NodeId, EqualityMap > CopyMap
Definition RDFCopyBase.h:48
std::map< RegisterRef, std::map< NodeId, NodeId >, RegisterRefLess > RDefMap
Definition RDFCopyBase.h:46
DataFlowGraph::DefStackMap DefM
Definition RDFCopyBase.h:42
std::map< RegisterRef, RegisterRef, RegisterRefLess > EqualityMap
Definition RDFCopyBase.h:37
const MachineDominatorTree & MDT
Definition RDFCopyBase.h:40
LLVM_ABI RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const
Definition RDFGraph.cpp:987
static bool IsDef(const Node BA)
Definition RDFGraph.h:828
NodeAddr< T > addr(NodeId N) const
Definition RDFGraph.h:692
LLVM_ABI bool equal_to(RegisterRef A, RegisterRef B) const
NodeId getSibling() const
Definition RDFGraph.h:569
LLVM_ABI RegisterRef getRegRef(const DataFlowGraph &G) const
Definition RDFGraph.cpp:401
LLVM_ABI Node getOwner(const DataFlowGraph &G)
Definition RDFGraph.cpp:427
MachineInstr * getCode() const
Definition RDFGraph.h:638