LLVM 18.0.0git
MIRCanonicalizerPass.cpp
Go to the documentation of this file.
1//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The purpose of this pass is to employ a canonical code transformation so
10// that code compiled with slightly different IR passes can be diffed more
11// effectively than otherwise. This is done by renaming vregs in a given
12// LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13// move defs closer to their use inorder to reduce diffs caused by slightly
14// different schedules.
15//
16// Basic Usage:
17//
18// llc -o - -run-pass mir-canonicalizer example.mir
19//
20// Reorders instructions canonically.
21// Renames virtual register operands canonically.
22// Strips certain MIR artifacts (optionally).
23//
24//===----------------------------------------------------------------------===//
25
26#include "MIRVRegNamerUtils.h"
28#include "llvm/ADT/STLExtras.h"
32#include "llvm/Pass.h"
33#include "llvm/Support/Debug.h"
35
36using namespace llvm;
37
38#define DEBUG_TYPE "mir-canonicalizer"
39
41 CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
42 cl::value_desc("N"),
43 cl::desc("Function number to canonicalize."));
44
45namespace {
46
47class MIRCanonicalizer : public MachineFunctionPass {
48public:
49 static char ID;
50 MIRCanonicalizer() : MachineFunctionPass(ID) {}
51
52 StringRef getPassName() const override {
53 return "Rename register operands in a canonical ordering.";
54 }
55
56 void getAnalysisUsage(AnalysisUsage &AU) const override {
57 AU.setPreservesCFG();
59 }
60
61 bool runOnMachineFunction(MachineFunction &MF) override;
62};
63
64} // end anonymous namespace
65
66char MIRCanonicalizer::ID;
67
68char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
69
70INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
71 "Rename Register Operands Canonically", false, false)
72
73INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
75
77 if (MF.empty())
78 return {};
80 std::vector<MachineBasicBlock *> RPOList;
81 append_range(RPOList, RPOT);
82
83 return RPOList;
84}
85
86static bool
87rescheduleLexographically(std::vector<MachineInstr *> instructions,
89 std::function<MachineBasicBlock::iterator()> getPos) {
90
91 bool Changed = false;
92 using StringInstrPair = std::pair<std::string, MachineInstr *>;
93 std::vector<StringInstrPair> StringInstrMap;
94
95 for (auto *II : instructions) {
96 std::string S;
98 II->print(OS);
99 OS.flush();
100
101 // Trim the assignment, or start from the beginning in the case of a store.
102 const size_t i = S.find('=');
103 StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
104 }
105
106 llvm::sort(StringInstrMap, llvm::less_first());
107
108 for (auto &II : StringInstrMap) {
109
110 LLVM_DEBUG({
111 dbgs() << "Splicing ";
112 II.second->dump();
113 dbgs() << " right before: ";
114 getPos()->dump();
115 });
116
117 Changed = true;
118 MBB->splice(getPos(), MBB, II.second);
119 }
120
121 return Changed;
122}
123
124static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
126
127 bool Changed = false;
128
129 // Calculates the distance of MI from the beginning of its parent BB.
130 auto getInstrIdx = [](const MachineInstr &MI) {
131 unsigned i = 0;
132 for (const auto &CurMI : *MI.getParent()) {
133 if (&CurMI == &MI)
134 return i;
135 i++;
136 }
137 return ~0U;
138 };
139
140 // Pre-Populate vector of instructions to reschedule so that we don't
141 // clobber the iterator.
142 std::vector<MachineInstr *> Instructions;
143 for (auto &MI : *MBB) {
144 Instructions.push_back(&MI);
145 }
146
147 std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
148 std::map<unsigned, MachineInstr *> MultiUserLookup;
149 unsigned UseToBringDefCloserToCount = 0;
150 std::vector<MachineInstr *> PseudoIdempotentInstructions;
151 std::vector<unsigned> PhysRegDefs;
152 for (auto *II : Instructions) {
153 for (unsigned i = 1; i < II->getNumOperands(); i++) {
154 MachineOperand &MO = II->getOperand(i);
155 if (!MO.isReg())
156 continue;
157
158 if (MO.getReg().isVirtual())
159 continue;
160
161 if (!MO.isDef())
162 continue;
163
164 PhysRegDefs.push_back(MO.getReg());
165 }
166 }
167
168 for (auto *II : Instructions) {
169 if (II->getNumOperands() == 0)
170 continue;
171 if (II->mayLoadOrStore())
172 continue;
173
174 MachineOperand &MO = II->getOperand(0);
175 if (!MO.isReg() || !MO.getReg().isVirtual())
176 continue;
177 if (!MO.isDef())
178 continue;
179
180 bool IsPseudoIdempotent = true;
181 for (unsigned i = 1; i < II->getNumOperands(); i++) {
182
183 if (II->getOperand(i).isImm()) {
184 continue;
185 }
186
187 if (II->getOperand(i).isReg()) {
188 if (!II->getOperand(i).getReg().isVirtual())
189 if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
190 continue;
191 }
192 }
193
194 IsPseudoIdempotent = false;
195 break;
196 }
197
198 if (IsPseudoIdempotent) {
199 PseudoIdempotentInstructions.push_back(II);
200 continue;
201 }
202
203 LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
204
205 MachineInstr *Def = II;
206 unsigned Distance = ~0U;
207 MachineInstr *UseToBringDefCloserTo = nullptr;
209 for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
210 MachineInstr *UseInst = UO.getParent();
211
212 const unsigned DefLoc = getInstrIdx(*Def);
213 const unsigned UseLoc = getInstrIdx(*UseInst);
214 const unsigned Delta = (UseLoc - DefLoc);
215
216 if (UseInst->getParent() != Def->getParent())
217 continue;
218 if (DefLoc >= UseLoc)
219 continue;
220
221 if (Delta < Distance) {
222 Distance = Delta;
223 UseToBringDefCloserTo = UseInst;
224 MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
225 }
226 }
227
228 const auto BBE = MBB->instr_end();
231
232 for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
233
234 if (DefI != BBE && UseI != BBE)
235 break;
236
237 if (&*BBI == Def) {
238 DefI = BBI;
239 continue;
240 }
241
242 if (&*BBI == UseToBringDefCloserTo) {
243 UseI = BBI;
244 continue;
245 }
246 }
247
248 if (DefI == BBE || UseI == BBE)
249 continue;
250
251 LLVM_DEBUG({
252 dbgs() << "Splicing ";
253 DefI->dump();
254 dbgs() << " right before: ";
255 UseI->dump();
256 });
257
258 MultiUsers[UseToBringDefCloserTo].push_back(Def);
259 Changed = true;
260 MBB->splice(UseI, MBB, DefI);
261 }
262
263 // Sort the defs for users of multiple defs lexographically.
264 for (const auto &E : MultiUserLookup) {
265
266 auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
267 return &MI == E.second;
268 });
269
270 if (UseI == MBB->instr_end())
271 continue;
272
274 dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
275 Changed |= rescheduleLexographically(
276 MultiUsers[E.second], MBB,
277 [&]() -> MachineBasicBlock::iterator { return UseI; });
278 }
279
280 PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
282 dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
283 Changed |= rescheduleLexographically(
284 PseudoIdempotentInstructions, MBB,
285 [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
286
287 return Changed;
288}
289
291 bool Changed = false;
293
294 std::vector<MachineInstr *> Copies;
295 for (MachineInstr &MI : MBB->instrs()) {
296 if (MI.isCopy())
297 Copies.push_back(&MI);
298 }
299
300 for (MachineInstr *MI : Copies) {
301
302 if (!MI->getOperand(0).isReg())
303 continue;
304 if (!MI->getOperand(1).isReg())
305 continue;
306
307 const Register Dst = MI->getOperand(0).getReg();
308 const Register Src = MI->getOperand(1).getReg();
309
310 if (!Dst.isVirtual())
311 continue;
312 if (!Src.isVirtual())
313 continue;
314 // Not folding COPY instructions if regbankselect has not set the RCs.
315 // Why are we only considering Register Classes? Because the verifier
316 // sometimes gets upset if the register classes don't match even if the
317 // types do. A future patch might add COPY folding for matching types in
318 // pre-registerbankselect code.
319 if (!MRI.getRegClassOrNull(Dst))
320 continue;
321 if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
322 continue;
323
324 std::vector<MachineOperand *> Uses;
325 for (MachineOperand &MO : MRI.use_operands(Dst))
326 Uses.push_back(&MO);
327 for (auto *MO : Uses)
328 MO->setReg(Src);
329
330 Changed = true;
331 MI->eraseFromParent();
332 }
333
334 return Changed;
335}
336
338 bool Changed = false;
339
340 for (auto &MI : *MBB) {
341 for (auto &MO : MI.operands()) {
342 if (!MO.isReg())
343 continue;
344 if (!MO.isDef() && MO.isKill()) {
345 Changed = true;
346 MO.setIsKill(false);
347 }
348
349 if (MO.isDef() && MO.isDead()) {
350 Changed = true;
351 MO.setIsDead(false);
352 }
353 }
354 }
355
356 return Changed;
357}
358
360 unsigned BasicBlockNum, VRegRenamer &Renamer) {
361 LLVM_DEBUG({
362 dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
363 dbgs() << "\n\n================================================\n\n";
364 });
365
366 bool Changed = false;
367
368 LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
369
370 LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
371 MBB->dump(););
372 Changed |= propagateLocalCopies(MBB);
373 LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
374
375 LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
376 unsigned IdempotentInstCount = 0;
377 Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
378 LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
379
380 Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
381
382 // TODO: Consider dropping this. Dropping kill defs is probably not
383 // semantically sound.
384 Changed |= doDefKillClear(MBB);
385
386 LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
387 dbgs() << "\n";);
389 dbgs() << "\n\n================================================\n\n");
390 return Changed;
391}
392
393bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
394
395 static unsigned functionNum = 0;
396 if (CanonicalizeFunctionNumber != ~0U) {
397 if (CanonicalizeFunctionNumber != functionNum++)
398 return false;
399 LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
400 << "\n";);
401 }
402
403 // we need a valid vreg to create a vreg type for skipping all those
404 // stray vreg numbers so reach alignment/canonical vreg values.
405 std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
406
408 dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
409 dbgs() << "\n\n================================================\n\n";
410 dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
411 for (auto MBB
412 : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
413 << "\n\n================================================\n\n";);
414
415 unsigned BBNum = 0;
416 bool Changed = false;
418 VRegRenamer Renamer(MRI);
419 for (auto *MBB : RPOList)
420 Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
421
422 return Changed;
423}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
Rewrite Partial Register Uses
IRTranslator LLVM IR MI
Select target instructions out of generic instructions
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
static bool rescheduleLexographically(std::vector< MachineInstr * > instructions, MachineBasicBlock *MBB, std::function< MachineBasicBlock::iterator()> getPos)
static cl::opt< unsigned > CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("Function number to canonicalize."))
static bool doDefKillClear(MachineBasicBlock *MBB)
static bool propagateLocalCopies(MachineBasicBlock *MBB)
mir canonicalizer
mir Rename Register Operands Canonically
static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, MachineBasicBlock *MBB)
mir Rename Register Operands static false std::vector< MachineBasicBlock * > GetRPOList(MachineFunction &MF)
mir Rename Register Operands
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
SI Lower i1 Copies
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
instr_iterator instr_begin()
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:326
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
VRegRenamer - This class is used for renaming vregs in a machine basic block according to semantics o...
bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum)
Same as the above, but sets a BBNum depending on BB traversal that will be used as prefix for the vre...
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2042
char & MIRCanonicalizerID
MIRCanonicalizer - This pass canonicalizes MIR by renaming vregs according to the semantics of the in...
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1651
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1883
Implement std::hash so that hash_code can be used in STL containers.
Definition: BitVector.h:858
Function object to check whether the first component of a container supported by std::get (like std::...
Definition: STLExtras.h:1454