LLVM  16.0.0git
MIRCanonicalizerPass.cpp
Go to the documentation of this file.
1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use inorder to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
28 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Pass.h"
33 #include "llvm/Support/Debug.h"
35 
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "mir-canonicalizer"
39 
40 static cl::opt<unsigned>
41  CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
42  cl::value_desc("N"),
43  cl::desc("Function number to canonicalize."));
44 
45 namespace {
46 
47 class MIRCanonicalizer : public MachineFunctionPass {
48 public:
49  static char ID;
50  MIRCanonicalizer() : MachineFunctionPass(ID) {}
51 
52  StringRef getPassName() const override {
53  return "Rename register operands in a canonical ordering.";
54  }
55 
56  void getAnalysisUsage(AnalysisUsage &AU) const override {
57  AU.setPreservesCFG();
59  }
60 
61  bool runOnMachineFunction(MachineFunction &MF) override;
62 };
63 
64 } // end anonymous namespace
65 
67 
69 
70 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
71  "Rename Register Operands Canonically", false, false)
72 
73 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
75 
77  if (MF.empty())
78  return {};
80  std::vector<MachineBasicBlock *> RPOList;
81  append_range(RPOList, RPOT);
82 
83  return RPOList;
84 }
85 
86 static bool
87 rescheduleLexographically(std::vector<MachineInstr *> instructions,
90 
91  bool Changed = false;
92  using StringInstrPair = std::pair<std::string, MachineInstr *>;
93  std::vector<StringInstrPair> StringInstrMap;
94 
95  for (auto *II : instructions) {
96  std::string S;
98  II->print(OS);
99  OS.flush();
100 
101  // Trim the assignment, or start from the beginning in the case of a store.
102  const size_t i = S.find('=');
103  StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
104  }
105 
106  llvm::sort(StringInstrMap, llvm::less_first());
107 
108  for (auto &II : StringInstrMap) {
109 
110  LLVM_DEBUG({
111  dbgs() << "Splicing ";
112  II.second->dump();
113  dbgs() << " right before: ";
114  getPos()->dump();
115  });
116 
117  Changed = true;
118  MBB->splice(getPos(), MBB, II.second);
119  }
120 
121  return Changed;
122 }
123 
124 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
126 
127  bool Changed = false;
128 
129  // Calculates the distance of MI from the beginning of its parent BB.
130  auto getInstrIdx = [](const MachineInstr &MI) {
131  unsigned i = 0;
132  for (const auto &CurMI : *MI.getParent()) {
133  if (&CurMI == &MI)
134  return i;
135  i++;
136  }
137  return ~0U;
138  };
139 
140  // Pre-Populate vector of instructions to reschedule so that we don't
141  // clobber the iterator.
142  std::vector<MachineInstr *> Instructions;
143  for (auto &MI : *MBB) {
144  Instructions.push_back(&MI);
145  }
146 
147  std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
148  std::map<unsigned, MachineInstr *> MultiUserLookup;
149  unsigned UseToBringDefCloserToCount = 0;
150  std::vector<MachineInstr *> PseudoIdempotentInstructions;
151  std::vector<unsigned> PhysRegDefs;
152  for (auto *II : Instructions) {
153  for (unsigned i = 1; i < II->getNumOperands(); i++) {
154  MachineOperand &MO = II->getOperand(i);
155  if (!MO.isReg())
156  continue;
157 
159  continue;
160 
161  if (!MO.isDef())
162  continue;
163 
164  PhysRegDefs.push_back(MO.getReg());
165  }
166  }
167 
168  for (auto *II : Instructions) {
169  if (II->getNumOperands() == 0)
170  continue;
171  if (II->mayLoadOrStore())
172  continue;
173 
174  MachineOperand &MO = II->getOperand(0);
175  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
176  continue;
177  if (!MO.isDef())
178  continue;
179 
180  bool IsPseudoIdempotent = true;
181  for (unsigned i = 1; i < II->getNumOperands(); i++) {
182 
183  if (II->getOperand(i).isImm()) {
184  continue;
185  }
186 
187  if (II->getOperand(i).isReg()) {
188  if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
189  if (!llvm::is_contained(PhysRegDefs, II->getOperand(i).getReg())) {
190  continue;
191  }
192  }
193 
194  IsPseudoIdempotent = false;
195  break;
196  }
197 
198  if (IsPseudoIdempotent) {
199  PseudoIdempotentInstructions.push_back(II);
200  continue;
201  }
202 
203  LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
204 
205  MachineInstr *Def = II;
206  unsigned Distance = ~0U;
207  MachineInstr *UseToBringDefCloserTo = nullptr;
209  for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
210  MachineInstr *UseInst = UO.getParent();
211 
212  const unsigned DefLoc = getInstrIdx(*Def);
213  const unsigned UseLoc = getInstrIdx(*UseInst);
214  const unsigned Delta = (UseLoc - DefLoc);
215 
216  if (UseInst->getParent() != Def->getParent())
217  continue;
218  if (DefLoc >= UseLoc)
219  continue;
220 
221  if (Delta < Distance) {
222  Distance = Delta;
223  UseToBringDefCloserTo = UseInst;
224  MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
225  }
226  }
227 
228  const auto BBE = MBB->instr_end();
229  MachineBasicBlock::iterator DefI = BBE;
230  MachineBasicBlock::iterator UseI = BBE;
231 
232  for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
233 
234  if (DefI != BBE && UseI != BBE)
235  break;
236 
237  if (&*BBI == Def) {
238  DefI = BBI;
239  continue;
240  }
241 
242  if (&*BBI == UseToBringDefCloserTo) {
243  UseI = BBI;
244  continue;
245  }
246  }
247 
248  if (DefI == BBE || UseI == BBE)
249  continue;
250 
251  LLVM_DEBUG({
252  dbgs() << "Splicing ";
253  DefI->dump();
254  dbgs() << " right before: ";
255  UseI->dump();
256  });
257 
258  MultiUsers[UseToBringDefCloserTo].push_back(Def);
259  Changed = true;
260  MBB->splice(UseI, MBB, DefI);
261  }
262 
263  // Sort the defs for users of multiple defs lexographically.
264  for (const auto &E : MultiUserLookup) {
265 
266  auto UseI = llvm::find_if(MBB->instrs(), [&](MachineInstr &MI) -> bool {
267  return &MI == E.second;
268  });
269 
270  if (UseI == MBB->instr_end())
271  continue;
272 
273  LLVM_DEBUG(
274  dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
275  Changed |= rescheduleLexographically(
276  MultiUsers[E.second], MBB,
277  [&]() -> MachineBasicBlock::iterator { return UseI; });
278  }
279 
280  PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
281  LLVM_DEBUG(
282  dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
283  Changed |= rescheduleLexographically(
284  PseudoIdempotentInstructions, MBB,
285  [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
286 
287  return Changed;
288 }
289 
291  bool Changed = false;
293 
294  std::vector<MachineInstr *> Copies;
295  for (MachineInstr &MI : MBB->instrs()) {
296  if (MI.isCopy())
297  Copies.push_back(&MI);
298  }
299 
300  for (MachineInstr *MI : Copies) {
301 
302  if (!MI->getOperand(0).isReg())
303  continue;
304  if (!MI->getOperand(1).isReg())
305  continue;
306 
307  const Register Dst = MI->getOperand(0).getReg();
308  const Register Src = MI->getOperand(1).getReg();
309 
310  if (!Register::isVirtualRegister(Dst))
311  continue;
312  if (!Register::isVirtualRegister(Src))
313  continue;
314  // Not folding COPY instructions if regbankselect has not set the RCs.
315  // Why are we only considering Register Classes? Because the verifier
316  // sometimes gets upset if the register classes don't match even if the
317  // types do. A future patch might add COPY folding for matching types in
318  // pre-registerbankselect code.
319  if (!MRI.getRegClassOrNull(Dst))
320  continue;
321  if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
322  continue;
323 
324  std::vector<MachineOperand *> Uses;
325  for (MachineOperand &MO : MRI.use_operands(Dst))
326  Uses.push_back(&MO);
327  for (auto *MO : Uses)
328  MO->setReg(Src);
329 
330  Changed = true;
331  MI->eraseFromParent();
332  }
333 
334  return Changed;
335 }
336 
338  bool Changed = false;
339 
340  for (auto &MI : *MBB) {
341  for (auto &MO : MI.operands()) {
342  if (!MO.isReg())
343  continue;
344  if (!MO.isDef() && MO.isKill()) {
345  Changed = true;
346  MO.setIsKill(false);
347  }
348 
349  if (MO.isDef() && MO.isDead()) {
350  Changed = true;
351  MO.setIsDead(false);
352  }
353  }
354  }
355 
356  return Changed;
357 }
358 
360  unsigned BasicBlockNum, VRegRenamer &Renamer) {
361  LLVM_DEBUG({
362  dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
363  dbgs() << "\n\n================================================\n\n";
364  });
365 
366  bool Changed = false;
367 
368  LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
369 
370  LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
371  MBB->dump(););
372  Changed |= propagateLocalCopies(MBB);
373  LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
374 
375  LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
376  unsigned IdempotentInstCount = 0;
377  Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
378  LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
379 
380  Changed |= Renamer.renameVRegs(MBB, BasicBlockNum);
381 
382  // TODO: Consider dropping this. Dropping kill defs is probably not
383  // semantically sound.
384  Changed |= doDefKillClear(MBB);
385 
386  LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
387  dbgs() << "\n";);
388  LLVM_DEBUG(
389  dbgs() << "\n\n================================================\n\n");
390  return Changed;
391 }
392 
393 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
394 
395  static unsigned functionNum = 0;
396  if (CanonicalizeFunctionNumber != ~0U) {
397  if (CanonicalizeFunctionNumber != functionNum++)
398  return false;
399  LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
400  << "\n";);
401  }
402 
403  // we need a valid vreg to create a vreg type for skipping all those
404  // stray vreg numbers so reach alignment/canonical vreg values.
405  std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
406 
407  LLVM_DEBUG(
408  dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
409  dbgs() << "\n\n================================================\n\n";
410  dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
411  for (auto MBB
412  : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
413  << "\n\n================================================\n\n";);
414 
415  unsigned BBNum = 0;
416  bool Changed = false;
418  VRegRenamer Renamer(MRI);
419  for (auto *MBB : RPOList)
420  Changed |= runOnBasicBlock(MBB, BBNum++, Renamer);
421 
422  return Changed;
423 }
i
i
Definition: README.txt:29
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:109
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
runOnBasicBlock
static bool runOnBasicBlock(MachineBasicBlock *MBB, unsigned BasicBlockNum, VRegRenamer &Renamer)
Definition: MIRCanonicalizerPass.cpp:359
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:818
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:50
Pass.h
llvm::MachineBasicBlock::instrs
instr_range instrs()
Definition: MachineBasicBlock.h:300
canonicalizer
mir canonicalizer
Definition: MIRCanonicalizerPass.cpp:73
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:628
MIRVRegNamerUtils.h
propagateLocalCopies
static bool propagateLocalCopies(MachineBasicBlock *MBB)
Definition: MIRCanonicalizerPass.cpp:290
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:139
Canonically
mir Rename Register Operands Canonically
Definition: MIRCanonicalizerPass.cpp:74
rescheduleCanonically
static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, MachineBasicBlock *MBB)
Definition: MIRCanonicalizerPass.cpp:124
Instructions
Code Generation Notes for reduce the size of the ISel and reduce repetition in the implementation In a small number of this can cause even when no optimisation has taken place Instructions
Definition: MSA.txt:11
llvm::MachineRegisterInfo::use_operands
iterator_range< use_iterator > use_operands(Register Reg) const
Definition: MachineRegisterInfo.h:477
STLExtras.h
llvm::MachineOperand::dump
void dump() const
Definition: MachineOperand.cpp:985
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:167
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MIRCanonicalizerID
char & MIRCanonicalizerID
MIRCanonicalizer - This pass canonicalizes MIR by renaming vregs according to the semantics of the in...
Definition: MIRCanonicalizerPass.cpp:68
MachineRegisterInfo.h
Uses
SmallPtrSet< MachineInstr *, 2 > Uses
Definition: ARMLowOverheadLoops.cpp:590
llvm::MachineBasicBlock::dump
void dump() const
Definition: MachineBasicBlock.cpp:292
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:667
llvm::MachineRegisterInfo::use_nodbg_operands
iterator_range< use_nodbg_iterator > use_nodbg_operands(Register Reg) const
Definition: MachineRegisterInfo.h:534
llvm::VRegRenamer::renameVRegs
bool renameVRegs(MachineBasicBlock *MBB, unsigned BBNum)
Same as the above, but sets a BBNum depending on BB traversal that will be used as prefix for the vre...
Definition: MIRVRegNamerUtils.h:89
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::less_first
Function object to check whether the first component of a std::pair compares less than the first comp...
Definition: STLExtras.h:1476
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:141
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:185
Copies
SI Lower i1 Copies
Definition: SILowerI1Copies.cpp:397
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:647
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1682
llvm::cl::opt
Definition: CommandLine.h:1411
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:320
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:446
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1868
MachineFunctionPass.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", "Rename Register Operands Canonically", false, false) INITIALIZE_PASS_END(MIRCanonicalizer
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::MachineRegisterInfo::getRegClassOrNull
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
Definition: MachineRegisterInfo.h:664
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:567
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:261
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:289
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:291
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:1009
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:2013
rescheduleLexographically
static bool rescheduleLexographically(std::vector< MachineInstr * > instructions, MachineBasicBlock *MBB, std::function< MachineBasicBlock::iterator()> getPos)
Definition: MIRCanonicalizerPass.cpp:87
llvm::MachineOperand::isDef
bool isDef() const
Definition: MachineOperand.h:374
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::find_if
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1761
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
std
Definition: BitVector.h:851
doDefKillClear
static bool doDefKillClear(MachineBasicBlock *MBB)
Definition: MIRCanonicalizerPass.cpp:337
GetRPOList
mir Rename Register Operands static false std::vector< MachineBasicBlock * > GetRPOList(MachineFunction &MF)
Definition: MIRCanonicalizerPass.cpp:76
llvm::VRegRenamer
VRegRenamer - This class is used for renaming vregs in a machine basic block according to semantics o...
Definition: MIRVRegNamerUtils.h:34
llvm::cl::value_desc
Definition: CommandLine.h:421
llvm::ReversePostOrderTraversal
Definition: PostOrderIterator.h:291
PostOrderIterator.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:305
llvm::cl::desc
Definition: CommandLine.h:412
raw_ostream.h
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:311
Debug.h
CanonicalizeFunctionNumber
static cl::opt< unsigned > CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("Function number to canonicalize."))