LLVM  10.0.0svn
MIRCanonicalizerPass.cpp
Go to the documentation of this file.
1 //===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The purpose of this pass is to employ a canonical code transformation so
10 // that code compiled with slightly different IR passes can be diffed more
11 // effectively than otherwise. This is done by renaming vregs in a given
12 // LiveRange in a canonical way. This pass also does a pseudo-scheduling to
13 // move defs closer to their use in order to reduce diffs caused by slightly
14 // different schedules.
15 //
16 // Basic Usage:
17 //
18 // llc -o - -run-pass mir-canonicalizer example.mir
19 //
20 // Reorders instructions canonically.
21 // Renames virtual register operands canonically.
22 // Strips certain MIR artifacts (optionally).
23 //
24 //===----------------------------------------------------------------------===//
25 
26 #include "MIRVRegNamerUtils.h"
28 #include "llvm/ADT/STLExtras.h"
32 #include "llvm/CodeGen/Passes.h"
34 
35 #include <queue>
36 
37 using namespace llvm;
38 
// Declaration of the pass-identity reference inside the llvm namespace. This
// only declares the symbol; it does not provide its definition.
namespace llvm {
extern char &MIRCanonicalizerID;
} // namespace llvm
42 
43 #define DEBUG_TYPE "mir-canonicalizer"
44 
45 static cl::opt<unsigned>
46  CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u),
47  cl::value_desc("N"),
48  cl::desc("Function number to canonicalize."));
49 
51  "canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"),
52  cl::desc("BasicBlock number to canonicalize."));
53 
54 namespace {
55 
56 class MIRCanonicalizer : public MachineFunctionPass {
57 public:
58  static char ID;
59  MIRCanonicalizer() : MachineFunctionPass(ID) {}
60 
61  StringRef getPassName() const override {
62  return "Rename register operands in a canonical ordering.";
63  }
64 
65  void getAnalysisUsage(AnalysisUsage &AU) const override {
66  AU.setPreservesCFG();
68  }
69 
70  bool runOnMachineFunction(MachineFunction &MF) override;
71 };
72 
73 } // end anonymous namespace
74 
76 
78 
79 INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer",
80  "Rename Register Operands Canonically", false, false)
81 
82 INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
83  "Rename Register Operands Canonically", false, false)
84 
86  if (MF.empty())
87  return {};
89  std::vector<MachineBasicBlock *> RPOList;
90  for (auto MBB : RPOT) {
91  RPOList.push_back(MBB);
92  }
93 
94  return RPOList;
95 }
96 
97 static bool
98 rescheduleLexographically(std::vector<MachineInstr *> instructions,
99  MachineBasicBlock *MBB,
101 
102  bool Changed = false;
103  using StringInstrPair = std::pair<std::string, MachineInstr *>;
104  std::vector<StringInstrPair> StringInstrMap;
105 
106  for (auto *II : instructions) {
107  std::string S;
108  raw_string_ostream OS(S);
109  II->print(OS);
110  OS.flush();
111 
112  // Trim the assignment, or start from the begining in the case of a store.
113  const size_t i = S.find("=");
114  StringInstrMap.push_back({(i == std::string::npos) ? S : S.substr(i), II});
115  }
116 
117  llvm::sort(StringInstrMap,
118  [](const StringInstrPair &a, const StringInstrPair &b) -> bool {
119  return (a.first < b.first);
120  });
121 
122  for (auto &II : StringInstrMap) {
123 
124  LLVM_DEBUG({
125  dbgs() << "Splicing ";
126  II.second->dump();
127  dbgs() << " right before: ";
128  getPos()->dump();
129  });
130 
131  Changed = true;
132  MBB->splice(getPos(), MBB, II.second);
133  }
134 
135  return Changed;
136 }
137 
138 static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
139  MachineBasicBlock *MBB) {
140 
141  bool Changed = false;
142 
143  // Calculates the distance of MI from the begining of its parent BB.
144  auto getInstrIdx = [](const MachineInstr &MI) {
145  unsigned i = 0;
146  for (auto &CurMI : *MI.getParent()) {
147  if (&CurMI == &MI)
148  return i;
149  i++;
150  }
151  return ~0U;
152  };
153 
154  // Pre-Populate vector of instructions to reschedule so that we don't
155  // clobber the iterator.
156  std::vector<MachineInstr *> Instructions;
157  for (auto &MI : *MBB) {
158  Instructions.push_back(&MI);
159  }
160 
161  std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
162  std::map<unsigned, MachineInstr *> MultiUserLookup;
163  unsigned UseToBringDefCloserToCount = 0;
164  std::vector<MachineInstr *> PseudoIdempotentInstructions;
165  std::vector<unsigned> PhysRegDefs;
166  for (auto *II : Instructions) {
167  for (unsigned i = 1; i < II->getNumOperands(); i++) {
168  MachineOperand &MO = II->getOperand(i);
169  if (!MO.isReg())
170  continue;
171 
173  continue;
174 
175  if (!MO.isDef())
176  continue;
177 
178  PhysRegDefs.push_back(MO.getReg());
179  }
180  }
181 
182  for (auto *II : Instructions) {
183  if (II->getNumOperands() == 0)
184  continue;
185  if (II->mayLoadOrStore())
186  continue;
187 
188  MachineOperand &MO = II->getOperand(0);
189  if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
190  continue;
191  if (!MO.isDef())
192  continue;
193 
194  bool IsPseudoIdempotent = true;
195  for (unsigned i = 1; i < II->getNumOperands(); i++) {
196 
197  if (II->getOperand(i).isImm()) {
198  continue;
199  }
200 
201  if (II->getOperand(i).isReg()) {
202  if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
203  if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
204  PhysRegDefs.end()) {
205  continue;
206  }
207  }
208 
209  IsPseudoIdempotent = false;
210  break;
211  }
212 
213  if (IsPseudoIdempotent) {
214  PseudoIdempotentInstructions.push_back(II);
215  continue;
216  }
217 
218  LLVM_DEBUG(dbgs() << "Operand " << 0 << " of "; II->dump(); MO.dump(););
219 
220  MachineInstr *Def = II;
221  unsigned Distance = ~0U;
222  MachineInstr *UseToBringDefCloserTo = nullptr;
223  MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
224  for (auto &UO : MRI->use_nodbg_operands(MO.getReg())) {
225  MachineInstr *UseInst = UO.getParent();
226 
227  const unsigned DefLoc = getInstrIdx(*Def);
228  const unsigned UseLoc = getInstrIdx(*UseInst);
229  const unsigned Delta = (UseLoc - DefLoc);
230 
231  if (UseInst->getParent() != Def->getParent())
232  continue;
233  if (DefLoc >= UseLoc)
234  continue;
235 
236  if (Delta < Distance) {
237  Distance = Delta;
238  UseToBringDefCloserTo = UseInst;
239  MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
240  }
241  }
242 
243  const auto BBE = MBB->instr_end();
244  MachineBasicBlock::iterator DefI = BBE;
245  MachineBasicBlock::iterator UseI = BBE;
246 
247  for (auto BBI = MBB->instr_begin(); BBI != BBE; ++BBI) {
248 
249  if (DefI != BBE && UseI != BBE)
250  break;
251 
252  if (&*BBI == Def) {
253  DefI = BBI;
254  continue;
255  }
256 
257  if (&*BBI == UseToBringDefCloserTo) {
258  UseI = BBI;
259  continue;
260  }
261  }
262 
263  if (DefI == BBE || UseI == BBE)
264  continue;
265 
266  LLVM_DEBUG({
267  dbgs() << "Splicing ";
268  DefI->dump();
269  dbgs() << " right before: ";
270  UseI->dump();
271  });
272 
273  MultiUsers[UseToBringDefCloserTo].push_back(Def);
274  Changed = true;
275  MBB->splice(UseI, MBB, DefI);
276  }
277 
278  // Sort the defs for users of multiple defs lexographically.
279  for (const auto &E : MultiUserLookup) {
280 
281  auto UseI =
282  std::find_if(MBB->instr_begin(), MBB->instr_end(),
283  [&](MachineInstr &MI) -> bool { return &MI == E.second; });
284 
285  if (UseI == MBB->instr_end())
286  continue;
287 
288  LLVM_DEBUG(
289  dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
290  Changed |= rescheduleLexographically(
291  MultiUsers[E.second], MBB,
292  [&]() -> MachineBasicBlock::iterator { return UseI; });
293  }
294 
295  PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
296  LLVM_DEBUG(
297  dbgs() << "Rescheduling Idempotent Instructions Lexographically.";);
298  Changed |= rescheduleLexographically(
299  PseudoIdempotentInstructions, MBB,
300  [&]() -> MachineBasicBlock::iterator { return MBB->begin(); });
301 
302  return Changed;
303 }
304 
306  bool Changed = false;
308 
309  std::vector<MachineInstr *> Copies;
310  for (MachineInstr &MI : MBB->instrs()) {
311  if (MI.isCopy())
312  Copies.push_back(&MI);
313  }
314 
315  for (MachineInstr *MI : Copies) {
316 
317  if (!MI->getOperand(0).isReg())
318  continue;
319  if (!MI->getOperand(1).isReg())
320  continue;
321 
322  const Register Dst = MI->getOperand(0).getReg();
323  const Register Src = MI->getOperand(1).getReg();
324 
325  if (!Register::isVirtualRegister(Dst))
326  continue;
327  if (!Register::isVirtualRegister(Src))
328  continue;
329  // Not folding COPY instructions if regbankselect has not set the RCs.
330  // Why are we only considering Register Classes? Because the verifier
331  // sometimes gets upset if the register classes don't match even if the
332  // types do. A future patch might add COPY folding for matching types in
333  // pre-registerbankselect code.
334  if (!MRI.getRegClassOrNull(Dst))
335  continue;
336  if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
337  continue;
338 
339  std::vector<MachineOperand *> Uses;
340  for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
341  Uses.push_back(&*UI);
342  for (auto *MO : Uses)
343  MO->setReg(Src);
344 
345  Changed = true;
346  MI->eraseFromParent();
347  }
348 
349  return Changed;
350 }
351 
353  bool Changed = false;
354 
355  for (auto &MI : *MBB) {
356  for (auto &MO : MI.operands()) {
357  if (!MO.isReg())
358  continue;
359  if (!MO.isDef() && MO.isKill()) {
360  Changed = true;
361  MO.setIsKill(false);
362  }
363 
364  if (MO.isDef() && MO.isDead()) {
365  Changed = true;
366  MO.setIsDead(false);
367  }
368  }
369  }
370 
371  return Changed;
372 }
373 
375  std::vector<StringRef> &bbNames,
376  unsigned &basicBlockNum, NamedVRegCursor &NVC) {
377 
378  if (CanonicalizeBasicBlockNumber != ~0U) {
379  if (CanonicalizeBasicBlockNumber != basicBlockNum++)
380  return false;
381  LLVM_DEBUG(dbgs() << "\n Canonicalizing BasicBlock " << MBB->getName()
382  << "\n";);
383  }
384 
385  if (llvm::find(bbNames, MBB->getName()) != bbNames.end()) {
386  LLVM_DEBUG({
387  dbgs() << "Found potentially duplicate BasicBlocks: " << MBB->getName()
388  << "\n";
389  });
390  return false;
391  }
392 
393  LLVM_DEBUG({
394  dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << " \n\n";
395  dbgs() << "\n\n================================================\n\n";
396  });
397 
398  bool Changed = false;
399  MachineFunction &MF = *MBB->getParent();
401 
402  bbNames.push_back(MBB->getName());
403  LLVM_DEBUG(dbgs() << "\n\n NEW BASIC BLOCK: " << MBB->getName() << "\n\n";);
404 
405  LLVM_DEBUG(dbgs() << "MBB Before Canonical Copy Propagation:\n";
406  MBB->dump(););
407  Changed |= propagateLocalCopies(MBB);
408  LLVM_DEBUG(dbgs() << "MBB After Canonical Copy Propagation:\n"; MBB->dump(););
409 
410  LLVM_DEBUG(dbgs() << "MBB Before Scheduling:\n"; MBB->dump(););
411  unsigned IdempotentInstCount = 0;
412  Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
413  LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
414 
415  Changed |= NVC.renameVRegs(MBB);
416 
417  // Here we renumber the def vregs for the idempotent instructions from the top
418  // of the MachineBasicBlock so that they are named in the order that we sorted
419  // them alphabetically. Eventually we wont need SkipVRegs because we will use
420  // named vregs instead.
421  if (IdempotentInstCount)
422  NVC.skipVRegs();
423 
424  auto MII = MBB->begin();
425  for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
426  MachineInstr &MI = *MII++;
427  Changed = true;
428  Register vRegToRename = MI.getOperand(0).getReg();
429  auto Rename = NVC.createVirtualRegister(vRegToRename);
430 
431  std::vector<MachineOperand *> RenameMOs;
432  for (auto &MO : MRI.reg_operands(vRegToRename)) {
433  RenameMOs.push_back(&MO);
434  }
435 
436  for (auto *MO : RenameMOs) {
437  MO->setReg(Rename);
438  }
439  }
440 
441  Changed |= doDefKillClear(MBB);
442 
443  LLVM_DEBUG(dbgs() << "Updated MachineBasicBlock:\n"; MBB->dump();
444  dbgs() << "\n";);
445  LLVM_DEBUG(
446  dbgs() << "\n\n================================================\n\n");
447  return Changed;
448 }
449 
450 bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
451 
452  static unsigned functionNum = 0;
453  if (CanonicalizeFunctionNumber != ~0U) {
454  if (CanonicalizeFunctionNumber != functionNum++)
455  return false;
456  LLVM_DEBUG(dbgs() << "\n Canonicalizing Function " << MF.getName()
457  << "\n";);
458  }
459 
460  // we need a valid vreg to create a vreg type for skipping all those
461  // stray vreg numbers so reach alignment/canonical vreg values.
462  std::vector<MachineBasicBlock *> RPOList = GetRPOList(MF);
463 
464  LLVM_DEBUG(
465  dbgs() << "\n\n NEW MACHINE FUNCTION: " << MF.getName() << " \n\n";
466  dbgs() << "\n\n================================================\n\n";
467  dbgs() << "Total Basic Blocks: " << RPOList.size() << "\n";
468  for (auto MBB
469  : RPOList) { dbgs() << MBB->getName() << "\n"; } dbgs()
470  << "\n\n================================================\n\n";);
471 
472  std::vector<StringRef> BBNames;
473 
474  unsigned BBNum = 0;
475 
476  bool Changed = false;
477 
479  NamedVRegCursor NVC(MRI);
480  for (auto MBB : RPOList)
481  Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC);
482 
483  return Changed;
484 }
char & MIRCanonicalizerID
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool renameVRegs(MachineBasicBlock *MBB)
renameVRegs - For a given MachineBasicBlock, scan for side-effecting instructions, walk the def-use from each side-effecting root (in sorted root order) and rename the encountered vregs in the def-use graph in a canonical ordering.
iterator_range< use_nodbg_iterator > use_nodbg_operands(unsigned Reg) const
static bool rescheduleLexographically(std::vector< MachineInstr *> instructions, MachineBasicBlock *MBB, std::function< MachineBasicBlock::iterator()> getPos)
iterator_range< reg_iterator > reg_operands(unsigned Reg) const
unsigned createVirtualRegister(unsigned VReg)
createVirtualRegister - Given an existing vreg, create a named vreg to take its place.
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: BitVector.h:937
static use_iterator use_end()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
mir Rename Register Operands Canonically
mir Rename Register Operands
static bool doDefKillClear(MachineBasicBlock *MBB)
static cl::opt< unsigned > CanonicalizeFunctionNumber("canon-nth-function", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("Function number to canonicalize."))
static cl::opt< unsigned > CanonicalizeBasicBlockNumber("canon-nth-basicblock", cl::Hidden, cl::init(~0u), cl::value_desc("N"), cl::desc("BasicBlock number to canonicalize."))
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
unsigned const MachineRegisterInfo * MRI
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
mir Rename Register Operands static false std::vector< MachineBasicBlock * > GetRPOList(MachineFunction &MF)
Represent the analysis usage information of a pass.
void skipVRegs()
SkipGapSize - Skips modulo a gap value of indices.
static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount, MachineBasicBlock *MBB)
mir canonicalizer
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1193
auto find(R &&Range, const T &Val) -> decltype(adl_begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1186
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
static bool runOnBasicBlock(MachineBasicBlock *MBB, std::vector< StringRef > &bbNames, unsigned &basicBlockNum, NamedVRegCursor &NVC)
MachineOperand class - Representation of each machine instruction operand.
SI Lower i1 Copies
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:301
NamedVRegCursor - The cursor is an object that keeps track of what the next vreg name should be...
static bool propagateLocalCopies(MachineBasicBlock *MBB)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
INITIALIZE_PASS_BEGIN(MIRCanonicalizer, "mir-canonicalizer", "Rename Register Operands Canonically", false, false) INITIALIZE_PASS_END(MIRCanonicalizer
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
use_iterator use_begin(unsigned RegNo) const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
const TargetRegisterClass * getRegClassOrNull(unsigned Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet...
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:503
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:69
print Print MemDeps of function
IRTranslator LLVM IR MI
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
inst_range instructions(Function *F)
Definition: InstIterator.h:133
Register getReg() const
getReg - Returns the register number.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
Wrapper class representing virtual and physical registers.
Definition: Register.h:19