LLVM  14.0.0git
AMDILCFGStructurizer.cpp
Go to the documentation of this file.
1 //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //==-----------------------------------------------------------------------===//
8 
9 #include "AMDGPU.h"
11 #include "R600.h"
12 #include "R600RegisterInfo.h"
13 #include "R600Subtarget.h"
14 #include "llvm/ADT/SCCIterator.h"
15 #include "llvm/ADT/Statistic.h"
21 #include "llvm/InitializePasses.h"
22 
23 using namespace llvm;
24 
25 #define DEBUG_TYPE "structcfg"
26 
27 #define DEFAULT_VEC_SLOTS 8
28 
29 // TODO: move-begin.
30 
31 //===----------------------------------------------------------------------===//
32 //
33 // Statistics for CFGStructurizer.
34 //
35 //===----------------------------------------------------------------------===//
36 
37 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
38  "matched");
39 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
40  "matched");
41 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
42 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
43 
44 namespace llvm {
45 
47 
48 } // end namespace llvm
49 
50 namespace {
51 
52 //===----------------------------------------------------------------------===//
53 //
54 // Miscellaneous utility for CFGStructurizer.
55 //
56 //===----------------------------------------------------------------------===//
57 
58 #define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n");
59 
60 #define SHOWNEWBLK(b, msg) \
61  LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
62  dbgs() << "\n";);
63 
64 #define SHOWBLK_DETAIL(b, msg) \
65  LLVM_DEBUG(if (b) { \
66  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
67  b->print(dbgs()); \
68  dbgs() << "\n"; \
69  });
70 
71 #define INVALIDSCCNUM -1
72 
73 //===----------------------------------------------------------------------===//
74 //
75 // supporting data structure for CFGStructurizer
76 //
77 //===----------------------------------------------------------------------===//
78 
79 class BlockInformation {
80 public:
81  bool IsRetired = false;
82  int SccNum = INVALIDSCCNUM;
83 
84  BlockInformation() = default;
85 };
86 
87 //===----------------------------------------------------------------------===//
88 //
89 // CFGStructurizer
90 //
91 //===----------------------------------------------------------------------===//
92 
93 class AMDGPUCFGStructurizer : public MachineFunctionPass {
94 public:
96  using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>;
97  using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>;
98 
99  enum PathToKind {
100  Not_SinglePath = 0,
101  SinglePath_InPath = 1,
102  SinglePath_NotInPath = 2
103  };
104 
105  static char ID;
106 
107  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
109  }
110 
111  StringRef getPassName() const override {
112  return "AMDGPU Control Flow Graph structurizer Pass";
113  }
114 
115  void getAnalysisUsage(AnalysisUsage &AU) const override {
120  }
121 
122  /// Perform the CFG structurization
123  bool run();
124 
125  /// Perform the CFG preparation
126  /// This step will remove every unconditionnal/dead jump instructions and make
127  /// sure all loops have an exit block
128  bool prepare();
129 
130  bool runOnMachineFunction(MachineFunction &MF) override {
131  TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
132  TRI = &TII->getRegisterInfo();
133  LLVM_DEBUG(MF.dump(););
134  OrderedBlks.clear();
135  Visited.clear();
136  FuncRep = &MF;
137  MLI = &getAnalysis<MachineLoopInfo>();
138  LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
139  MDT = &getAnalysis<MachineDominatorTree>();
140  LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr););
141  PDT = &getAnalysis<MachinePostDominatorTree>();
142  LLVM_DEBUG(PDT->print(dbgs()););
143  prepare();
144  run();
145  LLVM_DEBUG(MF.dump(););
146  return true;
147  }
148 
149 protected:
152  MachineLoopInfo *MLI;
153  const R600InstrInfo *TII = nullptr;
154  const R600RegisterInfo *TRI = nullptr;
155 
156  // PRINT FUNCTIONS
157  /// Print the ordered Blocks.
158  void printOrderedBlocks() const {
159  size_t i = 0;
160  for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
161  iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
162  dbgs() << "BB" << (*iterBlk)->getNumber();
163  dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
164  if (i != 0 && i % 10 == 0) {
165  dbgs() << "\n";
166  } else {
167  dbgs() << " ";
168  }
169  }
170  }
171 
172  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
173  for (MachineLoop::iterator iter = LoopInfo.begin(),
174  iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
175  (*iter)->print(dbgs());
176  }
177  }
178 
179  // UTILITY FUNCTIONS
180  int getSCCNum(MachineBasicBlock *MBB) const;
181  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
182  bool hasBackEdge(MachineBasicBlock *MBB) const;
183  bool isRetiredBlock(MachineBasicBlock *MBB) const;
184  bool isActiveLoophead(MachineBasicBlock *MBB) const;
185  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
186  bool AllowSideEntry = true) const;
187  int countActiveBlock(MBBVector::const_iterator It,
189  bool needMigrateBlock(MachineBasicBlock *MBB) const;
190 
191  // Utility Functions
192  void reversePredicateSetter(MachineBasicBlock::iterator I,
194  /// Compute the reversed DFS post order of Blocks
195  void orderBlocks(MachineFunction *MF);
196 
197  // Function originally from CFGStructTraits
198  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
199  const DebugLoc &DL = DebugLoc());
200  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
201  const DebugLoc &DL = DebugLoc());
202  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
203  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
204  const DebugLoc &DL);
205  void insertCondBranchBefore(MachineBasicBlock *MBB,
206  MachineBasicBlock::iterator I, int NewOpcode,
207  int RegNum, const DebugLoc &DL);
208 
209  static int getBranchNzeroOpcode(int OldOpcode);
210  static int getBranchZeroOpcode(int OldOpcode);
211  static int getContinueNzeroOpcode(int OldOpcode);
212  static int getContinueZeroOpcode(int OldOpcode);
213  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
214  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
215  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
216  MachineInstr *MI);
217  static bool isCondBranch(MachineInstr *MI);
218  static bool isUncondBranch(MachineInstr *MI);
219  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
220  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
221 
222  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
223  ///
224  /// BB with backward-edge could have move instructions after the branch
225  /// instruction. Such move instruction "belong to" the loop backward-edge.
226  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
227 
228  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
229  static bool isReturnBlock(MachineBasicBlock *MBB);
230  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
231  MachineBasicBlock *SrcMBB);
232  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
233 
234  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
235  /// because the AMDGPU instruction is not recognized as terminator fix this
236  /// and retire this routine
237  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
238  MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
239 
240  static void wrapup(MachineBasicBlock *MBB);
241 
242  int patternMatch(MachineBasicBlock *MBB);
243  int patternMatchGroup(MachineBasicBlock *MBB);
244  int serialPatternMatch(MachineBasicBlock *MBB);
245  int ifPatternMatch(MachineBasicBlock *MBB);
246  int loopendPatternMatch();
247  int mergeLoop(MachineLoop *LoopRep);
248 
249  /// return true iff src1Blk->succ_empty() && src1Blk and src2Blk are in
250  /// the same loop with LoopLandInfo without explicitly keeping track of
251  /// loopContBlks and loopBreakBlks, this is a method to get the information.
252  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
253  MachineBasicBlock *Src2MBB);
254  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
255  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
256  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
257  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
258  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
259  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
260  MachineBasicBlock **LandMBBPtr);
261  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
262  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
263  MachineBasicBlock *LandMBB, bool Detail = false);
264  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
265  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
266  void mergeSerialBlock(MachineBasicBlock *DstMBB,
267  MachineBasicBlock *SrcMBB);
268 
269  void mergeIfthenelseBlock(MachineInstr *BranchMI,
271  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
272  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
273  MachineBasicBlock *LandMBB);
274  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
275  MachineBasicBlock *LandMBB);
276  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
277  MachineBasicBlock *ContMBB);
278 
279  /// normalizeInfiniteLoopExit change
280  /// B1:
281  /// uncond_br LoopHeader
282  ///
283  /// to
284  /// B1:
285  /// cond_br 1 LoopHeader dummyExit
286  /// and return the newly added dummy exit block
287  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
288  void removeUnconditionalBranch(MachineBasicBlock *MBB);
289 
290  /// Remove duplicate branches instructions in a block.
291  /// For instance
292  /// B0:
293  /// cond_br X B1 B2
294  /// cond_br X B1 B2
295  /// is transformed to
296  /// B0:
297  /// cond_br X B1 B2
298  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
299 
300  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
301  void removeSuccessor(MachineBasicBlock *MBB);
302  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
303  MachineBasicBlock *PredMBB);
304  void migrateInstruction(MachineBasicBlock *SrcMBB,
306  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
307  void retireBlock(MachineBasicBlock *MBB);
308 
309 private:
310  MBBInfoMap BlockInfoMap;
311  LoopLandInfoMap LLInfoMap;
312  std::map<MachineLoop *, bool> Visited;
313  MachineFunction *FuncRep;
315 };
316 
317 } // end anonymous namespace
318 
320 
321 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
322  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
323  if (It == BlockInfoMap.end())
324  return INVALIDSCCNUM;
325  return (*It).second->SccNum;
326 }
327 
328 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
329  const {
330  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
331  if (It == LLInfoMap.end())
332  return nullptr;
333  return (*It).second;
334 }
335 
336 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
337  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
338  if (!LoopRep)
339  return false;
340  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
341  return MBB->isSuccessor(LoopHeader);
342 }
343 
344 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
345  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
346  if (It == BlockInfoMap.end())
347  return false;
348  return (*It).second->IsRetired;
349 }
350 
351 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
352  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
353  while (LoopRep && LoopRep->getHeader() == MBB) {
354  MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
355  if(!LoopLand)
356  return true;
357  if (!isRetiredBlock(LoopLand))
358  return true;
359  LoopRep = LoopRep->getParentLoop();
360  }
361  return false;
362 }
363 
364 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
365  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
366  bool AllowSideEntry) const {
367  assert(DstMBB);
368  if (SrcMBB == DstMBB)
369  return SinglePath_InPath;
370  while (SrcMBB && SrcMBB->succ_size() == 1) {
371  SrcMBB = *SrcMBB->succ_begin();
372  if (SrcMBB == DstMBB)
373  return SinglePath_InPath;
374  if (!AllowSideEntry && SrcMBB->pred_size() > 1)
375  return Not_SinglePath;
376  }
377  if (SrcMBB && SrcMBB->succ_size()==0)
378  return SinglePath_NotInPath;
379  return Not_SinglePath;
380 }
381 
382 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
384  int Count = 0;
385  while (It != E) {
386  if (!isRetiredBlock(*It))
387  ++Count;
388  ++It;
389  }
390  return Count;
391 }
392 
393 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
394  unsigned BlockSizeThreshold = 30;
395  unsigned CloneInstrThreshold = 100;
396  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
397 
398  if(!MultiplePreds)
399  return false;
400  unsigned BlkSize = MBB->size();
401  return ((BlkSize > BlockSizeThreshold) &&
402  (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
403 }
404 
405 void AMDGPUCFGStructurizer::reversePredicateSetter(
407  assert(I.isValid() && "Expected valid iterator");
408  for (;; --I) {
409  if (I == MBB.end())
410  continue;
411  if (I->getOpcode() == R600::PRED_X) {
412  switch (I->getOperand(2).getImm()) {
413  case R600::PRED_SETE_INT:
414  I->getOperand(2).setImm(R600::PRED_SETNE_INT);
415  return;
416  case R600::PRED_SETNE_INT:
417  I->getOperand(2).setImm(R600::PRED_SETE_INT);
418  return;
419  case R600::PRED_SETE:
420  I->getOperand(2).setImm(R600::PRED_SETNE);
421  return;
422  case R600::PRED_SETNE:
423  I->getOperand(2).setImm(R600::PRED_SETE);
424  return;
425  default:
426  llvm_unreachable("PRED_X Opcode invalid!");
427  }
428  }
429  }
430 }
431 
432 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
433  int NewOpcode, const DebugLoc &DL) {
434  MachineInstr *MI =
435  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
436  MBB->push_back(MI);
437  //assume the instruction doesn't take any reg operand ...
438  SHOWNEWINSTR(MI);
439 }
440 
441 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
442  int NewOpcode,
443  const DebugLoc &DL) {
444  MachineInstr *MI =
445  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
446  if (!MBB->empty())
447  MBB->insert(MBB->begin(), MI);
448  else
449  MBB->push_back(MI);
450  SHOWNEWINSTR(MI);
451  return MI;
452 }
453 
454 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
455  MachineBasicBlock::iterator I, int NewOpcode) {
456  MachineInstr *OldMI = &(*I);
457  MachineBasicBlock *MBB = OldMI->getParent();
458  MachineInstr *NewMBB =
459  MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
460  MBB->insert(I, NewMBB);
461  //assume the instruction doesn't take any reg operand ...
462  SHOWNEWINSTR(NewMBB);
463  return NewMBB;
464 }
465 
466 void AMDGPUCFGStructurizer::insertCondBranchBefore(
467  MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
468  MachineInstr *OldMI = &(*I);
469  MachineBasicBlock *MBB = OldMI->getParent();
470  MachineFunction *MF = MBB->getParent();
471  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
472  MBB->insert(I, NewMI);
473  MachineInstrBuilder MIB(*MF, NewMI);
474  MIB.addReg(OldMI->getOperand(1).getReg(), false);
475  SHOWNEWINSTR(NewMI);
476  //erase later oldInstr->eraseFromParent();
477 }
478 
479 void AMDGPUCFGStructurizer::insertCondBranchBefore(
481  int RegNum, const DebugLoc &DL) {
482  MachineFunction *MF = blk->getParent();
483  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
484  //insert before
485  blk->insert(I, NewInstr);
486  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
487  SHOWNEWINSTR(NewInstr);
488 }
489 
490 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
491  switch(OldOpcode) {
492  case R600::JUMP_COND:
493  case R600::JUMP: return R600::IF_PREDICATE_SET;
494  case R600::BRANCH_COND_i32:
495  case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
496  default: llvm_unreachable("internal error");
497  }
498  return -1;
499 }
500 
501 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
502  switch(OldOpcode) {
503  case R600::JUMP_COND:
504  case R600::JUMP: return R600::IF_PREDICATE_SET;
505  case R600::BRANCH_COND_i32:
506  case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
507  default: llvm_unreachable("internal error");
508  }
509  return -1;
510 }
511 
512 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
513  switch(OldOpcode) {
514  case R600::JUMP_COND:
515  case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
516  default: llvm_unreachable("internal error");
517  }
518  return -1;
519 }
520 
521 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
522  switch(OldOpcode) {
523  case R600::JUMP_COND:
524  case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
525  default: llvm_unreachable("internal error");
526  }
527  return -1;
528 }
529 
530 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
531  return MI->getOperand(0).getMBB();
532 }
533 
534 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
536  MI->getOperand(0).setMBB(MBB);
537 }
538 
540 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
541  MachineInstr *MI) {
542  assert(MBB->succ_size() == 2);
543  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
546  ++Next;
547  return (*It == TrueBranch) ? *Next : *It;
548 }
549 
550 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
551  switch (MI->getOpcode()) {
552  case R600::JUMP_COND:
553  case R600::BRANCH_COND_i32:
554  case R600::BRANCH_COND_f32: return true;
555  default:
556  return false;
557  }
558  return false;
559 }
560 
561 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
562  switch (MI->getOpcode()) {
563  case R600::JUMP:
564  case R600::BRANCH:
565  return true;
566  default:
567  return false;
568  }
569  return false;
570 }
571 
572 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
573  //get DebugLoc from the first MachineBasicBlock instruction with debug info
574  DebugLoc DL;
575  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
576  ++It) {
577  MachineInstr *instr = &(*It);
578  if (instr->getDebugLoc())
579  DL = instr->getDebugLoc();
580  }
581  return DL;
582 }
583 
584 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
587  MachineInstr *MI = &*It;
588  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
589  return MI;
590  return nullptr;
591 }
592 
593 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
596  It != E; ++It) {
597  // FIXME: Simplify
598  MachineInstr *MI = &*It;
599  if (MI) {
600  if (isCondBranch(MI) || isUncondBranch(MI))
601  return MI;
602  else if (!TII->isMov(MI->getOpcode()))
603  break;
604  }
605  }
606  return nullptr;
607 }
608 
609 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
611  if (It != MBB->rend()) {
612  MachineInstr *instr = &(*It);
613  if (instr->getOpcode() == R600::RETURN)
614  return instr;
615  }
616  return nullptr;
617 }
618 
619 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
620  MachineInstr *MI = getReturnInstr(MBB);
621  bool IsReturn = MBB->succ_empty();
622  if (MI)
623  assert(IsReturn);
624  else if (IsReturn)
625  LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber()
626  << " is return block without RETURN instr\n";);
627  return IsReturn;
628 }
629 
630 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
631  MachineBasicBlock *SrcMBB) {
632  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
633  iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
634  DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
635 }
636 
637 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
639  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
640  Func->push_back(NewMBB); //insert to function
641  for (const MachineInstr &It : *MBB)
642  NewMBB->push_back(Func->CloneMachineInstr(&It));
643  return NewMBB;
644 }
645 
646 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
647  MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
648  MachineBasicBlock *NewBlk) {
649  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
650  if (BranchMI && isCondBranch(BranchMI) &&
651  getTrueBranch(BranchMI) == OldMBB)
652  setTrueBranch(BranchMI, NewBlk);
653 }
654 
655 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
658  && "found a jump table");
659 
660  //collect continue right before endloop
665  while (It != E) {
666  if (Pre->getOpcode() == R600::CONTINUE
667  && It->getOpcode() == R600::ENDLOOP)
668  ContInstr.push_back(&*Pre);
669  Pre = It;
670  ++It;
671  }
672 
673  //delete continue right before endloop
674  for (unsigned i = 0; i < ContInstr.size(); ++i)
675  ContInstr[i]->eraseFromParent();
676 
677  // TODO to fix up jump table so later phase won't be confused. if
678  // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
679  // there isn't such an interface yet. alternatively, replace all the other
680  // blocks in the jump table with the entryBlk //}
681 }
682 
683 bool AMDGPUCFGStructurizer::prepare() {
684  bool Changed = false;
685 
686  //FIXME: if not reducible flow graph, make it so ???
687 
688  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
689 
690  orderBlocks(FuncRep);
691 
693 
694  // Add an ExitBlk to loop that don't have one
695  for (MachineLoopInfo::iterator It = MLI->begin(),
696  E = MLI->end(); It != E; ++It) {
697  MachineLoop *LoopRep = (*It);
698  MBBVector ExitingMBBs;
699  LoopRep->getExitingBlocks(ExitingMBBs);
700 
701  if (ExitingMBBs.size() == 0) {
702  MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
703  if (DummyExitBlk)
704  RetBlks.push_back(DummyExitBlk);
705  }
706  }
707 
708  // Remove unconditional branch instr.
709  // Add dummy exit block iff there are multiple returns.
711  It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
712  MachineBasicBlock *MBB = *It;
713  removeUnconditionalBranch(MBB);
714  removeRedundantConditionalBranch(MBB);
715  if (isReturnBlock(MBB)) {
716  RetBlks.push_back(MBB);
717  }
718  assert(MBB->succ_size() <= 2);
719  }
720 
721  if (RetBlks.size() >= 2) {
722  addDummyExitBlock(RetBlks);
723  Changed = true;
724  }
725 
726  return Changed;
727 }
728 
729 bool AMDGPUCFGStructurizer::run() {
730  //Assume reducible CFG...
731  LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");
732 
733 #ifdef STRESSTEST
734  //Use the worse block ordering to test the algorithm.
735  ReverseVector(orderedBlks);
736 #endif
737 
738  LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
739  int NumIter = 0;
740  bool Finish = false;
742  bool MakeProgress = false;
743  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
744  OrderedBlks.end());
745 
746  do {
747  ++NumIter;
748  LLVM_DEBUG(dbgs() << "numIter = " << NumIter
749  << ", numRemaintedBlk = " << NumRemainedBlk << "\n";);
750 
752  OrderedBlks.begin();
754  OrderedBlks.end();
755 
757  It;
758  MachineBasicBlock *SccBeginMBB = nullptr;
759  int SccNumBlk = 0; // The number of active blocks, init to a
760  // maximum possible number.
761  int SccNumIter; // Number of iteration in this SCC.
762 
763  while (It != E) {
764  MBB = *It;
765 
766  if (!SccBeginMBB) {
767  SccBeginIter = It;
768  SccBeginMBB = MBB;
769  SccNumIter = 0;
770  SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
771  LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
772  dbgs() << "\n";);
773  }
774 
775  if (!isRetiredBlock(MBB))
776  patternMatch(MBB);
777 
778  ++It;
779 
780  bool ContNextScc = true;
781  if (It == E
782  || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
783  // Just finish one scc.
784  ++SccNumIter;
785  int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
786  if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
787  LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
788  << ", sccNumIter = " << SccNumIter;
789  dbgs() << "doesn't make any progress\n";);
790  ContNextScc = true;
791  } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
792  SccNumBlk = sccRemainedNumBlk;
793  It = SccBeginIter;
794  ContNextScc = false;
795  LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB)
796  << "sccNumIter = " << SccNumIter << '\n';);
797  } else {
798  // Finish the current scc.
799  ContNextScc = true;
800  }
801  } else {
802  // Continue on next component in the current scc.
803  ContNextScc = false;
804  }
805 
806  if (ContNextScc)
807  SccBeginMBB = nullptr;
808  } //while, "one iteration" over the function.
809 
810  MachineBasicBlock *EntryMBB =
812  if (EntryMBB->succ_empty()) {
813  Finish = true;
814  LLVM_DEBUG(dbgs() << "Reduce to one block\n";);
815  } else {
816  int NewnumRemainedBlk
817  = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
818  // consider cloned blocks ??
819  if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
820  MakeProgress = true;
821  NumRemainedBlk = NewnumRemainedBlk;
822  } else {
823  MakeProgress = false;
824  LLVM_DEBUG(dbgs() << "No progress\n";);
825  }
826  }
827  } while (!Finish && MakeProgress);
828 
829  // Misc wrap up to maintain the consistency of the Function representation.
831 
832  // Detach retired Block, release memory.
833  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
834  It != E; ++It) {
835  if ((*It).second && (*It).second->IsRetired) {
836  assert(((*It).first)->getNumber() != -1);
837  LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";);
838  (*It).first->eraseFromParent(); //Remove from the parent Function.
839  }
840  delete (*It).second;
841  }
842  BlockInfoMap.clear();
843  LLInfoMap.clear();
844 
845  if (!Finish) {
846  LLVM_DEBUG(FuncRep->viewCFG());
847  report_fatal_error("IRREDUCIBLE_CFG");
848  }
849 
850  return true;
851 }
852 
853 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
854  int SccNum = 0;
856  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
857  ++It, ++SccNum) {
858  const std::vector<MachineBasicBlock *> &SccNext = *It;
859  for (std::vector<MachineBasicBlock *>::const_iterator
860  blockIter = SccNext.begin(), blockEnd = SccNext.end();
861  blockIter != blockEnd; ++blockIter) {
862  MBB = *blockIter;
863  OrderedBlks.push_back(MBB);
864  recordSccnum(MBB, SccNum);
865  }
866  }
867 
868  // walk through all the block in func to check for unreachable
869  for (auto *MBB : nodes(MF)) {
870  SccNum = getSCCNum(MBB);
871  if (SccNum == INVALIDSCCNUM)
872  dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
873  }
874 }
875 
876 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
877  int NumMatch = 0;
878  int CurMatch;
879 
880  LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";);
881 
882  while ((CurMatch = patternMatchGroup(MBB)) > 0)
883  NumMatch += CurMatch;
884 
885  LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber()
886  << ", numMatch = " << NumMatch << "\n";);
887 
888  return NumMatch;
889 }
890 
891 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
892  int NumMatch = 0;
893  NumMatch += loopendPatternMatch();
894  NumMatch += serialPatternMatch(MBB);
895  NumMatch += ifPatternMatch(MBB);
896  return NumMatch;
897 }
898 
899 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
900  if (MBB->succ_size() != 1)
901  return 0;
902 
903  MachineBasicBlock *childBlk = *MBB->succ_begin();
904  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
905  return 0;
906 
907  mergeSerialBlock(MBB, childBlk);
908  ++numSerialPatternMatch;
909  return 1;
910 }
911 
912 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
913  //two edges
914  if (MBB->succ_size() != 2)
915  return 0;
916  if (hasBackEdge(MBB))
917  return 0;
918  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
919  if (!BranchMI)
920  return 0;
921 
922  assert(isCondBranch(BranchMI));
923  int NumMatch = 0;
924 
925  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
926  NumMatch += serialPatternMatch(TrueMBB);
927  NumMatch += ifPatternMatch(TrueMBB);
928  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
929  NumMatch += serialPatternMatch(FalseMBB);
930  NumMatch += ifPatternMatch(FalseMBB);
931  MachineBasicBlock *LandBlk;
932  int Cloned = 0;
933 
934  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
935  // TODO: Simplify
936  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
937  && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
938  // Diamond pattern
939  LandBlk = *TrueMBB->succ_begin();
940  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
941  // Triangle pattern, false is empty
942  LandBlk = FalseMBB;
943  FalseMBB = nullptr;
944  } else if (FalseMBB->succ_size() == 1
945  && *FalseMBB->succ_begin() == TrueMBB) {
946  // Triangle pattern, true is empty
947  // We reverse the predicate to make a triangle, empty false pattern;
948  std::swap(TrueMBB, FalseMBB);
949  reversePredicateSetter(MBB->end(), *MBB);
950  LandBlk = FalseMBB;
951  FalseMBB = nullptr;
952  } else if (FalseMBB->succ_size() == 1
953  && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
954  LandBlk = *FalseMBB->succ_begin();
955  } else if (TrueMBB->succ_size() == 1
956  && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
957  LandBlk = *TrueMBB->succ_begin();
958  } else {
959  return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
960  }
961 
962  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
963  // new BB created for landBlk==NULL may introduce new challenge to the
964  // reduction process.
965  if (LandBlk &&
966  ((TrueMBB && TrueMBB->pred_size() > 1)
967  || (FalseMBB && FalseMBB->pred_size() > 1))) {
968  Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
969  }
970 
971  if (TrueMBB && TrueMBB->pred_size() > 1) {
972  TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
973  ++Cloned;
974  }
975 
976  if (FalseMBB && FalseMBB->pred_size() > 1) {
977  FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
978  ++Cloned;
979  }
980 
981  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
982 
983  ++numIfPatternMatch;
984 
985  numClonedBlock += Cloned;
986 
987  return 1 + Cloned + NumMatch;
988 }
989 
990 int AMDGPUCFGStructurizer::loopendPatternMatch() {
991  std::deque<MachineLoop *> NestedLoops;
992  for (auto &It: *MLI)
993  for (MachineLoop *ML : depth_first(It))
994  NestedLoops.push_front(ML);
995 
996  if (NestedLoops.empty())
997  return 0;
998 
999  // Process nested loop outside->inside (we did push_front),
1000  // so "continue" to a outside loop won't be mistaken as "break"
1001  // of the current loop.
1002  int Num = 0;
1003  for (MachineLoop *ExaminedLoop : NestedLoops) {
1004  if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
1005  continue;
1006  LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
1007  int NumBreak = mergeLoop(ExaminedLoop);
1008  if (NumBreak == -1)
1009  break;
1010  Num += NumBreak;
1011  }
1012  return Num;
1013 }
1014 
1015 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
1016  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1017  MBBVector ExitingMBBs;
1018  LoopRep->getExitingBlocks(ExitingMBBs);
1019  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
1020  LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size()
1021  << " exiting blocks\n";);
1022  // We assume a single ExitBlk
1023  MBBVector ExitBlks;
1024  LoopRep->getExitBlocks(ExitBlks);
1026  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
1027  ExitBlkSet.insert(ExitBlks[i]);
1028  assert(ExitBlkSet.size() == 1);
1029  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
1030  assert(ExitBlk && "Loop has several exit block");
1031  MBBVector LatchBlks;
1032  for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
1033  if (LoopRep->contains(LB))
1034  LatchBlks.push_back(LB);
1035 
1036  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
1037  mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
1038  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
1039  settleLoopcontBlock(LatchBlks[i], LoopHeader);
1040  int Match = 0;
1041  do {
1042  Match = 0;
1043  Match += serialPatternMatch(LoopHeader);
1044  Match += ifPatternMatch(LoopHeader);
1045  } while (Match > 0);
1046  mergeLooplandBlock(LoopHeader, ExitBlk);
1047  MachineLoop *ParentLoop = LoopRep->getParentLoop();
1048  if (ParentLoop)
1049  MLI->changeLoopFor(LoopHeader, ParentLoop);
1050  else
1051  MLI->removeBlock(LoopHeader);
1052  Visited[LoopRep] = true;
1053  return 1;
1054 }
1055 
1056 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
1057  MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
1058  if (Src1MBB->succ_empty()) {
1059  MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
1060  if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
1061  MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
1062  if (TheEntry) {
1063  LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB"
1064  << Src1MBB->getNumber() << " src2 = BB"
1065  << Src2MBB->getNumber() << "\n";);
1066  return true;
1067  }
1068  }
1069  }
1070  return false;
1071 }
1072 
1073 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
1074  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1075  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
1076  if (Num == 0) {
1077  LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk"
1078  << "\n";);
1079  Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
1080  }
1081  return Num;
1082 }
1083 
1084 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
1085  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
1086  int Num = 0;
1087  MachineBasicBlock *DownBlk;
1088 
1089  //trueBlk could be the common post dominator
1090  DownBlk = TrueMBB;
1091 
1092  LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
1093  << " true = BB" << TrueMBB->getNumber()
1094  << ", numSucc=" << TrueMBB->succ_size() << " false = BB"
1095  << FalseMBB->getNumber() << "\n";);
1096 
1097  while (DownBlk) {
1098  LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber(););
1099 
1100  if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
1101  LLVM_DEBUG(dbgs() << " working\n";);
1102 
1103  Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
1104  Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
1105 
1106  numClonedBlock += Num;
1107  Num += serialPatternMatch(*HeadMBB->succ_begin());
1108  Num += serialPatternMatch(*std::next(HeadMBB->succ_begin()));
1109  Num += ifPatternMatch(HeadMBB);
1110  assert(Num > 0);
1111 
1112  break;
1113  }
1114  LLVM_DEBUG(dbgs() << " not working\n";);
1115  DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr;
1116  } // walk down the postDomTree
1117 
1118  return Num;
1119 }
1120 
1121 #ifndef NDEBUG
1122 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
1123  MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
1124  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
1125  dbgs() << "head = BB" << HeadMBB->getNumber()
1126  << " size = " << HeadMBB->size();
1127  if (Detail) {
1128  dbgs() << "\n";
1129  HeadMBB->print(dbgs());
1130  dbgs() << "\n";
1131  }
1132 
1133  if (TrueMBB) {
1134  dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
1135  << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
1136  if (Detail) {
1137  dbgs() << "\n";
1138  TrueMBB->print(dbgs());
1139  dbgs() << "\n";
1140  }
1141  }
1142  if (FalseMBB) {
1143  dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
1144  << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
1145  if (Detail) {
1146  dbgs() << "\n";
1147  FalseMBB->print(dbgs());
1148  dbgs() << "\n";
1149  }
1150  }
1151  if (LandMBB) {
1152  dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
1153  << LandMBB->size() << " numPred = " << LandMBB->pred_size();
1154  if (Detail) {
1155  dbgs() << "\n";
1156  LandMBB->print(dbgs());
1157  dbgs() << "\n";
1158  }
1159  }
1160 
1161  dbgs() << "\n";
1162 }
1163 #endif
1164 
1165 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
1166  MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
1167  MachineBasicBlock **LandMBBPtr) {
1168  bool MigrateTrue = false;
1169  bool MigrateFalse = false;
1170 
1171  MachineBasicBlock *LandBlk = *LandMBBPtr;
1172 
1173  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
1174  && (!FalseMBB || FalseMBB->succ_size() <= 1));
1175 
1176  if (TrueMBB == FalseMBB)
1177  return 0;
1178 
1179  MigrateTrue = needMigrateBlock(TrueMBB);
1180  MigrateFalse = needMigrateBlock(FalseMBB);
1181 
1182  if (!MigrateTrue && !MigrateFalse)
1183  return 0;
1184 
1185  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
1186  // have more than one predecessors. without doing this, its predecessor
1187  // rather than headBlk will have undefined value in initReg.
1188  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
1189  MigrateTrue = true;
1190  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
1191  MigrateFalse = true;
1192 
1193  LLVM_DEBUG(
1194  dbgs() << "before improveSimpleJumpintoIf: ";
1195  showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1196 
1197  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
1198  //
1199  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
1200  // {initReg = 0; org falseBlk branch }
1201  // => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
1202  // => org landBlk
1203  // if landBlk->pred_size() > 2, put the about if-else inside
1204  // if (initReg !=2) {...}
1205  //
1206  // add initReg = initVal to headBlk
1207 
1208  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
1209  if (!MigrateTrue || !MigrateFalse) {
1210  // XXX: We have an opportunity here to optimize the "branch into if" case
1211  // here. Branch into if looks like this:
1212  // entry
1213  // / |
1214  // diamond_head branch_from
1215  // / \ |
1216  // diamond_false diamond_true
1217  // \ /
1218  // done
1219  //
1220  // The diamond_head block begins the "if" and the diamond_true block
1221  // is the block being "branched into".
1222  //
1223  // If MigrateTrue is true, then TrueBB is the block being "branched into"
1224  // and if MigrateFalse is true, then FalseBB is the block being
1225  // "branched into"
1226  //
1227  // Here is the pseudo code for how I think the optimization should work:
1228  // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
1229  // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
1230  // 3. Move the branch instruction from diamond_head into its own basic
1231  // block (new_block).
1232  // 4. Add an unconditional branch from diamond_head to new_block
1233  // 5. Replace the branch instruction in branch_from with an unconditional
1234  // branch to new_block. If branch_from has multiple predecessors, then
1235  // we need to replace the True/False block in the branch
1236  // instruction instead of replacing it.
1237  // 6. Change the condition of the branch instruction in new_block from
1238  // COND to (COND || GPR0)
1239  //
1240  // In order insert these MOV instruction, we will need to use the
1241  // RegisterScavenger. Usually liveness stops being tracked during
1242  // the late machine optimization passes, however if we implement
1243  // bool TargetRegisterInfo::requiresRegisterScavenging(
1244  // const MachineFunction &MF)
1245  // and have it return true, liveness will be tracked correctly
1246  // by generic optimization passes. We will also need to make sure that
1247  // all of our target-specific passes that run after regalloc and before
1248  // the CFGStructurizer track liveness and we will need to modify this pass
1249  // to correctly track liveness.
1250  //
1251  // After the above changes, the new CFG should look like this:
1252  // entry
1253  // / |
1254  // diamond_head branch_from
1255  // \ /
1256  // new_block
1257  // / |
1258  // diamond_false diamond_true
1259  // \ /
1260  // done
1261  //
1262  // Without this optimization, we are forced to duplicate the diamond_true
1263  // block and we will end up with a CFG like this:
1264  //
1265  // entry
1266  // / |
1267  // diamond_head branch_from
1268  // / \ |
1269  // diamond_false diamond_true diamond_true (duplicate)
1270  // \ / |
1271  // done --------------------|
1272  //
1273  // Duplicating diamond_true can be very costly especially if it has a
1274  // lot of instructions.
1275  return 0;
1276  }
1277 
1278  int NumNewBlk = 0;
1279 
1280  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
1281 
1282  //insert R600::ENDIF to avoid special case "input landBlk == NULL"
1283  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
1284 
1285  if (LandBlkHasOtherPred) {
1286  report_fatal_error("Extra register needed to handle CFG");
1287  Register CmpResReg =
1288  HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1289  report_fatal_error("Extra compare instruction needed to handle CFG");
1290  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
1291  CmpResReg, DebugLoc());
1292  }
1293 
1294  // XXX: We are running this after RA, so creating virtual registers will
1295  // cause an assertion failure in the PostRA scheduling pass.
1296  Register InitReg =
1297  HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
1298  insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
1299  DebugLoc());
1300 
1301  if (MigrateTrue) {
1302  migrateInstruction(TrueMBB, LandBlk, I);
1303  // need to uncondionally insert the assignment to ensure a path from its
1304  // predecessor rather than headBlk has valid value in initReg if
1305  // (initVal != 1).
1306  report_fatal_error("Extra register needed to handle CFG");
1307  }
1308  insertInstrBefore(I, R600::ELSE);
1309 
1310  if (MigrateFalse) {
1311  migrateInstruction(FalseMBB, LandBlk, I);
1312  // need to uncondionally insert the assignment to ensure a path from its
1313  // predecessor rather than headBlk has valid value in initReg if
1314  // (initVal != 0)
1315  report_fatal_error("Extra register needed to handle CFG");
1316  }
1317 
1318  if (LandBlkHasOtherPred) {
1319  // add endif
1320  insertInstrBefore(I, R600::ENDIF);
1321 
1322  // put initReg = 2 to other predecessors of landBlk
1323  for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
1324  PE = LandBlk->pred_end(); PI != PE; ++PI) {
1325  MachineBasicBlock *MBB = *PI;
1326  if (MBB != TrueMBB && MBB != FalseMBB)
1327  report_fatal_error("Extra register needed to handle CFG");
1328  }
1329  }
1330  LLVM_DEBUG(
1331  dbgs() << "result from improveSimpleJumpintoIf: ";
1332  showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0););
1333 
1334  // update landBlk
1335  *LandMBBPtr = LandBlk;
1336 
1337  return NumNewBlk;
1338 }
1339 
1340 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
1341  MachineBasicBlock *SrcMBB) {
1342  LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB"
1343  << SrcMBB->getNumber() << "\n";);
1344  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
1345 
1346  DstMBB->removeSuccessor(SrcMBB, true);
1347  cloneSuccessorList(DstMBB, SrcMBB);
1348 
1349  removeSuccessor(SrcMBB);
1350  MLI->removeBlock(SrcMBB);
1351  retireBlock(SrcMBB);
1352 }
1353 
1354 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
1356  MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
1357  assert (TrueMBB);
1358  LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ ";
1359  if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs()
1360  << " } else ";
1361  dbgs() << "{ "; if (FalseMBB) {
1362  dbgs() << "BB" << FalseMBB->getNumber();
1363  } dbgs() << " }\n ";
1364  dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else {
1365  dbgs() << "BB" << LandMBB->getNumber();
1366  } dbgs() << "\n";);
1367 
1368  int OldOpcode = BranchMI->getOpcode();
1369  DebugLoc BranchDL = BranchMI->getDebugLoc();
1370 
1371 // transform to
1372 // if cond
1373 // trueBlk
1374 // else
1375 // falseBlk
1376 // endif
1377 // landBlk
1378 
1379  MachineBasicBlock::iterator I = BranchMI;
1380  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
1381  BranchDL);
1382 
1383  if (TrueMBB) {
1384  MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
1385  MBB->removeSuccessor(TrueMBB, true);
1386  if (LandMBB && TrueMBB->succ_size()!=0)
1387  TrueMBB->removeSuccessor(LandMBB, true);
1388  retireBlock(TrueMBB);
1389  MLI->removeBlock(TrueMBB);
1390  }
1391 
1392  if (FalseMBB) {
1393  insertInstrBefore(I, R600::ELSE);
1394  MBB->splice(I, FalseMBB, FalseMBB->begin(),
1395  FalseMBB->end());
1396  MBB->removeSuccessor(FalseMBB, true);
1397  if (LandMBB && !FalseMBB->succ_empty())
1398  FalseMBB->removeSuccessor(LandMBB, true);
1399  retireBlock(FalseMBB);
1400  MLI->removeBlock(FalseMBB);
1401  }
1402  insertInstrBefore(I, R600::ENDIF);
1403 
1404  BranchMI->eraseFromParent();
1405 
1406  if (LandMBB && TrueMBB && FalseMBB)
1407  MBB->addSuccessor(LandMBB);
1408 }
1409 
1410 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
1411  MachineBasicBlock *LandMBB) {
1412  LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
1413  << " land = BB" << LandMBB->getNumber() << "\n";);
1414 
1415  insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
1416  insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
1417  DstBlk->replaceSuccessor(DstBlk, LandMBB);
1418 }
1419 
1420 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
1421  MachineBasicBlock *LandMBB) {
1422  LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB"
1423  << ExitingMBB->getNumber() << " land = BB"
1424  << LandMBB->getNumber() << "\n";);
1425  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
1426  assert(BranchMI && isCondBranch(BranchMI));
1427  DebugLoc DL = BranchMI->getDebugLoc();
1428  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
1429  MachineBasicBlock::iterator I = BranchMI;
1430  if (TrueBranch != LandMBB)
1431  reversePredicateSetter(I, *I->getParent());
1432  insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
1433  insertInstrBefore(I, R600::BREAK);
1434  insertInstrBefore(I, R600::ENDIF);
1435  //now branchInst can be erase safely
1436  BranchMI->eraseFromParent();
1437  //now take care of successors, retire blocks
1438  ExitingMBB->removeSuccessor(LandMBB, true);
1439 }
1440 
1441 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
1442  MachineBasicBlock *ContMBB) {
1443  LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
1444  << ContingMBB->getNumber() << ", cont = BB"
1445  << ContMBB->getNumber() << "\n";);
1446 
1447  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
1448  if (MI) {
1449  assert(isCondBranch(MI));
1451  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
1452  int OldOpcode = MI->getOpcode();
1453  DebugLoc DL = MI->getDebugLoc();
1454 
1455  bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
1456 
1457  if (!UseContinueLogical) {
1458  int BranchOpcode =
1459  TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
1460  getBranchZeroOpcode(OldOpcode);
1461  insertCondBranchBefore(I, BranchOpcode, DL);
1462  // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
1463  insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
1464  insertInstrEnd(ContingMBB, R600::ENDIF, DL);
1465  } else {
1466  int BranchOpcode =
1467  TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
1468  getContinueZeroOpcode(OldOpcode);
1469  insertCondBranchBefore(I, BranchOpcode, DL);
1470  }
1471 
1472  MI->eraseFromParent();
1473  } else {
1474  // if we've arrived here then we've already erased the branch instruction
1475  // travel back up the basic block to see the last reference of our debug
1476  // location we've just inserted that reference here so it should be
1477  // representative insertEnd to ensure phi-moves, if exist, go before the
1478  // continue-instr.
1479  insertInstrEnd(ContingMBB, R600::CONTINUE,
1480  getLastDebugLocInBB(ContingMBB));
1481  }
1482 }
1483 
1484 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
1485  MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
1486  int Cloned = 0;
1487  assert(PreMBB->isSuccessor(SrcMBB));
1488  while (SrcMBB && SrcMBB != DstMBB) {
1489  assert(SrcMBB->succ_size() == 1);
1490  if (SrcMBB->pred_size() > 1) {
1491  SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
1492  ++Cloned;
1493  }
1494 
1495  PreMBB = SrcMBB;
1496  SrcMBB = *SrcMBB->succ_begin();
1497  }
1498 
1499  return Cloned;
1500 }
1501 
1503 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
1504  MachineBasicBlock *PredMBB) {
1505  assert(PredMBB->isSuccessor(MBB) &&
1506  "succBlk is not a prececessor of curBlk");
1507 
1508  MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
1509  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
1510  //srcBlk, oldBlk, newBlk
1511 
1512  PredMBB->replaceSuccessor(MBB, CloneMBB);
1513 
1514  // add all successor to cloneBlk
1515  cloneSuccessorList(CloneMBB, MBB);
1516 
1517  numClonedInstr += MBB->size();
1518 
1519  LLVM_DEBUG(dbgs() << "Cloned block: "
1520  << "BB" << MBB->getNumber() << "size " << MBB->size()
1521  << "\n";);
1522 
1523  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
1524 
1525  return CloneMBB;
1526 }
1527 
1528 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
1530  MachineBasicBlock::iterator SpliceEnd;
1531  //look for the input branchinstr, not the AMDGPU branchinstr
1532  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
1533  if (!BranchMI) {
1534  LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";);
1535  SpliceEnd = SrcMBB->end();
1536  } else {
1537  LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI);
1538  SpliceEnd = BranchMI;
1539  }
1540  LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = "
1541  << DstMBB->size() << "srcSize = " << SrcMBB->size()
1542  << "\n";);
1543 
1544  //splice insert before insertPos
1545  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
1546 
1547  LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = "
1548  << DstMBB->size() << "srcSize = " << SrcMBB->size()
1549  << '\n';);
1550 }
1551 
1553 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
1554  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
1555  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
1556 
1557  if (!LoopHeader || !LoopLatch)
1558  return nullptr;
1559  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
1560  // Is LoopRep an infinite loop ?
1561  if (!BranchMI || !isUncondBranch(BranchMI))
1562  return nullptr;
1563 
1564  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1565  FuncRep->push_back(DummyExitBlk); //insert to function
1566  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
1567  LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
1568  LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext();
1569  Ctx.emitError("Extra register needed to handle CFG");
1570  return nullptr;
1571 }
1572 
1573 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
1574  MachineInstr *BranchMI;
1575 
1576  // I saw two unconditional branch in one basic block in example
1577  // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
1578  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
1579  && isUncondBranch(BranchMI)) {
1580  LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI);
1581  BranchMI->eraseFromParent();
1582  }
1583 }
1584 
1585 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
1587  if (MBB->succ_size() != 2)
1588  return;
1589  MachineBasicBlock *MBB1 = *MBB->succ_begin();
1590  MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin());
1591  if (MBB1 != MBB2)
1592  return;
1593 
1594  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
1595  assert(BranchMI && isCondBranch(BranchMI));
1596  LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI);
1597  BranchMI->eraseFromParent();
1598  SHOWNEWBLK(MBB1, "Removing redundant successor");
1599  MBB->removeSuccessor(MBB1, true);
1600 }
1601 
1602 void AMDGPUCFGStructurizer::addDummyExitBlock(
1604  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
1605  FuncRep->push_back(DummyExitBlk); //insert to function
1606  insertInstrEnd(DummyExitBlk, R600::RETURN);
1607 
1608  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
1609  E = RetMBB.end(); It != E; ++It) {
1610  MachineBasicBlock *MBB = *It;
1611  MachineInstr *MI = getReturnInstr(MBB);
1612  if (MI)
1613  MI->eraseFromParent();
1614  MBB->addSuccessor(DummyExitBlk);
1615  LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
1616  << " successors\n";);
1617  }
1618  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
1619 }
1620 
1621 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
1622  while (MBB->succ_size())
1624 }
1625 
1626 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
1627  int SccNum) {
1628  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
1629  if (!srcBlkInfo)
1630  srcBlkInfo = new BlockInformation();
1631  srcBlkInfo->SccNum = SccNum;
1632 }
1633 
1634 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
1635  LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";);
1636 
1637  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
1638 
1639  if (!SrcBlkInfo)
1640  SrcBlkInfo = new BlockInformation();
1641 
1642  SrcBlkInfo->IsRetired = true;
1643  assert(MBB->succ_empty() && MBB->pred_empty() && "can't retire block yet");
1644 }
1645 
1646 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer",
1647  "AMDGPU CFG Structurizer", false, false)
1651 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer",
1653 
1655  return new AMDGPUCFGStructurizer();
1656 }
i
i
Definition: README.txt:29
SHOWNEWINSTR
#define SHOWNEWINSTR(i)
Definition: AMDILCFGStructurizer.cpp:58
llvm::MachineBasicBlock::succ_size
unsigned succ_size() const
Definition: MachineBasicBlock.h:344
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:316
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::LLVMContext::emitError
void emitError(uint64_t LocCookie, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
Definition: LLVMContext.cpp:251
llvm::LoopBase::getExitBlocks
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
Definition: LoopInfoImpl.h:62
llvm::MachineLoopInfo::getLoopFor
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Definition: MachineLoopInfo.h:127
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
SCCIterator.h
llvm::MachineLoopInfo::iterator
LoopInfoBase< MachineBasicBlock, MachineLoop >::iterator iterator
The iterator interface to the top-level loops in the current function.
Definition: MachineLoopInfo.h:120
llvm::LoopBase::contains
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:122
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:321
MachineJumpTableInfo.h
llvm::AMDGPUISD::ELSE
@ ELSE
Definition: AMDGPUISelLowering.h:351
llvm::NVPTXISD::RETURN
@ RETURN
Definition: NVPTXISelLowering.h:49
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::DiagnosticPredicateTy::Match
@ Match
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
llvm::R600RegisterInfo
Definition: R600RegisterInfo.h:22
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:746
llvm::MachineBasicBlock::push_back
void push_back(MachineInstr *MI)
Definition: MachineBasicBlock.h:843
R600.h
llvm::MachineBasicBlock::pred_size
unsigned pred_size() const
Definition: MachineBasicBlock.h:328
llvm::LoopBase::getParentLoop
LoopT * getParentLoop() const
Return the parent loop if it exists or nullptr for top level loops.
Definition: LoopInfo.h:113
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:636
MachineLoopInfo.h
llvm::MachineLoopInfo::end
iterator end() const
Definition: MachineLoopInfo.h:122
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineBasicBlock::succ_end
succ_iterator succ_end()
Definition: MachineBasicBlock.h:334
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:499
llvm::MachineJumpTableInfo::isEmpty
bool isEmpty() const
isEmpty - Return true if there are no jump tables.
Definition: MachineJumpTableInfo.h:97
llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:908
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
false
Definition: StackSlotColoring.cpp:142
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::MachineBasicBlock::rend
reverse_iterator rend()
Definition: MachineBasicBlock.h:278
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::LoopBase::getExitingBlocks
void getExitingBlocks(SmallVectorImpl< BlockT * > &ExitingBlocks) const
Return all blocks inside the loop that have successors outside of the loop.
Definition: LoopInfoImpl.h:34
SHOWNEWBLK
#define SHOWNEWBLK(b, msg)
Definition: AMDILCFGStructurizer.cpp:60
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", "AMDGPU CFG Structurizer", false, false) INITIALIZE_PASS_END(AMDGPUCFGStructurizer
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::scc_begin
scc_iterator< T > scc_begin(const T &G)
Construct the begin iterator for a deduced graph type T.
Definition: SCCIterator.h:228
llvm::MachineBasicBlock::succ_iterator
std::vector< MachineBasicBlock * >::iterator succ_iterator
Definition: MachineBasicBlock.h:306
R600MCTargetDesc.h
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:626
blk
static uint32_t blk(uint32_t *Buf, int I)
Definition: SHA1.cpp:36
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
llvm::R600Subtarget
Definition: R600Subtarget.h:35
llvm::MachineLoop
Definition: MachineLoopInfo.h:45
llvm::MachineBasicBlock::pred_end
pred_iterator pred_end()
Definition: MachineBasicBlock.h:318
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::scc_iterator
Enumerate the SCCs of a directed graph in reverse topological order of the SCC DAG.
Definition: SCCIterator.h:42
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::nodes
iterator_range< typename GraphTraits< GraphType >::nodes_iterator > nodes(const GraphType &G)
Definition: GraphTraits.h:108
llvm::MachineFunction::dump
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
Definition: MachineFunction.cpp:536
I
#define I(x, y, z)
Definition: MD5.cpp:59
MachineFunctionPass.h
llvm::MachineBasicBlock::pred_iterator
std::vector< MachineBasicBlock * >::iterator pred_iterator
Definition: MachineBasicBlock.h:304
llvm::LoopBase::getLoopLatch
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:216
llvm::SmallVectorImpl::const_iterator
typename SuperClass::const_iterator const_iterator
Definition: SmallVector.h:563
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::size
unsigned size() const
Definition: MachineBasicBlock.h:239
llvm::MachineBasicBlock::succ_begin
succ_iterator succ_begin()
Definition: MachineBasicBlock.h:332
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
llvm::initializeAMDGPUCFGStructurizerPass
void initializeAMDGPUCFGStructurizerPass(PassRegistry &)
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::LoopInfoBase::begin
iterator begin() const
Definition: LoopInfo.h:942
MachinePostDominators.h
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
amdgpustructurizer
amdgpustructurizer
Definition: AMDILCFGStructurizer.cpp:1651
llvm::MachineBasicBlock::pred_empty
bool pred_empty() const
Definition: MachineBasicBlock.h:331
R600RegisterInfo.h
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::MachineBasicBlock::succ_empty
bool succ_empty() const
Definition: MachineBasicBlock.h:347
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::MachineBasicBlock::getNumber
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
Definition: MachineBasicBlock.h:1056
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::MachineBasicBlock::rbegin
reverse_iterator rbegin()
Definition: MachineBasicBlock.h:272
llvm::MachineBasicBlock::splice
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
Definition: MachineBasicBlock.h:950
AMDGPU.h
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:489
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:286
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::MachinePostDominatorTree
MachinePostDominatorTree - an analysis pass wrapper for DominatorTree used to compute the post-domina...
Definition: MachinePostDominators.h:27
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
llvm::depth_first
iterator_range< df_iterator< T > > depth_first(const T &G)
Definition: DepthFirstIterator.h:229
CFG
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to CFG
Definition: README.txt:39
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:807
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:592
llvm::MachineBasicBlock::removeSuccessor
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:784
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1312
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
R600Subtarget.h
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
INVALIDSCCNUM
#define INVALIDSCCNUM
Definition: AMDILCFGStructurizer.cpp:71
llvm::R600InstrInfo
Definition: R600InstrInfo.h:39
llvm::LoopInfoBase::end
iterator end() const
Definition: LoopInfo.h:943
llvm::MachineLoopInfo::begin
iterator begin() const
Definition: MachineLoopInfo.h:121
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
llvm::LoopBase< MachineBasicBlock, MachineLoop >::iterator
std::vector< MachineLoop * >::const_iterator iterator
Definition: LoopInfo.h:151
llvm::SmallVectorImpl::iterator
typename SuperClass::iterator iterator
Definition: SmallVector.h:562
llvm::MachineBasicBlock::empty
bool empty() const
Definition: MachineBasicBlock.h:240
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::MachineFunction::getJumpTableInfo
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
Definition: MachineFunction.h:649
llvm::MachineFunction::CreateMachineInstr
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, const DebugLoc &DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
Definition: MachineFunction.cpp:348
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::GraphTraits
Definition: GraphTraits.h:35
llvm::MachineBasicBlock::print
void print(raw_ostream &OS, const SlotIndexes *=nullptr, bool IsStandalone=true) const
Definition: MachineBasicBlock.cpp:333
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::MachineDominatorTree
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
Definition: MachineDominators.h:45
llvm::createAMDGPUCFGStructurizerPass
FunctionPass * createAMDGPUCFGStructurizerPass()
Definition: AMDILCFGStructurizer.cpp:1654
MachineFunction.h
llvm::MachineInstr::eraseFromParent
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Definition: MachineInstr.cpp:677
llvm::MachineInstrBundleIterator< MachineInstr >
InitializePasses.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
Structurizer
AMDGPU CFG Structurizer
Definition: AMDILCFGStructurizer.cpp:1652
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37