LLVM 17.0.0git
AMDGPUMachineCFGStructurizer.cpp
Go to the documentation of this file.
1//===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the machine instruction level CFG structurizer pass.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AMDGPU.h"
14#include "GCNSubtarget.h"
15#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/SetVector.h"
25
26using namespace llvm;
27
28#define DEBUG_TYPE "amdgpucfgstructurizer"
29
30namespace {
31
32class PHILinearizeDestIterator;
33
34class PHILinearize {
35 friend class PHILinearizeDestIterator;
36
37public:
38 using PHISourceT = std::pair<unsigned, MachineBasicBlock *>;
39
40private:
41 using PHISourcesT = DenseSet<PHISourceT>;
42 using PHIInfoElementT = struct {
43 unsigned DestReg;
45 PHISourcesT Sources;
46 };
47 using PHIInfoT = SmallPtrSet<PHIInfoElementT *, 2>;
48 PHIInfoT PHIInfo;
49
50 static unsigned phiInfoElementGetDest(PHIInfoElementT *Info);
51 static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef);
52 static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info);
53 static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg,
54 MachineBasicBlock *SourceMBB);
55 static void phiInfoElementRemoveSource(PHIInfoElementT *Info,
56 unsigned SourceReg,
57 MachineBasicBlock *SourceMBB);
58 PHIInfoElementT *findPHIInfoElement(unsigned DestReg);
59 PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg,
60 MachineBasicBlock *SourceMBB);
61
62public:
63 bool findSourcesFromMBB(MachineBasicBlock *SourceMBB,
65 void addDest(unsigned DestReg, const DebugLoc &DL);
66 void replaceDef(unsigned OldDestReg, unsigned NewDestReg);
67 void deleteDef(unsigned DestReg);
68 void addSource(unsigned DestReg, unsigned SourceReg,
69 MachineBasicBlock *SourceMBB);
70 void removeSource(unsigned DestReg, unsigned SourceReg,
71 MachineBasicBlock *SourceMBB = nullptr);
72 bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
73 unsigned &DestReg);
74 bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr);
75 unsigned getNumSources(unsigned DestReg);
77 void clear();
78
79 using source_iterator = PHISourcesT::iterator;
80 using dest_iterator = PHILinearizeDestIterator;
81
82 dest_iterator dests_begin();
83 dest_iterator dests_end();
84
85 source_iterator sources_begin(unsigned Reg);
86 source_iterator sources_end(unsigned Reg);
87};
88
89class PHILinearizeDestIterator {
90private:
92
93public:
94 PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
95
96 unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); }
97 PHILinearizeDestIterator &operator++() {
98 ++Iter;
99 return *this;
100 }
101 bool operator==(const PHILinearizeDestIterator &I) const {
102 return I.Iter == Iter;
103 }
104 bool operator!=(const PHILinearizeDestIterator &I) const {
105 return I.Iter != Iter;
106 }
107};
108
109} // end anonymous namespace
110
111unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) {
112 return Info->DestReg;
113}
114
115void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info,
116 unsigned NewDef) {
117 Info->DestReg = NewDef;
118}
119
121PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) {
122 return Info->Sources;
123}
124
125void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info,
126 unsigned SourceReg,
127 MachineBasicBlock *SourceMBB) {
128 // Assertion ensures we don't use the same SourceMBB for the
129 // sources, because we cannot have different registers with
130 // identical predecessors, but we can have the same register for
131 // multiple predecessors.
132#if !defined(NDEBUG)
133 for (auto SI : phiInfoElementGetSources(Info)) {
134 assert((SI.second != SourceMBB || SourceReg == SI.first));
135 }
136#endif
137
138 phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB));
139}
140
141void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info,
142 unsigned SourceReg,
143 MachineBasicBlock *SourceMBB) {
144 auto &Sources = phiInfoElementGetSources(Info);
145 SmallVector<PHISourceT, 4> ElimiatedSources;
146 for (auto SI : Sources) {
147 if (SI.first == SourceReg &&
148 (SI.second == nullptr || SI.second == SourceMBB)) {
149 ElimiatedSources.push_back(PHISourceT(SI.first, SI.second));
150 }
151 }
152
153 for (auto &Source : ElimiatedSources) {
154 Sources.erase(Source);
155 }
156}
157
158PHILinearize::PHIInfoElementT *
159PHILinearize::findPHIInfoElement(unsigned DestReg) {
160 for (auto *I : PHIInfo) {
161 if (phiInfoElementGetDest(I) == DestReg) {
162 return I;
163 }
164 }
165 return nullptr;
166}
167
168PHILinearize::PHIInfoElementT *
169PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg,
170 MachineBasicBlock *SourceMBB) {
171 for (auto *I : PHIInfo) {
172 for (auto SI : phiInfoElementGetSources(I)) {
173 if (SI.first == SourceReg &&
174 (SI.second == nullptr || SI.second == SourceMBB)) {
175 return I;
176 }
177 }
178 }
179 return nullptr;
180}
181
182bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
183 SmallVector<unsigned, 4> &Sources) {
184 bool FoundSource = false;
185 for (auto *I : PHIInfo) {
186 for (auto SI : phiInfoElementGetSources(I)) {
187 if (SI.second == SourceMBB) {
188 FoundSource = true;
189 Sources.push_back(SI.first);
190 }
191 }
192 }
193 return FoundSource;
194}
195
196void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
197 assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exists");
198 PHISourcesT EmptySet;
199 PHIInfoElementT *NewElement = new PHIInfoElementT();
200 NewElement->DestReg = DestReg;
201 NewElement->DL = DL;
202 NewElement->Sources = EmptySet;
203 PHIInfo.insert(NewElement);
204}
205
206void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) {
207 phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg);
208}
209
210void PHILinearize::deleteDef(unsigned DestReg) {
211 PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg);
212 PHIInfo.erase(InfoElement);
213 delete InfoElement;
214}
215
216void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg,
217 MachineBasicBlock *SourceMBB) {
218 phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
219}
220
221void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg,
222 MachineBasicBlock *SourceMBB) {
223 phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
224}
225
226bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
227 unsigned &DestReg) {
228 PHIInfoElementT *InfoElement =
229 findPHIInfoElementFromSource(SourceReg, SourceMBB);
230 if (InfoElement != nullptr) {
231 DestReg = phiInfoElementGetDest(InfoElement);
232 return true;
233 }
234 return false;
235}
236
237bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) {
238 unsigned DestReg;
239 return findDest(Reg, SourceMBB, DestReg);
240}
241
242unsigned PHILinearize::getNumSources(unsigned DestReg) {
243 return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size();
244}
245
246#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
247LLVM_DUMP_METHOD void PHILinearize::dump(MachineRegisterInfo *MRI) {
248 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
249 dbgs() << "=PHIInfo Start=\n";
250 for (auto *PII : this->PHIInfo) {
251 PHIInfoElementT &Element = *PII;
252 dbgs() << "Dest: " << printReg(Element.DestReg, TRI)
253 << " Sources: {";
254 for (auto &SI : Element.Sources) {
255 dbgs() << printReg(SI.first, TRI) << '(' << printMBBReference(*SI.second)
256 << "),";
257 }
258 dbgs() << "}\n";
259 }
260 dbgs() << "=PHIInfo End=\n";
261}
262#endif
263
264void PHILinearize::clear() { PHIInfo = PHIInfoT(); }
265
266PHILinearize::dest_iterator PHILinearize::dests_begin() {
267 return PHILinearizeDestIterator(PHIInfo.begin());
268}
269
270PHILinearize::dest_iterator PHILinearize::dests_end() {
271 return PHILinearizeDestIterator(PHIInfo.end());
272}
273
274PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) {
275 auto InfoElement = findPHIInfoElement(Reg);
276 return phiInfoElementGetSources(InfoElement).begin();
277}
278
279PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) {
280 auto InfoElement = findPHIInfoElement(Reg);
281 return phiInfoElementGetSources(InfoElement).end();
282}
283
285 assert(PHI.isPHI());
286 return (PHI.getNumOperands() - 1) / 2;
287}
288
290 assert(PHI.isPHI());
291 return PHI.getOperand(Index * 2 + 2).getMBB();
292}
293
294static void setPhiPred(MachineInstr &PHI, unsigned Index,
295 MachineBasicBlock *NewPred) {
296 PHI.getOperand(Index * 2 + 2).setMBB(NewPred);
297}
298
299static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) {
300 assert(PHI.isPHI());
301 return PHI.getOperand(Index * 2 + 1).getReg();
302}
303
304static unsigned getPHIDestReg(MachineInstr &PHI) {
305 assert(PHI.isPHI());
306 return PHI.getOperand(0).getReg();
307}
308
309namespace {
310
311class RegionMRT;
312class MBBMRT;
313
314class LinearizedRegion {
315protected:
316 MachineBasicBlock *Entry;
317 // The exit block is part of the region, and is the last
318 // merge block before exiting the region.
319 MachineBasicBlock *Exit;
320 DenseSet<unsigned> LiveOuts;
322 bool HasLoop;
323 LinearizedRegion *Parent;
324 RegionMRT *RMRT;
325
326 void storeLiveOutReg(MachineBasicBlock *MBB, Register Reg,
327 MachineInstr *DefInstr, const MachineRegisterInfo *MRI,
328 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
329
330 void storeLiveOutRegRegion(RegionMRT *Region, Register Reg,
331 MachineInstr *DefInstr,
333 const TargetRegisterInfo *TRI,
334 PHILinearize &PHIInfo);
335
336 void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
337 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
338 RegionMRT *TopRegion);
339
340 void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
341 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
342
343 void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI,
344 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
345 RegionMRT *TopRegion = nullptr);
346
347public:
348 LinearizedRegion();
349 LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
350 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
351 ~LinearizedRegion() = default;
352
353 void setRegionMRT(RegionMRT *Region) { RMRT = Region; }
354
355 RegionMRT *getRegionMRT() { return RMRT; }
356
357 void setParent(LinearizedRegion *P) { Parent = P; }
358
359 LinearizedRegion *getParent() { return Parent; }
360
361 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr);
362
363 void setBBSelectRegIn(unsigned Reg);
364
365 unsigned getBBSelectRegIn();
366
367 void setBBSelectRegOut(unsigned Reg, bool IsLiveOut);
368
369 unsigned getBBSelectRegOut();
370
371 void setHasLoop(bool Value);
372
373 bool getHasLoop();
374
375 void addLiveOut(unsigned VReg);
376
377 void removeLiveOut(unsigned Reg);
378
379 void replaceLiveOut(unsigned OldReg, unsigned NewReg);
380
381 void replaceRegister(unsigned Register, class Register NewRegister,
382 MachineRegisterInfo *MRI, bool ReplaceInside,
383 bool ReplaceOutside, bool IncludeLoopPHIs);
384
385 void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister,
386 bool IncludeLoopPHIs,
388
389 void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister,
390 bool IncludeLoopPHIs,
392
393 DenseSet<unsigned> *getLiveOuts();
394
395 void setEntry(MachineBasicBlock *NewEntry);
396
397 MachineBasicBlock *getEntry();
398
399 void setExit(MachineBasicBlock *NewExit);
400
401 MachineBasicBlock *getExit();
402
403 void addMBB(MachineBasicBlock *MBB);
404
405 void addMBBs(LinearizedRegion *InnerRegion);
406
408
409 bool isLiveOut(unsigned Reg);
410
411 bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI);
412
413 void removeFalseRegisterKills(MachineRegisterInfo *MRI);
414
415 void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI,
416 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
417};
418
419class MRT {
420protected:
421 RegionMRT *Parent;
422 unsigned BBSelectRegIn;
423 unsigned BBSelectRegOut;
424
425public:
426 virtual ~MRT() = default;
427
428 unsigned getBBSelectRegIn() { return BBSelectRegIn; }
429
430 unsigned getBBSelectRegOut() { return BBSelectRegOut; }
431
432 void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; }
433
434 void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; }
435
436 virtual RegionMRT *getRegionMRT() { return nullptr; }
437
438 virtual MBBMRT *getMBBMRT() { return nullptr; }
439
440 bool isRegion() { return getRegionMRT() != nullptr; }
441
442 bool isMBB() { return getMBBMRT() != nullptr; }
443
444 bool isRoot() { return Parent == nullptr; }
445
446 void setParent(RegionMRT *Region) { Parent = Region; }
447
448 RegionMRT *getParent() { return Parent; }
449
450 static MachineBasicBlock *
451 initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
453
454 static RegionMRT *buildMRT(MachineFunction &MF,
456 const SIInstrInfo *TII,
458
459 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0;
460
461 void dumpDepth(int depth) {
462 for (int i = depth; i > 0; --i) {
463 dbgs() << " ";
464 }
465 }
466};
467
468class MBBMRT : public MRT {
470
471public:
472 MBBMRT(MachineBasicBlock *BB) : MBB(BB) {
473 setParent(nullptr);
474 setBBSelectRegOut(0);
475 setBBSelectRegIn(0);
476 }
477
478 MBBMRT *getMBBMRT() override { return this; }
479
480 MachineBasicBlock *getMBB() { return MBB; }
481
482 void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
483 dumpDepth(depth);
484 dbgs() << "MBB: " << getMBB()->getNumber();
485 dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI);
486 dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n";
487 }
488};
489
490class RegionMRT : public MRT {
491protected:
493 LinearizedRegion *LRegion = nullptr;
494 MachineBasicBlock *Succ = nullptr;
496
497public:
499 setParent(nullptr);
500 setBBSelectRegOut(0);
501 setBBSelectRegIn(0);
502 }
503
504 ~RegionMRT() override {
505 if (LRegion) {
506 delete LRegion;
507 }
508
509 for (auto *CI : Children) {
510 delete &(*CI);
511 }
512 }
513
514 RegionMRT *getRegionMRT() override { return this; }
515
516 void setLinearizedRegion(LinearizedRegion *LinearizeRegion) {
517 LRegion = LinearizeRegion;
518 }
519
520 LinearizedRegion *getLinearizedRegion() { return LRegion; }
521
522 MachineRegion *getMachineRegion() { return Region; }
523
524 unsigned getInnerOutputRegister() {
525 return (*(Children.begin()))->getBBSelectRegOut();
526 }
527
528 void addChild(MRT *Tree) { Children.insert(Tree); }
529
530 SetVector<MRT *> *getChildren() { return &Children; }
531
532 void dump(const TargetRegisterInfo *TRI, int depth = 0) override {
533 dumpDepth(depth);
534 dbgs() << "Region: " << (void *)Region;
535 dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI);
536 dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n";
537
538 dumpDepth(depth);
539 if (getSucc())
540 dbgs() << "Succ: " << getSucc()->getNumber() << "\n";
541 else
542 dbgs() << "Succ: none \n";
543 for (auto *MRTI : Children) {
544 MRTI->dump(TRI, depth + 1);
545 }
546 }
547
548 MRT *getEntryTree() { return Children.back(); }
549
550 MRT *getExitTree() { return Children.front(); }
551
552 MachineBasicBlock *getEntry() {
553 MRT *Tree = Children.back();
554 return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry()
555 : Tree->getMBBMRT()->getMBB();
556 }
557
558 MachineBasicBlock *getExit() {
559 MRT *Tree = Children.front();
560 return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit()
561 : Tree->getMBBMRT()->getMBB();
562 }
563
564 void setSucc(MachineBasicBlock *MBB) { Succ = MBB; }
565
566 MachineBasicBlock *getSucc() { return Succ; }
567
569 for (auto *CI : Children) {
570 if (CI->isMBB()) {
571 if (MBB == CI->getMBBMRT()->getMBB()) {
572 return true;
573 }
574 } else {
575 if (CI->getRegionMRT()->contains(MBB)) {
576 return true;
577 } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr &&
578 CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) {
579 return true;
580 }
581 }
582 }
583 return false;
584 }
585
586 void replaceLiveOutReg(unsigned Register, unsigned NewRegister) {
587 LinearizedRegion *LRegion = getLinearizedRegion();
588 LRegion->replaceLiveOut(Register, NewRegister);
589 for (auto &CI : Children) {
590 if (CI->isRegion()) {
591 CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
592 }
593 }
594 }
595};
596
597} // end anonymous namespace
598
599static unsigned createBBSelectReg(const SIInstrInfo *TII,
601 return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32));
602}
603
605MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
607 for (auto &MFI : MF) {
608 MachineBasicBlock *ExitMBB = &MFI;
609 if (ExitMBB->succ_empty()) {
610 return ExitMBB;
611 }
612 }
613 llvm_unreachable("CFG has no exit block");
614 return nullptr;
615}
616
617RegionMRT *MRT::buildMRT(MachineFunction &MF,
622 MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion();
623 RegionMRT *Result = new RegionMRT(TopLevelRegion);
624 RegionMap[TopLevelRegion] = Result;
625
626 // Insert the exit block first, we need it to be the merge node
627 // for the top level region.
628 MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap);
629
630 unsigned BBSelectRegIn = createBBSelectReg(TII, MRI);
631 MBBMRT *ExitMRT = new MBBMRT(Exit);
632 RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT);
633 ExitMRT->setBBSelectRegIn(BBSelectRegIn);
634
635 for (auto *MBBI : post_order(&(MF.front()))) {
636 MachineBasicBlock *MBB = &(*MBBI);
637
638 // Skip Exit since we already added it
639 if (MBB == Exit) {
640 continue;
641 }
642
643 LLVM_DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n");
644 MBBMRT *NewMBB = new MBBMRT(MBB);
646
647 // Ensure we have the MRT region
648 if (RegionMap.count(Region) == 0) {
649 RegionMRT *NewMRTRegion = new RegionMRT(Region);
650 RegionMap[Region] = NewMRTRegion;
651
652 // Ensure all parents are in the RegionMap
653 MachineRegion *Parent = Region->getParent();
654 while (RegionMap.count(Parent) == 0) {
655 RegionMRT *NewMRTParent = new RegionMRT(Parent);
656 NewMRTParent->addChild(NewMRTRegion);
657 NewMRTRegion->setParent(NewMRTParent);
658 RegionMap[Parent] = NewMRTParent;
659 NewMRTRegion = NewMRTParent;
660 Parent = Parent->getParent();
661 }
662 RegionMap[Parent]->addChild(NewMRTRegion);
663 NewMRTRegion->setParent(RegionMap[Parent]);
664 }
665
666 // Add MBB to Region MRT
667 RegionMap[Region]->addChild(NewMBB);
668 NewMBB->setParent(RegionMap[Region]);
669 RegionMap[Region]->setSucc(Region->getExit());
670 }
671 return Result;
672}
673
674void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, Register Reg,
675 MachineInstr *DefInstr,
677 const TargetRegisterInfo *TRI,
678 PHILinearize &PHIInfo) {
679 if (Reg.isVirtual()) {
680 LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
681 << "\n");
682 // If this is a source register to a PHI we are chaining, it
683 // must be live out.
684 if (PHIInfo.isSource(Reg)) {
685 LLVM_DEBUG(dbgs() << "Add LiveOut (PHI): " << printReg(Reg, TRI) << "\n");
686 addLiveOut(Reg);
687 } else {
688 // If this is live out of the MBB
689 for (auto &UI : MRI->use_operands(Reg)) {
690 if (UI.getParent()->getParent() != MBB) {
691 LLVM_DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB)
692 << "): " << printReg(Reg, TRI) << "\n");
693 addLiveOut(Reg);
694 } else {
695 // If the use is in the same MBB we have to make sure
696 // it is after the def, otherwise it is live out in a loop
697 MachineInstr *UseInstr = UI.getParent();
699 MII = UseInstr->getIterator(),
700 MIE = UseInstr->getParent()->instr_end();
701 MII != MIE; ++MII) {
702 if ((&(*MII)) == DefInstr) {
703 LLVM_DEBUG(dbgs() << "Add LiveOut (Loop): " << printReg(Reg, TRI)
704 << "\n");
705 addLiveOut(Reg);
706 }
707 }
708 }
709 }
710 }
711 }
712}
713
714void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, Register Reg,
715 MachineInstr *DefInstr,
717 const TargetRegisterInfo *TRI,
718 PHILinearize &PHIInfo) {
719 if (Reg.isVirtual()) {
720 LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI)
721 << "\n");
722 for (auto &UI : MRI->use_operands(Reg)) {
723 if (!Region->contains(UI.getParent()->getParent())) {
724 LLVM_DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region
725 << "): " << printReg(Reg, TRI) << "\n");
726 addLiveOut(Reg);
727 }
728 }
729 }
730}
731
732void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
734 const TargetRegisterInfo *TRI,
735 PHILinearize &PHIInfo) {
736 LLVM_DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB)
737 << ")-\n");
738 for (auto &II : *MBB) {
739 for (auto &RI : II.defs()) {
740 storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo);
741 }
742 for (auto &IRI : II.implicit_operands()) {
743 if (IRI.isDef()) {
744 storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo);
745 }
746 }
747 }
748
749 // If we have a successor with a PHI, source coming from this MBB we have to
750 // add the register as live out
751 for (MachineBasicBlock *Succ : MBB->successors()) {
752 for (auto &II : *Succ) {
753 if (II.isPHI()) {
754 MachineInstr &PHI = II;
755 int numPreds = getPHINumInputs(PHI);
756 for (int i = 0; i < numPreds; ++i) {
757 if (getPHIPred(PHI, i) == MBB) {
758 unsigned PHIReg = getPHISourceReg(PHI, i);
760 << "Add LiveOut (PhiSource " << printMBBReference(*MBB)
761 << " -> " << printMBBReference(*Succ)
762 << "): " << printReg(PHIReg, TRI) << "\n");
763 addLiveOut(PHIReg);
764 }
765 }
766 }
767 }
768 }
769
770 LLVM_DEBUG(dbgs() << "-Store Live Outs Endn-\n");
771}
772
773void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB,
775 const TargetRegisterInfo *TRI,
776 PHILinearize &PHIInfo,
777 RegionMRT *TopRegion) {
778 for (auto &II : *MBB) {
779 for (auto &RI : II.defs()) {
780 storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI,
781 PHIInfo);
782 }
783 for (auto &IRI : II.implicit_operands()) {
784 if (IRI.isDef()) {
785 storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI,
786 TRI, PHIInfo);
787 }
788 }
789 }
790}
791
792void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
794 const TargetRegisterInfo *TRI,
795 PHILinearize &PHIInfo,
796 RegionMRT *CurrentTopRegion) {
797 MachineBasicBlock *Exit = Region->getSucc();
798
799 RegionMRT *TopRegion =
800 CurrentTopRegion == nullptr ? Region : CurrentTopRegion;
801
802 // Check if exit is end of function, if so, no live outs.
803 if (Exit == nullptr)
804 return;
805
806 auto Children = Region->getChildren();
807 for (auto *CI : *Children) {
808 if (CI->isMBB()) {
809 auto MBB = CI->getMBBMRT()->getMBB();
810 storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion);
811 } else {
812 LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
813 // We should be limited to only store registers that are live out from the
814 // linearized region
815 for (auto *MBBI : SubRegion->MBBs) {
816 storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
817 }
818 }
819 }
820
821 if (CurrentTopRegion == nullptr) {
822 auto Succ = Region->getSucc();
823 for (auto &II : *Succ) {
824 if (II.isPHI()) {
825 MachineInstr &PHI = II;
826 int numPreds = getPHINumInputs(PHI);
827 for (int i = 0; i < numPreds; ++i) {
828 if (Region->contains(getPHIPred(PHI, i))) {
829 unsigned PHIReg = getPHISourceReg(PHI, i);
830 LLVM_DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region
831 << "): " << printReg(PHIReg, TRI) << "\n");
832 addLiveOut(PHIReg);
833 }
834 }
835 }
836 }
837 }
838}
839
840#ifndef NDEBUG
841void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
842 OS << "Linearized Region {";
843 bool IsFirst = true;
844 for (auto *MBB : MBBs) {
845 if (IsFirst) {
846 IsFirst = false;
847 } else {
848 OS << " ,";
849 }
850 OS << MBB->getNumber();
851 }
852 OS << "} (" << Entry->getNumber() << ", "
853 << (Exit == nullptr ? -1 : Exit->getNumber())
854 << "): In:" << printReg(getBBSelectRegIn(), TRI)
855 << " Out:" << printReg(getBBSelectRegOut(), TRI) << " {";
856 for (auto &LI : LiveOuts) {
857 OS << printReg(LI, TRI) << " ";
858 }
859 OS << "} \n";
860}
861#endif
862
863unsigned LinearizedRegion::getBBSelectRegIn() {
864 return getRegionMRT()->getBBSelectRegIn();
865}
866
867unsigned LinearizedRegion::getBBSelectRegOut() {
868 return getRegionMRT()->getBBSelectRegOut();
869}
870
871void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; }
872
873bool LinearizedRegion::getHasLoop() { return HasLoop; }
874
875void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); }
876
877void LinearizedRegion::removeLiveOut(unsigned Reg) {
878 if (isLiveOut(Reg))
879 LiveOuts.erase(Reg);
880}
881
882void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) {
883 if (isLiveOut(OldReg)) {
884 removeLiveOut(OldReg);
885 addLiveOut(NewReg);
886 }
887}
888
889void LinearizedRegion::replaceRegister(unsigned Register,
890 class Register NewRegister,
892 bool ReplaceInside, bool ReplaceOutside,
893 bool IncludeLoopPHI) {
894 assert(Register != NewRegister && "Cannot replace a reg with itself");
895
897 dbgs() << "Preparing to replace register (region): "
898 << printReg(Register, MRI->getTargetRegisterInfo()) << " with "
899 << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
900
901 // If we are replacing outside, we also need to update the LiveOuts
902 if (ReplaceOutside &&
903 (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) {
904 LinearizedRegion *Current = this;
905 while (Current != nullptr && Current->getEntry() != nullptr) {
906 LLVM_DEBUG(dbgs() << "Region before register replace\n");
907 LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
908 Current->replaceLiveOut(Register, NewRegister);
909 LLVM_DEBUG(dbgs() << "Region after register replace\n");
910 LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
911 Current = Current->getParent();
912 }
913 }
914
916 E = MRI->reg_end();
917 I != E;) {
918 MachineOperand &O = *I;
919 ++I;
920
921 // We don't rewrite defs.
922 if (O.isDef())
923 continue;
924
925 bool IsInside = contains(O.getParent()->getParent());
926 bool IsLoopPHI = IsInside && (O.getParent()->isPHI() &&
927 O.getParent()->getParent() == getEntry());
928 bool ShouldReplace = (IsInside && ReplaceInside) ||
929 (!IsInside && ReplaceOutside) ||
930 (IncludeLoopPHI && IsLoopPHI);
931 if (ShouldReplace) {
932
933 if (NewRegister.isPhysical()) {
934 LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
935 << printReg(NewRegister, MRI->getTargetRegisterInfo())
936 << "\n");
937 llvm_unreachable("Cannot substitute physical registers");
938 } else {
939 LLVM_DEBUG(dbgs() << "Replacing register (region): "
940 << printReg(Register, MRI->getTargetRegisterInfo())
941 << " with "
942 << printReg(NewRegister, MRI->getTargetRegisterInfo())
943 << "\n");
944 O.setReg(NewRegister);
945 }
946 }
947 }
948}
949
950void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register,
951 unsigned NewRegister,
952 bool IncludeLoopPHIs,
954 replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs);
955}
956
957void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register,
958 unsigned NewRegister,
959 bool IncludeLoopPHIs,
961 replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs);
962}
963
964DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; }
965
966void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) {
967 Entry = NewEntry;
968}
969
970MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; }
971
972void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; }
973
974MachineBasicBlock *LinearizedRegion::getExit() { return Exit; }
975
976void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); }
977
978void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) {
979 for (auto *MBB : InnerRegion->MBBs) {
980 addMBB(MBB);
981 }
982}
983
984bool LinearizedRegion::contains(MachineBasicBlock *MBB) {
985 return MBBs.contains(MBB);
986}
987
988bool LinearizedRegion::isLiveOut(unsigned Reg) {
989 return LiveOuts.contains(Reg);
990}
991
992bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) {
993 return MRI->def_begin(Reg) == MRI->def_end();
994}
995
996// After the code has been structurized, what was flagged as kills
997// before are no longer register kills.
998void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
999 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
1000 (void)TRI; // It's used by LLVM_DEBUG.
1001
1002 for (auto *MBBI : MBBs) {
1004 for (auto &II : *MBB) {
1005 for (auto &RI : II.uses()) {
1006 if (RI.isReg()) {
1007 Register Reg = RI.getReg();
1008 if (Reg.isVirtual()) {
1009 if (hasNoDef(Reg, MRI))
1010 continue;
1011 if (!MRI->hasOneDef(Reg)) {
1012 LLVM_DEBUG(this->getEntry()->getParent()->dump());
1013 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << "\n");
1014 }
1015
1016 if (MRI->def_begin(Reg) == MRI->def_end()) {
1017 LLVM_DEBUG(dbgs() << "Register "
1018 << printReg(Reg, MRI->getTargetRegisterInfo())
1019 << " has NO defs\n");
1020 } else if (!MRI->hasOneDef(Reg)) {
1021 LLVM_DEBUG(dbgs() << "Register "
1022 << printReg(Reg, MRI->getTargetRegisterInfo())
1023 << " has multiple defs\n");
1024 }
1025
1026 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1027 MachineOperand *Def = &(*(MRI->def_begin(Reg)));
1028 MachineOperand *UseOperand = &(RI);
1029 bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB;
1030 if (UseIsOutsideDefMBB && UseOperand->isKill()) {
1031 LLVM_DEBUG(dbgs() << "Removing kill flag on register: "
1032 << printReg(Reg, TRI) << "\n");
1033 UseOperand->setIsKill(false);
1034 }
1035 }
1036 }
1037 }
1038 }
1039 }
1040}
1041
1042void LinearizedRegion::initLiveOut(RegionMRT *Region,
1043 const MachineRegisterInfo *MRI,
1044 const TargetRegisterInfo *TRI,
1045 PHILinearize &PHIInfo) {
1046 storeLiveOuts(Region, MRI, TRI, PHIInfo);
1047}
1048
1049LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB,
1050 const MachineRegisterInfo *MRI,
1051 const TargetRegisterInfo *TRI,
1052 PHILinearize &PHIInfo) {
1053 setEntry(MBB);
1054 setExit(MBB);
1055 storeLiveOuts(MBB, MRI, TRI, PHIInfo);
1056 MBBs.insert(MBB);
1057 Parent = nullptr;
1058}
1059
1060LinearizedRegion::LinearizedRegion() {
1061 setEntry(nullptr);
1062 setExit(nullptr);
1063 Parent = nullptr;
1064}
1065
1066namespace {
1067
1068class AMDGPUMachineCFGStructurizer : public MachineFunctionPass {
1069private:
1070 const MachineRegionInfo *Regions;
1071 const SIInstrInfo *TII;
1072 const TargetRegisterInfo *TRI;
1074 PHILinearize PHIInfo;
1076 RegionMRT *RMRT;
1077
1078 void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI,
1079 SmallVector<unsigned, 2> &RegionIndices);
1080 void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1081 SmallVector<unsigned, 2> &RegionIndices);
1082 void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1083 SmallVector<unsigned, 2> &PHINonRegionIndices);
1084
1085 void storePHILinearizationInfoDest(
1086 unsigned LDestReg, MachineInstr &PHI,
1087 SmallVector<unsigned, 2> *RegionIndices = nullptr);
1088
1089 unsigned storePHILinearizationInfo(MachineInstr &PHI,
1090 SmallVector<unsigned, 2> *RegionIndices);
1091
1092 void extractKilledPHIs(MachineBasicBlock *MBB);
1093
1094 bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices,
1095 unsigned *ReplaceReg);
1096
1097 bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1098 MachineBasicBlock *SourceMBB,
1099 SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg);
1100
1101 void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1102 MachineBasicBlock *LastMerge,
1103 SmallVector<unsigned, 2> &PHIRegionIndices);
1104 void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1105 MachineBasicBlock *IfMBB,
1106 SmallVector<unsigned, 2> &PHIRegionIndices);
1107 void replaceLiveOutRegs(MachineInstr &PHI,
1108 SmallVector<unsigned, 2> &PHIRegionIndices,
1109 unsigned CombinedSourceReg,
1110 LinearizedRegion *LRegion);
1111 void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge,
1112 MachineInstr &PHI, LinearizedRegion *LRegion);
1113
1114 void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge,
1115 LinearizedRegion *LRegion);
1116 void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB,
1117 MachineInstr &PHI);
1118 void rewriteRegionEntryPHIs(LinearizedRegion *Region,
1119 MachineBasicBlock *IfMBB);
1120
1121 bool regionIsSimpleIf(RegionMRT *Region);
1122
1123 void transformSimpleIfRegion(RegionMRT *Region);
1124
1125 void insertUnconditionalBranch(MachineBasicBlock *MBB,
1126 MachineBasicBlock *Dest,
1127 const DebugLoc &DL = DebugLoc());
1128
1129 MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region);
1130
1131 void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1132 MachineBasicBlock *MergeBB, unsigned DestRegister,
1133 unsigned IfSourceRegister, unsigned CodeSourceRegister,
1134 bool IsUndefIfSource = false);
1135
1136 MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB,
1137 MachineBasicBlock *CodeBBStart,
1138 MachineBasicBlock *CodeBBEnd,
1139 MachineBasicBlock *SelectBB, unsigned IfReg,
1140 bool InheritPreds);
1141
1142 void prunePHIInfo(MachineBasicBlock *MBB);
1143 void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg);
1144
1145 void createEntryPHIs(LinearizedRegion *CurrentRegion);
1146 void resolvePHIInfos(MachineBasicBlock *FunctionEntry);
1147
1148 void replaceRegisterWith(unsigned Register, class Register NewRegister);
1149
1150 MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB,
1151 MachineBasicBlock *CodeBB,
1152 LinearizedRegion *LRegion,
1153 unsigned BBSelectRegIn,
1154 unsigned BBSelectRegOut);
1155
1157 createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion,
1158 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
1159 unsigned BBSelectRegIn, unsigned BBSelectRegOut);
1160 void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond);
1161
1162 void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1163 MachineBasicBlock *MergeBB,
1164 unsigned BBSelectReg);
1165
1166 MachineInstr *getDefInstr(unsigned Reg);
1167 void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1168 MachineBasicBlock *MergeBB,
1169 LinearizedRegion *InnerRegion, unsigned DestReg,
1170 unsigned SourceReg);
1171 bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion,
1172 unsigned Register);
1173 void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1174 MachineBasicBlock *MergeBB,
1175 LinearizedRegion *InnerRegion,
1176 LinearizedRegion *LRegion);
1177
1178 void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry,
1179 MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion);
1180 void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc,
1181 LinearizedRegion *LRegion);
1182
1183 MachineBasicBlock *splitExit(LinearizedRegion *LRegion);
1184
1185 MachineBasicBlock *splitEntry(LinearizedRegion *LRegion);
1186
1187 LinearizedRegion *initLinearizedRegion(RegionMRT *Region);
1188
1189 bool structurizeComplexRegion(RegionMRT *Region);
1190
1191 bool structurizeRegion(RegionMRT *Region);
1192
1193 bool structurizeRegions(RegionMRT *Region, bool isTopRegion);
1194
1195public:
1196 static char ID;
1197
1198 AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
1200 }
1201
1202 void getAnalysisUsage(AnalysisUsage &AU) const override {
1205 }
1206
1207 void initFallthroughMap(MachineFunction &MF);
1208
1209 void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut);
1210
1211 unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg,
1213 const SIInstrInfo *TII);
1214
1215 void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; }
1216
1217 RegionMRT *getRegionMRT() { return RMRT; }
1218
1219 bool runOnMachineFunction(MachineFunction &MF) override;
1220};
1221
1222} // end anonymous namespace
1223
1224char AMDGPUMachineCFGStructurizer::ID = 0;
1225
1226bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) {
1227 MachineBasicBlock *Entry = Region->getEntry();
1228 MachineBasicBlock *Succ = Region->getSucc();
1229 bool FoundBypass = false;
1230 bool FoundIf = false;
1231
1232 if (Entry->succ_size() != 2) {
1233 return false;
1234 }
1235
1236 for (MachineBasicBlock *Current : Entry->successors()) {
1237 if (Current == Succ) {
1238 FoundBypass = true;
1239 } else if ((Current->succ_size() == 1) &&
1240 *(Current->succ_begin()) == Succ) {
1241 FoundIf = true;
1242 }
1243 }
1244
1245 return FoundIf && FoundBypass;
1246}
1247
1248void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) {
1249 MachineBasicBlock *Entry = Region->getEntry();
1250 MachineBasicBlock *Exit = Region->getExit();
1251 TII->convertNonUniformIfRegion(Entry, Exit);
1252}
1253
1255 if (MBB->succ_size() == 1) {
1256 auto *Succ = *(MBB->succ_begin());
1257 for (auto &TI : MBB->terminators()) {
1258 for (auto &UI : TI.uses()) {
1259 if (UI.isMBB() && UI.getMBB() != Succ) {
1260 UI.setMBB(Succ);
1261 }
1262 }
1263 }
1264 }
1265}
1266
1267static void fixRegionTerminator(RegionMRT *Region) {
1268 MachineBasicBlock *InternalSucc = nullptr;
1269 MachineBasicBlock *ExternalSucc = nullptr;
1270 LinearizedRegion *LRegion = Region->getLinearizedRegion();
1271 auto Exit = LRegion->getExit();
1272
1274 for (MachineBasicBlock *Succ : Exit->successors()) {
1275 if (LRegion->contains(Succ)) {
1276 // Do not allow re-assign
1277 assert(InternalSucc == nullptr);
1278 InternalSucc = Succ;
1279 } else {
1280 // Do not allow re-assign
1281 assert(ExternalSucc == nullptr);
1282 ExternalSucc = Succ;
1283 }
1284 }
1285
1286 for (auto &TI : Exit->terminators()) {
1287 for (auto &UI : TI.uses()) {
1288 if (UI.isMBB()) {
1289 auto Target = UI.getMBB();
1290 if (Target != InternalSucc && Target != ExternalSucc) {
1291 UI.setMBB(ExternalSucc);
1292 }
1293 }
1294 }
1295 }
1296}
1297
1298// If a region is just a sequence of regions (and the exit
1299// block in the case of the top level region), we can simply skip
1300// linearizing it, because it is already linear
1301bool regionIsSequence(RegionMRT *Region) {
1302 auto Children = Region->getChildren();
1303 for (auto *CI : *Children) {
1304 if (!CI->isRegion()) {
1305 if (CI->getMBBMRT()->getMBB()->succ_size() > 1) {
1306 return false;
1307 }
1308 }
1309 }
1310 return true;
1311}
1312
1313void fixupRegionExits(RegionMRT *Region) {
1314 auto Children = Region->getChildren();
1315 for (auto *CI : *Children) {
1316 if (!CI->isRegion()) {
1317 fixMBBTerminator(CI->getMBBMRT()->getMBB());
1318 } else {
1319 fixRegionTerminator(CI->getRegionMRT());
1320 }
1321 }
1322}
1323
1324void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1325 RegionMRT *Region, MachineInstr &PHI,
1326 SmallVector<unsigned, 2> &PHIRegionIndices) {
1327 unsigned NumInputs = getPHINumInputs(PHI);
1328 for (unsigned i = 0; i < NumInputs; ++i) {
1329 MachineBasicBlock *Pred = getPHIPred(PHI, i);
1330 if (Region->contains(Pred)) {
1331 PHIRegionIndices.push_back(i);
1332 }
1333 }
1334}
1335
1336void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1337 LinearizedRegion *Region, MachineInstr &PHI,
1338 SmallVector<unsigned, 2> &PHIRegionIndices) {
1339 unsigned NumInputs = getPHINumInputs(PHI);
1340 for (unsigned i = 0; i < NumInputs; ++i) {
1341 MachineBasicBlock *Pred = getPHIPred(PHI, i);
1342 if (Region->contains(Pred)) {
1343 PHIRegionIndices.push_back(i);
1344 }
1345 }
1346}
1347
1348void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices(
1349 LinearizedRegion *Region, MachineInstr &PHI,
1350 SmallVector<unsigned, 2> &PHINonRegionIndices) {
1351 unsigned NumInputs = getPHINumInputs(PHI);
1352 for (unsigned i = 0; i < NumInputs; ++i) {
1353 MachineBasicBlock *Pred = getPHIPred(PHI, i);
1354 if (!Region->contains(Pred)) {
1355 PHINonRegionIndices.push_back(i);
1356 }
1357 }
1358}
1359
1360void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest(
1361 unsigned LDestReg, MachineInstr &PHI,
1362 SmallVector<unsigned, 2> *RegionIndices) {
1363 if (RegionIndices) {
1364 for (auto i : *RegionIndices) {
1365 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1366 }
1367 } else {
1368 unsigned NumInputs = getPHINumInputs(PHI);
1369 for (unsigned i = 0; i < NumInputs; ++i) {
1370 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1371 }
1372 }
1373}
1374
1375unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo(
1376 MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) {
1377 unsigned DestReg = getPHIDestReg(PHI);
1378 Register LinearizeDestReg =
1379 MRI->createVirtualRegister(MRI->getRegClass(DestReg));
1380 PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc());
1381 storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices);
1382 return LinearizeDestReg;
1383}
1384
1385void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
1386 // We need to create a new chain for the killed phi, but there is no
1387 // need to do the renaming outside or inside the block.
1390 E = MBB->instr_end();
1391 I != E; ++I) {
1392 MachineInstr &Instr = *I;
1393 if (Instr.isPHI()) {
1394 unsigned PHIDestReg = getPHIDestReg(Instr);
1395 LLVM_DEBUG(dbgs() << "Extracting killed phi:\n");
1396 LLVM_DEBUG(Instr.dump());
1397 PHIs.insert(&Instr);
1398 PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
1399 storePHILinearizationInfoDest(PHIDestReg, Instr);
1400 }
1401 }
1402
1403 for (auto *PI : PHIs) {
1404 PI->eraseFromParent();
1405 }
1406}
1407
1408static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices,
1409 unsigned Index) {
1410 return llvm::is_contained(PHIRegionIndices, Index);
1411}
1412
1413bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1414 SmallVector<unsigned, 2> &PHIIndices,
1415 unsigned *ReplaceReg) {
1416 return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg);
1417}
1418
1419bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1420 unsigned CombinedSourceReg,
1421 MachineBasicBlock *SourceMBB,
1422 SmallVector<unsigned, 2> &PHIIndices,
1423 unsigned *ReplaceReg) {
1424 LLVM_DEBUG(dbgs() << "Shrink PHI: ");
1425 LLVM_DEBUG(PHI.dump());
1426 LLVM_DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI)
1427 << " = PHI(");
1428
1429 bool Replaced = false;
1430 unsigned NumInputs = getPHINumInputs(PHI);
1431 int SingleExternalEntryIndex = -1;
1432 for (unsigned i = 0; i < NumInputs; ++i) {
1433 if (!isPHIRegionIndex(PHIIndices, i)) {
1434 if (SingleExternalEntryIndex == -1) {
1435 // Single entry
1436 SingleExternalEntryIndex = i;
1437 } else {
1438 // Multiple entries
1439 SingleExternalEntryIndex = -2;
1440 }
1441 }
1442 }
1443
1444 if (SingleExternalEntryIndex > -1) {
1445 *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex);
1446 // We should not rewrite the code, we should only pick up the single value
1447 // that represents the shrunk PHI.
1448 Replaced = true;
1449 } else {
1450 MachineBasicBlock *MBB = PHI.getParent();
1452 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1454 if (SourceMBB) {
1455 MIB.addReg(CombinedSourceReg);
1456 MIB.addMBB(SourceMBB);
1457 LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1458 << printMBBReference(*SourceMBB));
1459 }
1460
1461 for (unsigned i = 0; i < NumInputs; ++i) {
1462 if (isPHIRegionIndex(PHIIndices, i)) {
1463 continue;
1464 }
1465 unsigned SourceReg = getPHISourceReg(PHI, i);
1466 MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1467 MIB.addReg(SourceReg);
1468 MIB.addMBB(SourcePred);
1469 LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1470 << printMBBReference(*SourcePred));
1471 }
1472 LLVM_DEBUG(dbgs() << ")\n");
1473 }
1474 PHI.eraseFromParent();
1475 return Replaced;
1476}
1477
1478void AMDGPUMachineCFGStructurizer::replacePHI(
1479 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge,
1480 SmallVector<unsigned, 2> &PHIRegionIndices) {
1481 LLVM_DEBUG(dbgs() << "Replace PHI: ");
1482 LLVM_DEBUG(PHI.dump());
1483 LLVM_DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI)
1484 << " = PHI(");
1485
1486 bool HasExternalEdge = false;
1487 unsigned NumInputs = getPHINumInputs(PHI);
1488 for (unsigned i = 0; i < NumInputs; ++i) {
1489 if (!isPHIRegionIndex(PHIRegionIndices, i)) {
1490 HasExternalEdge = true;
1491 }
1492 }
1493
1494 if (HasExternalEdge) {
1495 MachineBasicBlock *MBB = PHI.getParent();
1497 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1499 MIB.addReg(CombinedSourceReg);
1500 MIB.addMBB(LastMerge);
1501 LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1502 << printMBBReference(*LastMerge));
1503 for (unsigned i = 0; i < NumInputs; ++i) {
1504 if (isPHIRegionIndex(PHIRegionIndices, i)) {
1505 continue;
1506 }
1507 unsigned SourceReg = getPHISourceReg(PHI, i);
1508 MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1509 MIB.addReg(SourceReg);
1510 MIB.addMBB(SourcePred);
1511 LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1512 << printMBBReference(*SourcePred));
1513 }
1514 LLVM_DEBUG(dbgs() << ")\n");
1515 } else {
1516 replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg);
1517 }
1518 PHI.eraseFromParent();
1519}
1520
1521void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
1522 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB,
1523 SmallVector<unsigned, 2> &PHIRegionIndices) {
1524 LLVM_DEBUG(dbgs() << "Replace entry PHI: ");
1525 LLVM_DEBUG(PHI.dump());
1526 LLVM_DEBUG(dbgs() << " with ");
1527
1528 unsigned NumInputs = getPHINumInputs(PHI);
1529 unsigned NumNonRegionInputs = NumInputs;
1530 for (unsigned i = 0; i < NumInputs; ++i) {
1531 if (isPHIRegionIndex(PHIRegionIndices, i)) {
1532 NumNonRegionInputs--;
1533 }
1534 }
1535
1536 if (NumNonRegionInputs == 0) {
1537 auto DestReg = getPHIDestReg(PHI);
1538 replaceRegisterWith(DestReg, CombinedSourceReg);
1539 LLVM_DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI)
1540 << "\n");
1541 PHI.eraseFromParent();
1542 } else {
1543 LLVM_DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI(");
1544 MachineBasicBlock *MBB = PHI.getParent();
1546 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1548 MIB.addReg(CombinedSourceReg);
1549 MIB.addMBB(IfMBB);
1550 LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", "
1551 << printMBBReference(*IfMBB));
1552 unsigned NumInputs = getPHINumInputs(PHI);
1553 for (unsigned i = 0; i < NumInputs; ++i) {
1554 if (isPHIRegionIndex(PHIRegionIndices, i)) {
1555 continue;
1556 }
1557 unsigned SourceReg = getPHISourceReg(PHI, i);
1558 MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1559 MIB.addReg(SourceReg);
1560 MIB.addMBB(SourcePred);
1561 LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
1562 << printMBBReference(*SourcePred));
1563 }
1564 LLVM_DEBUG(dbgs() << ")\n");
1565 PHI.eraseFromParent();
1566 }
1567}
1568
1569void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs(
1570 MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices,
1571 unsigned CombinedSourceReg, LinearizedRegion *LRegion) {
1572 bool WasLiveOut = false;
1573 for (auto PII : PHIRegionIndices) {
1574 unsigned Reg = getPHISourceReg(PHI, PII);
1575 if (LRegion->isLiveOut(Reg)) {
1576 bool IsDead = true;
1577
1578 // Check if register is live out of the basic block
1579 MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent();
1580 for (const MachineOperand &MO : MRI->use_operands(Reg))
1581 if (MO.getParent()->getParent() != DefMBB)
1582 IsDead = false;
1583
1584 LLVM_DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is "
1585 << (IsDead ? "dead" : "alive")
1586 << " after PHI replace\n");
1587 if (IsDead) {
1588 LRegion->removeLiveOut(Reg);
1589 }
1590 WasLiveOut = true;
1591 }
1592 }
1593
1594 if (WasLiveOut)
1595 LRegion->addLiveOut(CombinedSourceReg);
1596}
1597
1598void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region,
1599 MachineBasicBlock *LastMerge,
1601 LinearizedRegion *LRegion) {
1602 SmallVector<unsigned, 2> PHIRegionIndices;
1603 getPHIRegionIndices(Region, PHI, PHIRegionIndices);
1604 unsigned LinearizedSourceReg =
1605 storePHILinearizationInfo(PHI, &PHIRegionIndices);
1606
1607 replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices);
1608 replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion);
1609}
1610
1611void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region,
1612 MachineBasicBlock *IfMBB,
1613 MachineInstr &PHI) {
1614 SmallVector<unsigned, 2> PHINonRegionIndices;
1615 getPHINonRegionIndices(Region, PHI, PHINonRegionIndices);
1616 unsigned LinearizedSourceReg =
1617 storePHILinearizationInfo(PHI, &PHINonRegionIndices);
1618 replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices);
1619}
1620
1623 for (auto &BBI : *MBB) {
1624 if (BBI.isPHI()) {
1625 PHIs.push_back(&BBI);
1626 }
1627 }
1628}
1629
1630void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region,
1631 MachineBasicBlock *LastMerge,
1632 LinearizedRegion *LRegion) {
1634 auto Exit = Region->getSucc();
1635 if (Exit == nullptr)
1636 return;
1637
1638 collectPHIs(Exit, PHIs);
1639
1640 for (auto *PHII : PHIs) {
1641 rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion);
1642 }
1643}
1644
1645void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region,
1646 MachineBasicBlock *IfMBB) {
1648 auto Entry = Region->getEntry();
1649
1650 collectPHIs(Entry, PHIs);
1651
1652 for (auto *PHII : PHIs) {
1653 rewriteRegionEntryPHI(Region, IfMBB, *PHII);
1654 }
1655}
1656
1657void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB,
1658 MachineBasicBlock *Dest,
1659 const DebugLoc &DL) {
1660 LLVM_DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber()
1661 << " -> " << Dest->getNumber() << "\n");
1663 bool HasTerminator = Terminator != MBB->instr_end();
1664 if (HasTerminator) {
1665 TII->ReplaceTailWithBranchTo(Terminator, Dest);
1666 }
1668 TII->insertUnconditionalBranch(*MBB, Dest, DL);
1669 }
1670}
1671
1673 MachineBasicBlock *result = nullptr;
1674 for (auto &MFI : MF) {
1675 if (MFI.succ_empty()) {
1676 if (result == nullptr) {
1677 result = &MFI;
1678 } else {
1679 return nullptr;
1680 }
1681 }
1682 }
1683
1684 return result;
1685}
1686
1688 return getSingleExitNode(MF) != nullptr;
1689}
1690
1692AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) {
1693 auto Exit = Region->getSucc();
1694
1695 // If the exit is the end of the function, we just use the existing
1696 MachineFunction *MF = Region->getEntry()->getParent();
1697 if (Exit == nullptr && hasOneExitNode(*MF)) {
1698 return &(*(--(Region->getEntry()->getParent()->end())));
1699 }
1700
1701 MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock();
1702 if (Exit == nullptr) {
1703 MachineFunction::iterator ExitIter = MF->end();
1704 MF->insert(ExitIter, LastMerge);
1705 } else {
1706 MachineFunction::iterator ExitIter = Exit->getIterator();
1707 MF->insert(ExitIter, LastMerge);
1708 LastMerge->addSuccessor(Exit);
1709 insertUnconditionalBranch(LastMerge, Exit);
1710 LLVM_DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber()
1711 << "\n");
1712 }
1713 return LastMerge;
1714}
1715
1716void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB,
1717 MachineBasicBlock *CodeBB,
1718 MachineBasicBlock *MergeBB,
1719 unsigned DestRegister,
1720 unsigned IfSourceRegister,
1721 unsigned CodeSourceRegister,
1722 bool IsUndefIfSource) {
1723 // If this is the function exit block, we don't need a phi.
1724 if (MergeBB->succ_begin() == MergeBB->succ_end()) {
1725 return;
1726 }
1727 LLVM_DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB)
1728 << "): " << printReg(DestRegister, TRI) << " = PHI("
1729 << printReg(IfSourceRegister, TRI) << ", "
1730 << printMBBReference(*IfBB)
1731 << printReg(CodeSourceRegister, TRI) << ", "
1732 << printMBBReference(*CodeBB) << ")\n");
1733 const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin());
1734 MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL,
1735 TII->get(TargetOpcode::PHI), DestRegister);
1736 if (IsUndefIfSource && false) {
1737 MIB.addReg(IfSourceRegister, RegState::Undef);
1738 } else {
1739 MIB.addReg(IfSourceRegister);
1740 }
1741 MIB.addMBB(IfBB);
1742 MIB.addReg(CodeSourceRegister);
1743 MIB.addMBB(CodeBB);
1744}
1745
1748 E = MBB->succ_end();
1749 PI != E; ++PI) {
1750 if ((*PI) != MBB) {
1751 (MBB)->removeSuccessor(*PI);
1752 }
1753 }
1754}
1755
1757 MachineBasicBlock *EndMBB) {
1758
1759 // We have to check against the StartMBB successor because a
1760 // structurized region with a loop will have the entry block split,
1761 // and the backedge will go to the entry successor.
1763 unsigned SuccSize = StartMBB->succ_size();
1764 if (SuccSize > 0) {
1765 MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin());
1766 for (MachineBasicBlock *Succ : EndMBB->successors()) {
1767 // Either we have a back-edge to the entry block, or a back-edge to the
1768 // successor of the entry block since the block may be split.
1769 if (Succ != StartMBB &&
1770 !(Succ == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
1771 Succs.insert(
1772 std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, Succ));
1773 }
1774 }
1775 }
1776
1777 for (MachineBasicBlock *Pred : StartMBB->predecessors())
1778 if (Pred != EndMBB)
1779 Succs.insert(std::pair(Pred, StartMBB));
1780
1781 for (auto SI : Succs) {
1782 std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
1783 LLVM_DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first)
1784 << " -> " << printMBBReference(*Edge.second) << "\n");
1785 Edge.first->removeSuccessor(Edge.second);
1786 }
1787}
1788
1789MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
1790 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart,
1791 MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg,
1792 bool InheritPreds) {
1793 MachineFunction *MF = MergeBB->getParent();
1795
1796 if (InheritPreds) {
1797 for (MachineBasicBlock *Pred : CodeBBStart->predecessors())
1798 if (Pred != CodeBBEnd)
1799 Pred->addSuccessor(IfBB);
1800 }
1801
1802 removeExternalCFGEdges(CodeBBStart, CodeBBEnd);
1803
1804 auto CodeBBStartI = CodeBBStart->getIterator();
1805 auto CodeBBEndI = CodeBBEnd->getIterator();
1806 auto MergeIter = MergeBB->getIterator();
1807 MF->insert(MergeIter, IfBB);
1808 MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI);
1809 IfBB->addSuccessor(MergeBB);
1810 IfBB->addSuccessor(CodeBBStart);
1811
1812 LLVM_DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
1813 // Ensure that the MergeBB is a successor of the CodeEndBB.
1814 if (!CodeBBEnd->isSuccessor(MergeBB))
1815 CodeBBEnd->addSuccessor(MergeBB);
1816
1817 LLVM_DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart)
1818 << " through " << printMBBReference(*CodeBBEnd) << "\n");
1819
1820 // If we have a single predecessor we can find a reasonable debug location
1821 MachineBasicBlock *SinglePred =
1822 CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr;
1823 const DebugLoc &DL = SinglePred
1824 ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator())
1825 : DebugLoc();
1826
1827 Register Reg =
1828 TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg,
1829 SelectBB->getNumber() /* CodeBBStart->getNumber() */);
1830 if (&(*(IfBB->getParent()->begin())) == IfBB) {
1831 TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg,
1832 CodeBBStart->getNumber());
1833 }
1834 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
1836 TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL);
1837
1838 return IfBB;
1839}
1840
1841void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
1843 if (Cond.size() != 1)
1844 return;
1845 if (!Cond[0].isReg())
1846 return;
1847
1848 Register CondReg = Cond[0].getReg();
1849 for (MachineOperand &MO : MRI->use_operands(CondReg))
1850 MO.setIsKill(false);
1851}
1852
1853void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1854 MachineBasicBlock *MergeBB,
1855 unsigned BBSelectReg) {
1856 MachineBasicBlock *TrueBB = nullptr;
1857 MachineBasicBlock *FalseBB = nullptr;
1859 MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB];
1860 TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond);
1861
1862 const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator());
1863
1864 if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) {
1865 // This is an exit block, hence no successors. We will assign the
1866 // bb select register to the entry block.
1867 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1868 BBSelectReg,
1869 CodeBB->getParent()->begin()->getNumber());
1870 insertUnconditionalBranch(CodeBB, MergeBB, DL);
1871 return;
1872 }
1873
1874 if (FalseBB == nullptr && TrueBB == nullptr) {
1875 TrueBB = FallthroughBB;
1876 } else if (TrueBB != nullptr) {
1877 FalseBB =
1878 (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB;
1879 }
1880
1881 if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) {
1882 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1883 BBSelectReg, TrueBB->getNumber());
1884 } else {
1885 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg);
1886 Register TrueBBReg = MRI->createVirtualRegister(RegClass);
1887 Register FalseBBReg = MRI->createVirtualRegister(RegClass);
1888 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1889 TrueBBReg, TrueBB->getNumber());
1890 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1891 FalseBBReg, FalseBB->getNumber());
1892 ensureCondIsNotKilled(Cond);
1893 TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL,
1894 BBSelectReg, Cond, TrueBBReg, FalseBBReg);
1895 }
1896
1897 insertUnconditionalBranch(CodeBB, MergeBB, DL);
1898}
1899
1900MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) {
1901 if (MRI->def_begin(Reg) == MRI->def_end()) {
1902 LLVM_DEBUG(dbgs() << "Register "
1903 << printReg(Reg, MRI->getTargetRegisterInfo())
1904 << " has NO defs\n");
1905 } else if (!MRI->hasOneDef(Reg)) {
1906 LLVM_DEBUG(dbgs() << "Register "
1907 << printReg(Reg, MRI->getTargetRegisterInfo())
1908 << " has multiple defs\n");
1909 LLVM_DEBUG(dbgs() << "DEFS BEGIN:\n");
1910 for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) {
1911 LLVM_DEBUG(DI->getParent()->dump());
1912 }
1913 LLVM_DEBUG(dbgs() << "DEFS END\n");
1914 }
1915
1916 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1917 return (*(MRI->def_begin(Reg))).getParent();
1918}
1919
1920void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB,
1921 MachineBasicBlock *CodeBB,
1922 MachineBasicBlock *MergeBB,
1923 LinearizedRegion *InnerRegion,
1924 unsigned DestReg,
1925 unsigned SourceReg) {
1926 // In this function we know we are part of a chain already, so we need
1927 // to add the registers to the existing chain, and rename the register
1928 // inside the region.
1929 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
1930 MachineInstr *DefInstr = getDefInstr(SourceReg);
1931 if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) {
1932 // Handle the case where the def is a PHI-def inside a basic
1933 // block, then we only need to do renaming. Special care needs to
1934 // be taken if the PHI-def is part of an existing chain, or if a
1935 // new one needs to be created.
1936 InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI);
1937
1938 // We collect all PHI Information, and if we are at the region entry,
1939 // all PHIs will be removed, and then re-introduced if needed.
1940 storePHILinearizationInfoDest(DestReg, *DefInstr);
1941 // We have picked up all the information we need now and can remove
1942 // the PHI
1943 PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
1944 DefInstr->eraseFromParent();
1945 } else {
1946 // If this is not a phi-def, or it is a phi-def but from a linearized region
1947 if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) {
1948 // If this is a single BB and the definition is in this block we
1949 // need to replace any uses outside the region.
1950 InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI);
1951 }
1952 const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg);
1953 Register NextDestReg = MRI->createVirtualRegister(RegClass);
1954 bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1;
1955 LLVM_DEBUG(dbgs() << "Insert Chained PHI\n");
1956 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg,
1957 SourceReg, IsLastDef);
1958
1959 PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
1960 if (IsLastDef) {
1961 const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator());
1962 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL,
1963 NextDestReg, 0);
1964 PHIInfo.deleteDef(DestReg);
1965 } else {
1966 PHIInfo.replaceDef(DestReg, NextDestReg);
1967 }
1968 }
1969}
1970
1971bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB,
1972 LinearizedRegion *InnerRegion,
1973 unsigned Register) {
1974 return getDefInstr(Register)->getParent() == MBB ||
1975 InnerRegion->contains(getDefInstr(Register)->getParent());
1976}
1977
1978void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
1979 MachineBasicBlock *CodeBB,
1980 MachineBasicBlock *MergeBB,
1981 LinearizedRegion *InnerRegion,
1982 LinearizedRegion *LRegion) {
1983 DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts();
1984 SmallVector<unsigned, 4> OldLiveOuts;
1985 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
1986 for (auto OLI : *LiveOuts) {
1987 OldLiveOuts.push_back(OLI);
1988 }
1989
1990 for (auto LI : OldLiveOuts) {
1991 LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI));
1992 if (!containsDef(CodeBB, InnerRegion, LI) ||
1993 (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
1994 // If the register simply lives through the CodeBB, we don't have
1995 // to rewrite anything since the register is not defined in this
1996 // part of the code.
1997 LLVM_DEBUG(dbgs() << "- through");
1998 continue;
1999 }
2000 LLVM_DEBUG(dbgs() << "\n");
2001 unsigned Reg = LI;
2002 if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
2003 // If the register is live out, we do want to create a phi,
2004 // unless it is from the Exit block, because in that case there
2005 // is already a PHI, and no need to create a new one.
2006
2007 // If the register is just a live out def and not part of a phi
2008 // chain, we need to create a PHI node to handle the if region,
2009 // and replace all uses outside of the region with the new dest
2010 // register, unless it is the outgoing BB select register. We have
2011 // already created phi nodes for these.
2012 const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
2013 Register PHIDestReg = MRI->createVirtualRegister(RegClass);
2014 Register IfSourceReg = MRI->createVirtualRegister(RegClass);
2015 // Create initializer, this value is never used, but is needed
2016 // to satisfy SSA.
2017 LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n");
2018 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(),
2019 IfSourceReg, 0);
2020
2021 InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI);
2022 LLVM_DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n");
2023 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg,
2024 IfSourceReg, Reg, true);
2025 }
2026 }
2027
2028 // Handle the chained definitions in PHIInfo, checking if this basic block
2029 // is a source block for a definition.
2031 if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) {
2032 LLVM_DEBUG(dbgs() << "Inserting PHI Live Out from "
2033 << printMBBReference(*CodeBB) << "\n");
2034 for (auto SI : Sources) {
2035 unsigned DestReg;
2036 PHIInfo.findDest(SI, CodeBB, DestReg);
2037 insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI);
2038 }
2039 LLVM_DEBUG(dbgs() << "Insertion done.\n");
2040 }
2041
2042 LLVM_DEBUG(PHIInfo.dump(MRI));
2043}
2044
2045void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
2046 LLVM_DEBUG(dbgs() << "Before PHI Prune\n");
2047 LLVM_DEBUG(PHIInfo.dump(MRI));
2049 ElimiatedSources;
2050 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2051 ++DRI) {
2052
2053 unsigned DestReg = *DRI;
2054 auto SE = PHIInfo.sources_end(DestReg);
2055
2056 bool MBBContainsPHISource = false;
2057 // Check if there is a PHI source in this MBB
2058 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2059 unsigned SourceReg = (*SRI).first;
2060 MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2061 if (Def->getParent()->getParent() == MBB) {
2062 MBBContainsPHISource = true;
2063 }
2064 }
2065
2066 // If so, all other sources are useless since we know this block
2067 // is always executed when the region is executed.
2068 if (MBBContainsPHISource) {
2069 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2070 PHILinearize::PHISourceT Source = *SRI;
2071 unsigned SourceReg = Source.first;
2072 MachineBasicBlock *SourceMBB = Source.second;
2073 MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2074 if (Def->getParent()->getParent() != MBB) {
2075 ElimiatedSources.push_back(std::tuple(DestReg, SourceReg, SourceMBB));
2076 }
2077 }
2078 }
2079 }
2080
2081 // Remove the PHI sources that are in the given MBB
2082 for (auto &SourceInfo : ElimiatedSources) {
2083 PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo),
2084 std::get<2>(SourceInfo));
2085 }
2086 LLVM_DEBUG(dbgs() << "After PHI Prune\n");
2087 LLVM_DEBUG(PHIInfo.dump(MRI));
2088}
2089
2090void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion,
2091 unsigned DestReg) {
2092 MachineBasicBlock *Entry = CurrentRegion->getEntry();
2093 MachineBasicBlock *Exit = CurrentRegion->getExit();
2094
2095 LLVM_DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() << " Pred: "
2096 << (*(Entry->pred_begin()))->getNumber() << "\n");
2097
2098 int NumSources = 0;
2099 auto SE = PHIInfo.sources_end(DestReg);
2100
2101 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2102 NumSources++;
2103 }
2104
2105 if (NumSources == 1) {
2106 auto SRI = PHIInfo.sources_begin(DestReg);
2107 unsigned SourceReg = (*SRI).first;
2108 replaceRegisterWith(DestReg, SourceReg);
2109 } else {
2110 const DebugLoc &DL = Entry->findDebugLoc(Entry->begin());
2111 MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL,
2112 TII->get(TargetOpcode::PHI), DestReg);
2113 LLVM_DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI(");
2114
2115 unsigned CurrentBackedgeReg = 0;
2116
2117 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2118 unsigned SourceReg = (*SRI).first;
2119
2120 if (CurrentRegion->contains((*SRI).second)) {
2121 if (CurrentBackedgeReg == 0) {
2122 CurrentBackedgeReg = SourceReg;
2123 } else {
2124 MachineInstr *PHIDefInstr = getDefInstr(SourceReg);
2125 MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent();
2126 const TargetRegisterClass *RegClass =
2127 MRI->getRegClass(CurrentBackedgeReg);
2128 Register NewBackedgeReg = MRI->createVirtualRegister(RegClass);
2129 MachineInstrBuilder BackedgePHI =
2130 BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL,
2131 TII->get(TargetOpcode::PHI), NewBackedgeReg);
2132 BackedgePHI.addReg(CurrentBackedgeReg);
2133 BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0));
2134 BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1));
2135 BackedgePHI.addMBB((*SRI).second);
2136 CurrentBackedgeReg = NewBackedgeReg;
2138 << "Inserting backedge PHI: "
2139 << printReg(NewBackedgeReg, TRI) << " = PHI("
2140 << printReg(CurrentBackedgeReg, TRI) << ", "
2141 << printMBBReference(*getPHIPred(*PHIDefInstr, 0)) << ", "
2142 << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI) << ", "
2143 << printMBBReference(*(*SRI).second));
2144 }
2145 } else {
2146 MIB.addReg(SourceReg);
2147 MIB.addMBB((*SRI).second);
2148 LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", "
2149 << printMBBReference(*(*SRI).second) << ", ");
2150 }
2151 }
2152
2153 // Add the final backedge register source to the entry phi
2154 if (CurrentBackedgeReg != 0) {
2155 MIB.addReg(CurrentBackedgeReg);
2156 MIB.addMBB(Exit);
2157 LLVM_DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", "
2158 << printMBBReference(*Exit) << ")\n");
2159 } else {
2160 LLVM_DEBUG(dbgs() << ")\n");
2161 }
2162 }
2163}
2164
2165void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) {
2166 LLVM_DEBUG(PHIInfo.dump(MRI));
2167
2168 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2169 ++DRI) {
2170
2171 unsigned DestReg = *DRI;
2172 createEntryPHI(CurrentRegion, DestReg);
2173 }
2174 PHIInfo.clear();
2175}
2176
2177void AMDGPUMachineCFGStructurizer::replaceRegisterWith(
2178 unsigned Register, class Register NewRegister) {
2179 assert(Register != NewRegister && "Cannot replace a reg with itself");
2180
2182 E = MRI->reg_end();
2183 I != E;) {
2184 MachineOperand &O = *I;
2185 ++I;
2186 if (NewRegister.isPhysical()) {
2187 LLVM_DEBUG(dbgs() << "Trying to substitute physical register: "
2188 << printReg(NewRegister, MRI->getTargetRegisterInfo())
2189 << "\n");
2190 llvm_unreachable("Cannot substitute physical registers");
2191 // We don't handle physical registers, but if we need to
2192 // in the future This is how we do it:
2193 // O.substPhysReg(NewRegister, *TRI);
2194 } else {
2195 LLVM_DEBUG(dbgs() << "Replacing register: "
2196 << printReg(Register, MRI->getTargetRegisterInfo())
2197 << " with "
2198 << printReg(NewRegister, MRI->getTargetRegisterInfo())
2199 << "\n");
2200 O.setReg(NewRegister);
2201 }
2202 }
2203 PHIInfo.deleteDef(Register);
2204
2205 getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
2206
2207 LLVM_DEBUG(PHIInfo.dump(MRI));
2208}
2209
2210void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) {
2211 LLVM_DEBUG(dbgs() << "Resolve PHI Infos\n");
2212 LLVM_DEBUG(PHIInfo.dump(MRI));
2213 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2214 ++DRI) {
2215 unsigned DestReg = *DRI;
2216 LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) << "\n");
2217 auto SRI = PHIInfo.sources_begin(DestReg);
2218 unsigned SourceReg = (*SRI).first;
2219 LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI)
2220 << " SourceReg: " << printReg(SourceReg, TRI) << "\n");
2221
2222 assert(PHIInfo.sources_end(DestReg) == ++SRI &&
2223 "More than one phi source in entry node");
2224 replaceRegisterWith(DestReg, SourceReg);
2225 }
2226}
2227
2229 return ((&(*(MBB->getParent()->begin()))) == MBB);
2230}
2231
2232MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2233 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB,
2234 LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn,
2235 unsigned BBSelectRegOut) {
2236 if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) {
2237 // Handle non-loop function entry block.
2238 // We need to allow loops to the entry block and then
2239 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2240 resolvePHIInfos(CodeBB);
2242 CodeBB->addSuccessor(MergeBB);
2243 CurrentRegion->addMBB(CodeBB);
2244 return nullptr;
2245 }
2246 if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) {
2247 // Handle non-loop region entry block.
2248 MachineFunction *MF = MergeBB->getParent();
2249 auto MergeIter = MergeBB->getIterator();
2250 auto CodeBBStartIter = CodeBB->getIterator();
2251 auto CodeBBEndIter = ++(CodeBB->getIterator());
2252 if (CodeBBEndIter != MergeIter) {
2253 MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter);
2254 }
2255 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2256 prunePHIInfo(CodeBB);
2257 createEntryPHIs(CurrentRegion);
2259 CodeBB->addSuccessor(MergeBB);
2260 CurrentRegion->addMBB(CodeBB);
2261 return nullptr;
2262 } else {
2263 // Handle internal block.
2264 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn);
2265 Register CodeBBSelectReg = MRI->createVirtualRegister(RegClass);
2266 rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg);
2267 bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB;
2268 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB,
2269 BBSelectRegIn, IsRegionEntryBB);
2270 CurrentRegion->addMBB(IfBB);
2271 // If this is the entry block we need to make the If block the new
2272 // linearized region entry.
2273 if (IsRegionEntryBB) {
2274 CurrentRegion->setEntry(IfBB);
2275
2276 if (CurrentRegion->getHasLoop()) {
2277 MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2278 MachineBasicBlock *ETrueBB = nullptr;
2279 MachineBasicBlock *EFalseBB = nullptr;
2281
2282 const DebugLoc &DL = DebugLoc();
2283 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2284 TII->removeBranch(*RegionExit);
2285
2286 // We need to create a backedge if there is a loop
2287 Register Reg = TII->insertNE(
2288 RegionExit, RegionExit->instr_end(), DL,
2289 CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2290 CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2291 MachineOperand RegOp =
2292 MachineOperand::CreateReg(Reg, false, false, true);
2294 LLVM_DEBUG(dbgs() << "RegionExitReg: ");
2295 LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
2296 LLVM_DEBUG(dbgs() << "\n");
2297 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2298 Cond, DebugLoc());
2299 RegionExit->addSuccessor(CurrentRegion->getEntry());
2300 }
2301 }
2302 CurrentRegion->addMBB(CodeBB);
2303 LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo);
2304
2305 InnerRegion.setParent(CurrentRegion);
2306 LLVM_DEBUG(dbgs() << "Insert BB Select PHI (BB)\n");
2307 insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2308 CodeBBSelectReg);
2309 InnerRegion.addMBB(MergeBB);
2310
2311 LLVM_DEBUG(InnerRegion.print(dbgs(), TRI));
2312 rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion);
2313 extractKilledPHIs(CodeBB);
2314 if (IsRegionEntryBB) {
2315 createEntryPHIs(CurrentRegion);
2316 }
2317 return IfBB;
2318 }
2319}
2320
2321MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2322 MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion,
2323 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
2324 unsigned BBSelectRegIn, unsigned BBSelectRegOut) {
2325 unsigned CodeBBSelectReg =
2326 InnerRegion->getRegionMRT()->getInnerOutputRegister();
2327 MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry();
2328 MachineBasicBlock *CodeExitBB = InnerRegion->getExit();
2329 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB,
2330 SelectBB, BBSelectRegIn, true);
2331 CurrentRegion->addMBB(IfBB);
2332 bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry();
2333 if (isEntry) {
2334
2335 if (CurrentRegion->getHasLoop()) {
2336 MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2337 MachineBasicBlock *ETrueBB = nullptr;
2338 MachineBasicBlock *EFalseBB = nullptr;
2340
2341 const DebugLoc &DL = DebugLoc();
2342 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2343 TII->removeBranch(*RegionExit);
2344
2345 // We need to create a backedge if there is a loop
2346 Register Reg =
2347 TII->insertNE(RegionExit, RegionExit->instr_end(), DL,
2348 CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2349 CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2350 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
2352 LLVM_DEBUG(dbgs() << "RegionExitReg: ");
2353 LLVM_DEBUG(Cond[0].print(dbgs(), TRI));
2354 LLVM_DEBUG(dbgs() << "\n");
2355 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2356 Cond, DebugLoc());
2357 RegionExit->addSuccessor(IfBB);
2358 }
2359 }
2360 CurrentRegion->addMBBs(InnerRegion);
2361 LLVM_DEBUG(dbgs() << "Insert BB Select PHI (region)\n");
2362 insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2363 CodeBBSelectReg);
2364
2365 rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion,
2366 CurrentRegion);
2367
2368 rewriteRegionEntryPHIs(InnerRegion, IfBB);
2369
2370 if (isEntry) {
2371 CurrentRegion->setEntry(IfBB);
2372 }
2373
2374 if (isEntry) {
2375 createEntryPHIs(CurrentRegion);
2376 }
2377
2378 return IfBB;
2379}
2380
2381void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI,
2382 MachineBasicBlock *Entry,
2383 MachineBasicBlock *EntrySucc,
2384 LinearizedRegion *LRegion) {
2385 SmallVector<unsigned, 2> PHIRegionIndices;
2386 getPHIRegionIndices(LRegion, PHI, PHIRegionIndices);
2387
2388 assert(PHIRegionIndices.size() == 1);
2389
2390 unsigned RegionIndex = PHIRegionIndices[0];
2391 unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex);
2392 MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex);
2393 unsigned PHIDest = getPHIDestReg(PHI);
2394 unsigned PHISource = PHIDest;
2395 unsigned ReplaceReg;
2396
2397 if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) {
2398 PHISource = ReplaceReg;
2399 }
2400
2401 const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest);
2402 Register NewDestReg = MRI->createVirtualRegister(RegClass);
2403 LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI);
2405 BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
2406 TII->get(TargetOpcode::PHI), NewDestReg);
2407 LLVM_DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI)
2408 << " = PHI(");
2409 MIB.addReg(PHISource);
2410 MIB.addMBB(Entry);
2411 LLVM_DEBUG(dbgs() << printReg(PHISource, TRI) << ", "
2412 << printMBBReference(*Entry));
2413 MIB.addReg(RegionSourceReg);
2414 MIB.addMBB(RegionSourceMBB);
2415 LLVM_DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", "
2416 << printMBBReference(*RegionSourceMBB) << ")\n");
2417}
2418
2419void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry,
2420 MachineBasicBlock *EntrySucc,
2421 LinearizedRegion *LRegion) {
2423 collectPHIs(Entry, PHIs);
2424
2425 for (auto *PHII : PHIs) {
2426 splitLoopPHI(*PHII, Entry, EntrySucc, LRegion);
2427 }
2428}
2429
2430// Split the exit block so that we can insert a end control flow
2432AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
2433 auto MRTRegion = LRegion->getRegionMRT();
2434 auto Exit = LRegion->getExit();
2435 auto MF = Exit->getParent();
2436 auto Succ = MRTRegion->getSucc();
2437
2438 auto NewExit = MF->CreateMachineBasicBlock();
2439 auto AfterExitIter = Exit->getIterator();
2440 AfterExitIter++;
2441 MF->insert(AfterExitIter, NewExit);
2442 Exit->removeSuccessor(Succ);
2443 Exit->addSuccessor(NewExit);
2444 NewExit->addSuccessor(Succ);
2445 insertUnconditionalBranch(NewExit, Succ);
2446 LRegion->addMBB(NewExit);
2447 LRegion->setExit(NewExit);
2448
2449 LLVM_DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber()
2450 << "\n");
2451
2452 // Replace any PHI Predecessors in the successor with NewExit
2453 for (auto &II : *Succ) {
2454 MachineInstr &Instr = II;
2455
2456 // If we are past the PHI instructions we are done
2457 if (!Instr.isPHI())
2458 break;
2459
2460 int numPreds = getPHINumInputs(Instr);
2461 for (int i = 0; i < numPreds; ++i) {
2462 auto Pred = getPHIPred(Instr, i);
2463 if (Pred == Exit) {
2464 setPhiPred(Instr, i, NewExit);
2465 }
2466 }
2467 }
2468
2469 return NewExit;
2470}
2471
2473 // Create the fall-through block.
2475 MachineFunction *MF = MBB->getParent();
2477 auto MBBIter = ++(MBB->getIterator());
2478 MF->insert(MBBIter, SuccMBB);
2480 MBB->addSuccessor(SuccMBB);
2481
2482 // Splice the code over.
2483 SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end());
2484
2485 return SuccMBB;
2486}
2487
2488// Split the entry block separating PHI-nodes and the rest of the code
2489// This is needed to insert an initializer for the bb select register
2490// inloop regions.
2491
2493AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) {
2494 MachineBasicBlock *Entry = LRegion->getEntry();
2495 MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI());
2496 MachineBasicBlock *Exit = LRegion->getExit();
2497
2498 LLVM_DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to "
2499 << printMBBReference(*Entry) << " -> "
2500 << printMBBReference(*EntrySucc) << "\n");
2501 LRegion->addMBB(EntrySucc);
2502
2503 // Make the backedge go to Entry Succ
2504 if (Exit->isSuccessor(Entry)) {
2505 Exit->removeSuccessor(Entry);
2506 }
2507 Exit->addSuccessor(EntrySucc);
2508 MachineInstr &Branch = *(Exit->instr_rbegin());
2509 for (auto &UI : Branch.uses()) {
2510 if (UI.isMBB() && UI.getMBB() == Entry) {
2511 UI.setMBB(EntrySucc);
2512 }
2513 }
2514
2515 splitLoopPHIs(Entry, EntrySucc, LRegion);
2516
2517 return EntrySucc;
2518}
2519
2520LinearizedRegion *
2521AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) {
2522 LinearizedRegion *LRegion = Region->getLinearizedRegion();
2523 LRegion->initLiveOut(Region, MRI, TRI, PHIInfo);
2524 LRegion->setEntry(Region->getEntry());
2525 return LRegion;
2526}
2527
2528static void removeOldExitPreds(RegionMRT *Region) {
2529 MachineBasicBlock *Exit = Region->getSucc();
2530 if (Exit == nullptr) {
2531 return;
2532 }
2534 E = Exit->pred_end();
2535 PI != E; ++PI) {
2536 if (Region->contains(*PI)) {
2537 (*PI)->removeSuccessor(Exit);
2538 }
2539 }
2540}
2541
2544 for (MachineBasicBlock *Succ : MBB->successors())
2545 if (MBBs.contains(Succ))
2546 return true;
2547 return false;
2548}
2549
2550static bool containsNewBackedge(MRT *Tree,
2552 // Need to traverse this in reverse since it is in post order.
2553 if (Tree == nullptr)
2554 return false;
2555
2556 if (Tree->isMBB()) {
2557 MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB();
2558 MBBs.insert(MBB);
2559 if (mbbHasBackEdge(MBB, MBBs)) {
2560 return true;
2561 }
2562 } else {
2563 RegionMRT *Region = Tree->getRegionMRT();
2564 for (MRT *C : llvm::reverse(*Region->getChildren()))
2565 if (containsNewBackedge(C, MBBs))
2566 return true;
2567 }
2568 return false;
2569}
2570
2571static bool containsNewBackedge(RegionMRT *Region) {
2573 return containsNewBackedge(Region, MBBs);
2574}
2575
2576bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
2577 auto *LRegion = initLinearizedRegion(Region);
2578 LRegion->setHasLoop(containsNewBackedge(Region));
2579 MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region);
2580 MachineBasicBlock *CurrentMerge = LastMerge;
2581 LRegion->addMBB(LastMerge);
2582 LRegion->setExit(LastMerge);
2583
2584 rewriteRegionExitPHIs(Region, LastMerge, LRegion);
2586
2587 LLVM_DEBUG(PHIInfo.dump(MRI));
2588
2589 SetVector<MRT *> *Children = Region->getChildren();
2590 LLVM_DEBUG(dbgs() << "===========If Region Start===============\n");
2591 if (LRegion->getHasLoop()) {
2592 LLVM_DEBUG(dbgs() << "Has Backedge: Yes\n");
2593 } else {
2594 LLVM_DEBUG(dbgs() << "Has Backedge: No\n");
2595 }
2596
2597 unsigned BBSelectRegIn;
2598 unsigned BBSelectRegOut;
2599 for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) {
2600 LLVM_DEBUG(dbgs() << "CurrentRegion: \n");
2601 LLVM_DEBUG(LRegion->print(dbgs(), TRI));
2602
2603 auto CNI = CI;
2604 ++CNI;
2605
2606 MRT *Child = (*CI);
2607
2608 if (Child->isRegion()) {
2609
2610 LinearizedRegion *InnerLRegion =
2611 Child->getRegionMRT()->getLinearizedRegion();
2612 // We found the block is the exit of an inner region, we need
2613 // to put it in the current linearized region.
2614
2615 LLVM_DEBUG(dbgs() << "Linearizing region: ");
2616 LLVM_DEBUG(InnerLRegion->print(dbgs(), TRI));
2617 LLVM_DEBUG(dbgs() << "\n");
2618
2619 MachineBasicBlock *InnerEntry = InnerLRegion->getEntry();
2620 if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) {
2621 // Entry has already been linearized, no need to do this region.
2622 unsigned OuterSelect = InnerLRegion->getBBSelectRegOut();
2623 unsigned InnerSelectReg =
2624 InnerLRegion->getRegionMRT()->getInnerOutputRegister();
2625 replaceRegisterWith(InnerSelectReg, OuterSelect),
2626 resolvePHIInfos(InnerEntry);
2627 if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge))
2628 InnerLRegion->getExit()->addSuccessor(CurrentMerge);
2629 continue;
2630 }
2631
2632 BBSelectRegOut = Child->getBBSelectRegOut();
2633 BBSelectRegIn = Child->getBBSelectRegIn();
2634
2635 LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
2636 << "\n");
2637 LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
2638 << "\n");
2639
2640 MachineBasicBlock *IfEnd = CurrentMerge;
2641 CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion,
2642 Child->getRegionMRT()->getEntry(),
2643 BBSelectRegIn, BBSelectRegOut);
2644 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2645 } else {
2646 MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB();
2647 LLVM_DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n");
2648
2649 if (MBB == getSingleExitNode(*(MBB->getParent()))) {
2650 // If this is the exit block then we need to skip to the next.
2651 // The "in" register will be transferred to "out" in the next
2652 // iteration.
2653 continue;
2654 }
2655
2656 BBSelectRegOut = Child->getBBSelectRegOut();
2657 BBSelectRegIn = Child->getBBSelectRegIn();
2658
2659 LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI)
2660 << "\n");
2661 LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI)
2662 << "\n");
2663
2664 MachineBasicBlock *IfEnd = CurrentMerge;
2665 // This is a basic block that is not part of an inner region, we
2666 // need to put it in the current linearized region.
2667 CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn,
2668 BBSelectRegOut);
2669 if (CurrentMerge) {
2670 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2671 }
2672
2673 LLVM_DEBUG(PHIInfo.dump(MRI));
2674 }
2675 }
2676
2677 LRegion->removeFalseRegisterKills(MRI);
2678
2679 if (LRegion->getHasLoop()) {
2680 MachineBasicBlock *NewSucc = splitEntry(LRegion);
2681 if (isFunctionEntryBlock(LRegion->getEntry())) {
2682 resolvePHIInfos(LRegion->getEntry());
2683 }
2684 const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI());
2685 unsigned InReg = LRegion->getBBSelectRegIn();
2686 Register InnerSelectReg =
2687 MRI->createVirtualRegister(MRI->getRegClass(InReg));
2688 Register NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg));
2689 TII->materializeImmediate(*(LRegion->getEntry()),
2690 LRegion->getEntry()->getFirstTerminator(), DL,
2691 NewInReg, Region->getEntry()->getNumber());
2692 // Need to be careful about updating the registers inside the region.
2693 LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI);
2694 LLVM_DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n");
2695 insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc,
2696 InnerSelectReg, NewInReg,
2697 LRegion->getRegionMRT()->getInnerOutputRegister());
2698 splitExit(LRegion);
2699 TII->convertNonUniformLoopRegion(NewSucc, LastMerge);
2700 }
2701
2702 if (Region->isRoot()) {
2703 TII->insertReturn(*LastMerge);
2704 }
2705
2706 LLVM_DEBUG(Region->getEntry()->getParent()->dump());
2707 LLVM_DEBUG(LRegion->print(dbgs(), TRI));
2708 LLVM_DEBUG(PHIInfo.dump(MRI));
2709
2710 LLVM_DEBUG(dbgs() << "===========If Region End===============\n");
2711
2712 Region->setLinearizedRegion(LRegion);
2713 return true;
2714}
2715
2716bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) {
2717 if (false && regionIsSimpleIf(Region)) {
2718 transformSimpleIfRegion(Region);
2719 return true;
2720 } else if (regionIsSequence(Region)) {
2722 return false;
2723 } else {
2724 structurizeComplexRegion(Region);
2725 }
2726 return false;
2727}
2728
2729static int structurize_once = 0;
2730
2731bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region,
2732 bool isTopRegion) {
2733 bool Changed = false;
2734
2735 auto Children = Region->getChildren();
2736 for (auto *CI : *Children) {
2737 if (CI->isRegion()) {
2738 Changed |= structurizeRegions(CI->getRegionMRT(), false);
2739 }
2740 }
2741
2742 if (structurize_once < 2 || true) {
2743 Changed |= structurizeRegion(Region);
2745 }
2746 return Changed;
2747}
2748
2749void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) {
2750 LLVM_DEBUG(dbgs() << "Fallthrough Map:\n");
2751 for (auto &MBBI : MF) {
2753 if (MBB != nullptr) {
2754 LLVM_DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> "
2755 << MBB->getNumber() << "\n");
2756 }
2757 FallthroughMap[&MBBI] = MBB;
2758 }
2759}
2760
2761void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region,
2762 unsigned SelectOut) {
2763 LinearizedRegion *LRegion = new LinearizedRegion();
2764 if (SelectOut) {
2765 LRegion->addLiveOut(SelectOut);
2766 LLVM_DEBUG(dbgs() << "Add LiveOut (BBSelect): " << printReg(SelectOut, TRI)
2767 << "\n");
2768 }
2769 LRegion->setRegionMRT(Region);
2770 Region->setLinearizedRegion(LRegion);
2771 LRegion->setParent(Region->getParent()
2772 ? Region->getParent()->getLinearizedRegion()
2773 : nullptr);
2774}
2775
2776unsigned
2777AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut,
2779 const SIInstrInfo *TII) {
2780 if (MRT->isRegion()) {
2781 RegionMRT *Region = MRT->getRegionMRT();
2782 Region->setBBSelectRegOut(SelectOut);
2783 unsigned InnerSelectOut = createBBSelectReg(TII, MRI);
2784
2785 // Fixme: Move linearization creation to the original spot
2786 createLinearizedRegion(Region, SelectOut);
2787
2788 for (auto *CI : *Region->getChildren())
2789 InnerSelectOut = initializeSelectRegisters(CI, InnerSelectOut, MRI, TII);
2790 MRT->setBBSelectRegIn(InnerSelectOut);
2791 return InnerSelectOut;
2792 } else {
2793 MRT->setBBSelectRegOut(SelectOut);
2794 unsigned NewSelectIn = createBBSelectReg(TII, MRI);
2795 MRT->setBBSelectRegIn(NewSelectIn);
2796 return NewSelectIn;
2797 }
2798}
2799
2801 for (auto &MBBI : MF) {
2803 E = MBBI.instr_end();
2804 I != E; ++I) {
2805 MachineInstr &Instr = *I;
2806 if (Instr.isPHI()) {
2807 int numPreds = getPHINumInputs(Instr);
2808 for (int i = 0; i < numPreds; ++i) {
2809 assert(Instr.getOperand(i * 2 + 1).isReg() &&
2810 "PHI Operand not a register");
2811 }
2812 }
2813 }
2814 }
2815}
2816
2817bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
2819 const SIInstrInfo *TII = ST.getInstrInfo();
2820 TRI = ST.getRegisterInfo();
2821 MRI = &(MF.getRegInfo());
2822 initFallthroughMap(MF);
2823
2825 LLVM_DEBUG(dbgs() << "----STRUCTURIZER START----\n");
2826 LLVM_DEBUG(MF.dump());
2827
2828 Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo());
2829 LLVM_DEBUG(Regions->dump());
2830
2831 RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI);
2832 setRegionMRT(RTree);
2833 initializeSelectRegisters(RTree, 0, MRI, TII);
2834 LLVM_DEBUG(RTree->dump(TRI));
2835 bool result = structurizeRegions(RTree, true);
2836 delete RTree;
2837 LLVM_DEBUG(dbgs() << "----STRUCTURIZER END----\n");
2838 initFallthroughMap(MF);
2839 return result;
2840}
2841
2842char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID;
2843
2844INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2845 "AMDGPU Machine CFG Structurizer", false, false)
2847INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2849
2851 return new AMDGPUMachineCFGStructurizer();
2852}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static bool containsNewBackedge(MRT *Tree, SmallPtrSet< MachineBasicBlock *, 8 > &MBBs)
static MachineBasicBlock * split(MachineBasicBlock::iterator I)
static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index)
bool regionIsSequence(RegionMRT *Region)
static unsigned getPHIDestReg(MachineInstr &PHI)
amdgpu machine cfg structurizer
static void collectPHIs(MachineBasicBlock *MBB, SmallVector< MachineInstr *, 2 > &PHIs)
static void setPhiPred(MachineInstr &PHI, unsigned Index, MachineBasicBlock *NewPred)
static bool isFunctionEntryBlock(MachineBasicBlock *MBB)
static void removeOldExitPreds(RegionMRT *Region)
static bool isPHIRegionIndex(SmallVector< unsigned, 2 > PHIRegionIndices, unsigned Index)
static void checkRegOnlyPHIInputs(MachineFunction &MF)
static void fixMBBTerminator(MachineBasicBlock *MBB)
static bool hasOneExitNode(MachineFunction &MF)
static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, MachineBasicBlock *EndMBB)
static void removeExternalCFGSuccessors(MachineBasicBlock *MBB)
static MachineBasicBlock * getSingleExitNode(MachineFunction &MF)
static unsigned createBBSelectReg(const SIInstrInfo *TII, MachineRegisterInfo *MRI)
void fixupRegionExits(RegionMRT *Region)
static int structurize_once
static MachineBasicBlock * getPHIPred(MachineInstr &PHI, unsigned Index)
char AMDGPUMachineCFGStructurizerID
amdgpu machine cfg AMDGPU Machine CFG Structurizer
static unsigned getPHINumInputs(MachineInstr &PHI)
static void fixRegionTerminator(RegionMRT *Region)
static bool mbbHasBackEdge(MachineBasicBlock *MBB, SmallPtrSet< MachineBasicBlock *, 8 > &MBBs)
Rewrite undef for PHI
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static const Function * getParent(const Value *V)
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:149
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseSet and SmallDenseSet classes.
Flatten the CFG
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static bool isReg(const MCInst &MI, unsigned OpNo)
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool IsDead
static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg)
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:467
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator instr_begin()
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=false)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
std::vector< MachineBasicBlock * >::iterator succ_iterator
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< iterator > terminators()
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same getFirstTerminator but it ignores bundles and return an instr_iterator instead.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
std::vector< MachineBasicBlock * >::iterator pred_iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
void dump() const
dump - Print the current MachineFunction to cerr, useful for debugger use.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineBasicBlock & front() const
void splice(iterator InsertPt, iterator MBBI)
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:68
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:313
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:445
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsKill(bool Val=true)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
reg_begin/reg_end - Provide iteration support to walk over all definitions and uses of a register wit...
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition: RegionInfo.h:359
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:364
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition: RegionInfo.h:322
RegionT * getTopLevelRegion() const
Definition: RegionInfo.h:866
RegionT * getRegionFor(BlockT *BB) const
Get the smallest region that contains a BasicBlock.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:97
A vector that has set insertion semantics.
Definition: SetVector.h:40
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
bool contains(ConstPtrType Ptr) const
Definition: SmallPtrSet.h:389
SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
Definition: SmallPtrSet.h:268
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
LLVM Value Representation.
Definition: Value.h:74
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
Iterator for intrusive lists based on ilist_node.
self_iterator getIterator()
Definition: ilist_node.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2169
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2047
iterator_range< po_iterator< T > > post_order(const T &G)
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry &)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createAMDGPUMachineCFGStructurizerPass()
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1976
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.