87 #define DEBUG_TYPE "si-wqm"
96 StateStrict = StateStrictWWM | StateStrictWQM,
103 explicit PrintState(int State) : State(State) {}
109 static const std::pair<char, const char *> Mapping[] = {
110 std::make_pair(StateWQM, "WQM"),
111 std::make_pair(StateStrictWWM, "StrictWWM"),
112 std::make_pair(StateStrictWQM, "StrictWQM"),
113 std::make_pair(StateExact, "Exact")};
114 char State = PS.State;
115 for (auto M : Mapping) {
116 if (State & M.first) {
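// PrintState is a debug-printing helper: the loop above walks Mapping and
// prints the name of every state bit set in State, so dumps read as e.g.
// "WQM|StrictWWM" instead of a raw bitmask.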
139 char InitialState = 0;
140 bool NeedsLowering = false;
147 WorkItem() = default;
165 unsigned AndSaveExecOpc;
166 unsigned OrSaveExecOpc;
185 std::vector<WorkItem> &Worklist);
187 unsigned SubReg, char Flag, std::vector<WorkItem> &Worklist);
189 std::vector<WorkItem> &Worklist);
191 std::vector<WorkItem> &Worklist);
192 char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
193 void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
208 Register SaveOrig, char StrictStateNeeded);
211 char NonStrictState, char CurrentStrictState);
222 void lowerLiveMaskQueries();
223 void lowerCopyInstrs();
224 void lowerKillInstrs(bool IsWQM);
234 StringRef getPassName() const override { return "SI Whole Quad Mode"; }
268 return new SIWholeQuadMode;
273 for (const auto &BII : Blocks) {
276 << " InNeeds = " << PrintState(BII.second.InNeeds)
277 << ", Needs = " << PrintState(BII.second.Needs)
278 << ", OutNeeds = " << PrintState(BII.second.OutNeeds) << "\n\n";
285 dbgs() << " " << MI << " Needs = " << PrintState(III->second.Needs)
286 << ", OutNeeds = " << PrintState(III->second.OutNeeds) << '\n';
293 std::vector<WorkItem> &Worklist) {
302 Flag &= ~II.Disabled;
311 Worklist.push_back(&MI);
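// markInstruction (intent, as read from the surrounding pass): merge the
// requested flag into the instruction's Needs, minus whatever states it has
// Disabled, and queue the instruction so the new requirement is propagated.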
317 std::vector<WorkItem> &Worklist) {
341 : Phi(Phi), PredIdx(PredIdx), DefinedLanes(DefinedLanes) {}
343 using VisitKey = std::pair<const VNInfo *, LaneBitmask>;
347 unsigned NextPredIdx = 0;
349 const VNInfo *NextValue = nullptr;
350 const VisitKey Key(Value, DefinedLanes);
358 if (Value->isPHIDef()) {
361 assert(MBB && "Phi-def has no defining MBB");
364 unsigned Idx = NextPredIdx;
367 for (; PI != PE && !NextValue; ++PI, ++Idx) {
369 if (!Visited.count(VisitKey(VN, DefinedLanes)))
379 assert(MI && "Def has no defining instruction");
381 if (Reg.isVirtual()) {
385 if (!(Op.isReg() && Op.isDef() && Op.getReg() == Reg))
391 : TRI->getSubRegIndexLaneMask(Op.getSubReg());
395 HasDef |= Overlap.any();
398 DefinedLanes |= OpLanes;
402 if ((DefinedLanes & UseLanes) != UseLanes) {
406 if (!Visited.count(VisitKey(VN, DefinedLanes)))
413 markInstruction(*MI, Flag, Worklist);
416 markInstruction(*MI, Flag, Worklist);
420 if (!NextValue && !PhiStack.empty()) {
422 PhiEntry &Entry = PhiStack.back();
423 NextValue = Entry.Phi;
424 NextPredIdx = Entry.PredIdx;
425 DefinedLanes = Entry.DefinedLanes;
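// markDefs chases the reaching definitions of a use through LiveIntervals,
// including across PHI values, until every lane the use reads is covered by a
// marked def; the explicit PhiStack/Visited bookkeeping above replaces
// recursion so long PHI chains cannot blow the call stack.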
435 std::vector<WorkItem> &Worklist) {
442 case AMDGPU::EXEC_LO:
450 if (Reg.isVirtual()) {
452 markDefs(MI, LR, Reg, Op.getSubReg(), Flag, Worklist);
459 LiveRange &LR = LIS->getRegUnit(*RegUnit);
464 markDefs(MI, LR, *RegUnit, AMDGPU::NoSubRegister, Flag, Worklist);
471 std::vector<WorkItem> &Worklist) {
476 if (!Use.isReg() || !Use.isUse())
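// markOperand splits on register kind: EXEC/EXEC_LO are ignored, virtual
// registers are marked lane-precisely via markDefs on their live interval,
// and other tracked physical registers (notably VCC) are marked per register
// unit. markInstructionUses simply applies this to every register use of MI.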
485 std::vector<WorkItem> &Worklist) {
486 char GlobalFlags = 0;
490 bool HasImplicitDerivatives =
499 BlockInfo &BBI = Blocks[MBB];
503 unsigned Opcode = MI.getOpcode();
506 if (TII->isWQM(Opcode)) {
508 if (!ST->hasExtendedImageInsts())
513 if (!HasImplicitDerivatives)
518 markInstructionUses(MI, StateWQM, Worklist);
519 GlobalFlags |= StateWQM;
525 LowerToCopyInstrs.push_back(&MI);
526 } else if (Opcode == AMDGPU::SOFT_WQM) {
527 LowerToCopyInstrs.push_back(&MI);
528 SoftWQMInstrs.push_back(&MI);
530 } else if (Opcode == AMDGPU::STRICT_WWM) {
534 markInstructionUses(MI, StateStrictWWM, Worklist);
535 GlobalFlags |= StateStrictWWM;
536 LowerToMovInstrs.push_back(&MI);
538 } else if (Opcode == AMDGPU::STRICT_WQM) {
542 markInstructionUses(MI, StateStrictWQM, Worklist);
543 GlobalFlags |= StateStrictWQM;
544 LowerToMovInstrs.push_back(&MI);
546 } else if (Opcode == AMDGPU::V_SET_INACTIVE_B32 ||
547 Opcode == AMDGPU::V_SET_INACTIVE_B64) {
548 III.Disabled = StateStrict;
550 if (Inactive.isReg()) {
552 LowerToCopyInstrs.push_back(&MI);
554 markOperand(MI, Inactive, StateStrictWWM, Worklist);
557 SetInactiveInstrs.push_back(&MI);
559 } else if (TII->isDisableWQM(MI)) {
560 BBI.Needs |= StateExact;
561 if (!(BBI.InNeeds & StateExact)) {
562 BBI.InNeeds |= StateExact;
563 Worklist.push_back(MBB);
565 GlobalFlags |= StateExact;
566 III.Disabled = StateWQM | StateStrict;
569 if (Opcode == AMDGPU::SI_PS_LIVE || Opcode == AMDGPU::SI_LIVE_MASK) {
570 LiveMaskQueries.push_back(&MI);
571 } else if (Opcode == AMDGPU::SI_KILL_I1_TERMINATOR ||
572 Opcode == AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR ||
573 Opcode == AMDGPU::SI_DEMOTE_I1) {
574 KillInstrs.push_back(&MI);
575 BBI.NeedsLowering = true;
576 } else if (WQMOutputs) {
587 if (!Reg.isVirtual() &&
588 TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) {
599 markInstruction(MI, Flags, Worklist);
600 GlobalFlags |= Flags;
608 if (GlobalFlags & StateWQM) {
610 markInstruction(*MI, StateWQM, Worklist);
612 markInstruction(*MI, StateWQM, Worklist);
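// scanInstructions is the single forward scan over the function: it
// classifies each instruction (WQM and SoftWQM candidates, StrictWWM/WQM
// regions, V_SET_INACTIVE, WQM-disabling stores, kills and live-mask
// queries), remembers the ones that must later be rewritten to copies or
// movs, seeds the worklist, and returns the union of states as GlobalFlags.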
619 std::vector<WorkItem>& Worklist) {
622 BlockInfo &BI = Blocks[MBB];
626 if ((II.OutNeeds & StateWQM) && !(II.Disabled & StateWQM) &&
627 (MI.isTerminator() || (TII->usesVM_CNT(MI) && MI.mayStore()))) {
633 if (II.Needs & StateWQM) {
634 BI.Needs |= StateWQM;
635 if (!(BI.InNeeds & StateWQM)) {
636 BI.InNeeds |= StateWQM;
637 Worklist.push_back(MBB);
643 char InNeeds = (II.Needs & ~StateStrict) | II.OutNeeds;
644 if (!PrevMI->isPHI()) {
646 if ((PrevII.OutNeeds | InNeeds) != PrevII.OutNeeds) {
647 PrevII.OutNeeds |= InNeeds;
648 Worklist.push_back(PrevMI);
654 assert(!(II.Needs & StateExact));
657 markInstructionUses(MI, II.Needs, Worklist);
661 if (II.Needs & StateStrictWWM)
662 BI.Needs |= StateStrictWWM;
663 if (II.Needs & StateStrictWQM)
664 BI.Needs |= StateStrictWQM;
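// propagateInstruction pushes an instruction's requirements outward: into its
// block's Needs/InNeeds, into the OutNeeds of the preceding instruction, and
// (for non-Exact needs) back into the definitions of its operands.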
668 std::vector<WorkItem>& Worklist) {
669 BlockInfo BI = Blocks[&MBB];
675 if ((LastII.OutNeeds | BI.OutNeeds) != LastII.OutNeeds) {
676 LastII.OutNeeds |= BI.OutNeeds;
677 Worklist.push_back(LastMI);
683 BlockInfo &PredBI = Blocks[Pred];
684 if ((PredBI.OutNeeds | BI.InNeeds) == PredBI.OutNeeds)
687 PredBI.OutNeeds |= BI.InNeeds;
688 PredBI.InNeeds |= BI.InNeeds;
689 Worklist.push_back(Pred);
694 BlockInfo &SuccBI = Blocks[Succ];
695 if ((SuccBI.InNeeds | BI.OutNeeds) == SuccBI.InNeeds)
698 SuccBI.InNeeds |= BI.OutNeeds;
699 Worklist.push_back(Succ);
704 std::vector<WorkItem> Worklist;
705 char GlobalFlags = scanInstructions(MF, Worklist);
707 while (!Worklist.empty()) {
708 WorkItem WI = Worklist.back();
712 propagateInstruction(*WI.MI, Worklist);
714 propagateBlock(*WI.MBB, Worklist);
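// analyzeFunction is a standard worklist fixpoint: scanInstructions seeds the
// worklist, then instruction and block items are popped and re-propagated
// until no Needs/InNeeds/OutNeeds sets change.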
732 LIS->InsertMachineInstrInMaps(*Save);
733 LIS->InsertMachineInstrInMaps(*Restore);
734 LIS->createAndComputeVirtRegInterval(SaveReg);
745 BB->splitAt(*TermMI, true, LIS);
749 unsigned NewOpcode = 0;
751 case AMDGPU::S_AND_B32:
752 NewOpcode = AMDGPU::S_AND_B32_term;
754 case AMDGPU::S_AND_B64:
755 NewOpcode = AMDGPU::S_AND_B64_term;
757 case AMDGPU::S_MOV_B32:
758 NewOpcode = AMDGPU::S_MOV_B32_term;
760 case AMDGPU::S_MOV_B64:
761 NewOpcode = AMDGPU::S_MOV_B64_term;
775 DTUpdates.push_back({DomTreeT::Delete, BB, Succ});
779 MDT->getBase().applyUpdates(DTUpdates);
781 PDT->getBase().applyUpdates(DTUpdates);
787 LIS->InsertMachineInstrInMaps(*MI);
807 switch (MI.getOperand(2).getImm()) {
809 Opcode = AMDGPU::V_CMP_LG_F32_e64;
812 Opcode = AMDGPU::V_CMP_GE_F32_e64;
815 Opcode = AMDGPU::V_CMP_GT_F32_e64;
818 Opcode = AMDGPU::V_CMP_LE_F32_e64;
821 Opcode = AMDGPU::V_CMP_LT_F32_e64;
824 Opcode = AMDGPU::V_CMP_EQ_F32_e64;
827 Opcode = AMDGPU::V_CMP_O_F32_e64;
830 Opcode = AMDGPU::V_CMP_U_F32_e64;
834 Opcode = AMDGPU::V_CMP_NEQ_F32_e64;
838 Opcode = AMDGPU::V_CMP_NLT_F32_e64;
842 Opcode = AMDGPU::V_CMP_NLE_F32_e64;
846 Opcode = AMDGPU::V_CMP_NGT_F32_e64;
850 Opcode = AMDGPU::V_CMP_NGE_F32_e64;
854 Opcode = AMDGPU::V_CMP_NLG_F32_e64;
866 Register VCC = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
899 LIS->ReplaceMachineInstrInMaps(MI, *VcmpMI);
902 LIS->InsertMachineInstrInMaps(*MaskUpdateMI);
903 LIS->InsertMachineInstrInMaps(*ExecMaskMI);
904 LIS->InsertMachineInstrInMaps(*EarlyTermMI);
905 LIS->InsertMachineInstrInMaps(*NewTerm);
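// lowerKillF32 rewrites the float-compare kill into a V_CMP against the
// immediate; the switch above appears to select the inverted comparison, so
// VCC ends up describing the lanes that survive, and the mask/exec updates
// plus the new terminator are then registered with LiveIntervals.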
915 const bool IsDemote = IsWQM && (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1);
917 int64_t KillVal = MI.getOperand(1).getImm();
924 if (Op.getImm() == KillVal) {
932 if (MI.getOpcode() == AMDGPU::SI_DEMOTE_I1) {
933 LIS->RemoveMachineInstrFromMaps(MI);
938 LIS->ReplaceMachineInstrInMaps(MI, *NewTerm);
948 ComputeKilledMaskMI =
982 unsigned MovOpc = ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
989 unsigned Opcode = KillVal ? AndN2Opc : AndOpc;
996 LIS->RemoveMachineInstrFromMaps(MI);
1001 if (ComputeKilledMaskMI)
1002 LIS->InsertMachineInstrInMaps(*ComputeKilledMaskMI);
1003 LIS->InsertMachineInstrInMaps(*MaskUpdateMI);
1004 LIS->InsertMachineInstrInMaps(*EarlyTermMI);
1006 LIS->InsertMachineInstrInMaps(*WQMMaskMI);
1007 LIS->InsertMachineInstrInMaps(*NewTerm);
1010 LIS->removeInterval(CndReg);
1011 LIS->createAndComputeVirtRegInterval(CndReg);
1014 LIS->createAndComputeVirtRegInterval(TmpReg);
1016 LIS->createAndComputeVirtRegInterval(LiveMaskWQM);
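// lowerKillI1 handles boolean kills and demotes: a constant condition either
// folds the kill away or kills everything, otherwise the live mask is ANDed
// (or ANDN2ed, depending on KillVal) with the condition. For SI_DEMOTE_I1 in
// WQM a whole-quad version of the mask is also computed so helper lanes keep
// running, and all new instructions are registered with LiveIntervals.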
1025 auto BII = Blocks.find(&MBB);
1026 if (BII == Blocks.end())
1029 const BlockInfo &BI = BII->second;
1030 if (!BI.NeedsLowering)
1036 char State = BI.InitialState;
1040 if (StateTransition.count(&MI))
1041 State = StateTransition[&MI];
1044 switch (MI.getOpcode()) {
1045 case AMDGPU::SI_DEMOTE_I1:
1046 case AMDGPU::SI_KILL_I1_TERMINATOR:
1047 SplitPoint = lowerKillI1(MBB, MI, State == StateWQM);
1049 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
1050 SplitPoint = lowerKillF32(MBB, MI);
1056 SplitPoints.push_back(SplitPoint);
1060 if (!SplitPoints.empty()) {
1080 SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
1081 : LIS->getMBBEndIdx(&MBB);
1083 Last != MBBE ? LIS->getInstructionIndex(*Last) : LIS->getMBBEndIdx(&MBB);
1084 SlotIndex Idx = PreferLast ? LastIdx : FirstIdx;
1094 if (Next < FirstIdx)
1098 MachineInstr *EndMI = LIS->getInstructionFromIndex(S->end.getBaseIndex());
1099 assert(EndMI && "Segment does not end on valid instruction");
1103 SlotIndex Next = LIS->getInstructionIndex(*NextI);
1122 while (MBBI != Last) {
1123 bool IsExecDef = false;
1125 if (MO.isReg() && MO.isDef()) {
1127 MO.getReg() == AMDGPU::EXEC_LO || MO.getReg() == AMDGPU::EXEC;
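// prepareInsertion chooses where the exec-mask manipulation may be inserted
// between First and Last: it prefers the requested end of the range and, when
// SaveSCC is set, steps past any live segment of SCC (and past instructions
// defining EXEC, as in the loop above) so the inserted S_AND/S_WQM is safe.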
1156 LIS->InsertMachineInstrInMaps(*MI);
1157 StateTransition[MI] = StateExact;
1172 LIS->InsertMachineInstrInMaps(*MI);
1173 StateTransition[MI] = StateWQM;
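// toExact and toWQM emit the actual mode switches (an AND-type op with the
// live mask to drop helper lanes, or an S_WQM / S_MOV from the saved copy to
// restore them), register the new instruction with LiveIntervals, and record
// the resulting state in StateTransition so lowerBlock can later tell which
// state each kill observes.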
1178 Register SaveOrig, char StrictStateNeeded) {
1181 assert(StrictStateNeeded == StateStrictWWM ||
1182 StrictStateNeeded == StateStrictWQM);
1184 if (StrictStateNeeded == StateStrictWWM) {
1193 LIS->InsertMachineInstrInMaps(*MI);
1194 StateTransition[MI] = StateStrictWWM;
1199 Register SavedOrig, char NonStrictState,
1200 char CurrentStrictState) {
1204 assert(CurrentStrictState == StateStrictWWM ||
1205 CurrentStrictState == StateStrictWQM);
1207 if (CurrentStrictState == StateStrictWWM) {
1216 LIS->InsertMachineInstrInMaps(*MI);
1217 StateTransition[MI] = NonStrictState;
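// toStrictMode and fromStrictMode bracket StrictWWM/StrictWQM regions: on
// entry the current exec is saved in SaveOrig and exec is widened (all lanes
// for WWM, whole quads for WQM); on exit the saved mask is restored and the
// non-strict state we return to is recorded in StateTransition.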
1221 auto BII = Blocks.find(&MBB);
1222 if (BII == Blocks.end())
1225 BlockInfo &BI = BII->second;
1229 if (!IsEntry && BI.Needs == StateWQM && BI.OutNeeds != StateExact) {
1230 BI.InitialState = StateWQM;
1239 bool WQMFromExec = IsEntry;
1240 char State = (IsEntry || !(BI.InNeeds & StateWQM)) ? StateExact : StateWQM;
1241 char NonStrictState = 0;
1247 if (II != IE && II->getOpcode() == AMDGPU::COPY)
1262 BI.InitialState = State;
1266 char Needs = StateExact | StateWQM;
1272 if (FirstStrict == IE)
1280 if (MI.isTerminator() || TII->mayReadEXEC(*MRI, MI)) {
1283 if (III->second.Needs & StateStrictWWM)
1284 Needs = StateStrictWWM;
1285 else if (III->second.Needs & StateStrictWQM)
1286 Needs = StateStrictWQM;
1287 else if (III->second.Needs & StateWQM)
1290 Needs &= ~III->second.Disabled;
1291 OutNeeds = III->second.OutNeeds;
1296 Needs = StateExact | StateWQM | StateStrict;
1299 if (MI.isTerminator() && OutNeeds == StateExact)
1305 if (BI.OutNeeds & StateWQM)
1307 else if (BI.OutNeeds == StateExact)
1310 Needs = StateWQM | StateExact;
1314 if (!(Needs & State)) {
1316 if (State == StateStrictWWM || Needs == StateStrictWWM ||
1317 State == StateStrictWQM || Needs == StateStrictWQM) {
1319 First = FirstStrict;
1326 bool SaveSCC = false;
1329 case StateStrictWWM:
1330 case StateStrictWQM:
1334 SaveSCC = (Needs & StateStrict) || ((Needs & StateWQM) && WQMFromExec);
1338 SaveSCC = !(Needs & StateWQM);
1345 prepareInsertion(MBB, First, II, Needs == StateWQM, SaveSCC);
1347 if (State & StateStrict) {
1348 assert(State == StateStrictWWM || State == StateStrictWQM);
1349 assert(SavedNonStrictReg);
1350 fromStrictMode(MBB, Before, SavedNonStrictReg, NonStrictState, State);
1352 LIS->createAndComputeVirtRegInterval(SavedNonStrictReg);
1353 SavedNonStrictReg = 0;
1354 State = NonStrictState;
1357 if (Needs & StateStrict) {
1358 NonStrictState = State;
1359 assert(Needs == StateStrictWWM || Needs == StateStrictWQM);
1360 assert(!SavedNonStrictReg);
1363 toStrictMode(MBB, Before, SavedNonStrictReg, Needs);
1367 if (State == StateWQM && (Needs & StateExact) && !(Needs & StateWQM)) {
1368 if (!WQMFromExec && (OutNeeds & StateWQM)) {
1373 toExact(MBB, Before, SavedWQMReg);
1375 } else if (State == StateExact && (Needs & StateWQM) &&
1376 !(Needs & StateExact)) {
1377 assert(WQMFromExec == (SavedWQMReg == 0));
1379 toWQM(MBB, Before, SavedWQMReg);
1382 LIS->createAndComputeVirtRegInterval(SavedWQMReg);
1395 if (Needs != (StateExact | StateWQM | StateStrict)) {
1396 if (Needs != (StateExact | StateWQM))
1407 assert(!SavedNonStrictReg);
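// processBlock is the per-block state machine: starting from BI.InitialState
// it compares each instruction's computed Needs with the current State and,
// only where they diverge, inserts the cheapest transition via toExact /
// toWQM / toStrictMode / fromStrictMode, keeping the saved exec copies
// (SavedWQMReg, SavedNonStrictReg) alive across the region they protect.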
1410 void SIWholeQuadMode::lowerLiveMaskQueries() {
1419 LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
1420 MI->eraseFromParent();
1424 void SIWholeQuadMode::lowerCopyInstrs() {
1426 assert(MI->getNumExplicitOperands() == 2);
1429 const unsigned SubReg = MI->getOperand(0).getSubReg();
1435 regClass = TRI->getSubRegClass(regClass, SubReg);
1437 const unsigned MovOp = TII->getMovOpcode(regClass);
1438 MI->setDesc(TII->get(MovOp));
1443 return MO.isUse() && MO.getReg() == AMDGPU::EXEC;
1449 if (MI->getOperand(0).isEarlyClobber()) {
1450 LIS->removeInterval(Reg);
1451 MI->getOperand(0).setIsEarlyClobber(false);
1452 LIS->createAndComputeVirtRegInterval(Reg);
1454 int Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC);
1455 while (Index >= 0) {
1456 MI->removeOperand(Index);
1457 Index = MI->findRegisterUseOperandIdx(AMDGPU::EXEC);
1459 MI->setDesc(TII->get(AMDGPU::COPY));
1464 if (MI->getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
1465 MI->getOpcode() == AMDGPU::V_SET_INACTIVE_B64) {
1466 assert(MI->getNumExplicitOperands() == 3);
1470 assert(MI->getOperand(2).isUndef());
1471 MI->removeOperand(2);
1472 MI->untieRegOperand(1);
1474 assert(MI->getNumExplicitOperands() == 2);
1477 MI->setDesc(TII->get(AMDGPU::COPY));
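// lowerCopyInstrs runs after the exec transitions are in place: the WQM /
// SOFT_WQM / V_SET_INACTIVE markers collected during scanning are demoted to
// plain COPYs (LowerToMovInstrs get the matching mov opcode instead), with
// their now-redundant implicit EXEC uses stripped.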
1481 void SIWholeQuadMode::lowerKillInstrs(bool IsWQM) {
1485 switch (MI->getOpcode()) {
1486 case AMDGPU::SI_DEMOTE_I1:
1487 case AMDGPU::SI_KILL_I1_TERMINATOR:
1488 SplitPoint = lowerKillI1(*MBB, *MI, IsWQM);
1490 case AMDGPU::SI_KILL_F32_COND_IMM_TERMINATOR:
1491 SplitPoint = lowerKillF32(*MBB, *MI);
1503 << " ------------- \n");
1508 LiveMaskQueries.clear();
1509 LowerToCopyInstrs.clear();
1510 LowerToMovInstrs.clear();
1512 StateTransition.clear();
1516 TII = ST->getInstrInfo();
1517 TRI = &TII->getRegisterInfo();
1519 LIS = &getAnalysis<LiveIntervals>();
1520 MDT = &getAnalysis<MachineDominatorTree>();
1521 PDT = &getAnalysis<MachinePostDominatorTree>();
1523 if (ST->isWave32()) {
1524 AndOpc = AMDGPU::S_AND_B32;
1525 AndN2Opc = AMDGPU::S_ANDN2_B32;
1526 XorOpc = AMDGPU::S_XOR_B32;
1527 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
1528 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
1529 WQMOpc = AMDGPU::S_WQM_B32;
1530 Exec = AMDGPU::EXEC_LO;
1532 AndOpc = AMDGPU::S_AND_B64;
1533 AndN2Opc = AMDGPU::S_ANDN2_B64;
1534 XorOpc = AMDGPU::S_XOR_B64;
1535 AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
1536 OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
1537 WQMOpc = AMDGPU::S_WQM_B64;
1538 Exec = AMDGPU::EXEC;
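// Opcode and register selection is the only wavefront-size dependence: wave32
// targets use EXEC_LO and the *_B32 mask instructions, wave64 uses EXEC and
// the *_B64 forms; the rest of the pass is written against these member
// fields.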
1541 const char GlobalFlags = analyzeFunction(MF);
1542 const bool NeedsLiveMask = !(KillInstrs.empty() && LiveMaskQueries.empty());
1547 if (!(GlobalFlags & (StateWQM | StateStrict)) && LowerToCopyInstrs.empty() &&
1548 LowerToMovInstrs.empty() && KillInstrs.empty()) {
1549 lowerLiveMaskQueries();
1550 return !LiveMaskQueries.empty();
1557 if (NeedsLiveMask || (GlobalFlags & StateWQM)) {
1562 LIS->InsertMachineInstrInMaps(*MI);
1567 lowerLiveMaskQueries();
1571 if (GlobalFlags == StateWQM) {
1574 LIS->InsertMachineInstrInMaps(*MI);
1575 lowerKillInstrs(true);
1577 for (auto BII : Blocks)
1578 processBlock(*BII.first, BII.first == &Entry);
1580 for (auto BII : Blocks)
1581 lowerBlock(*BII.first);
1585 if (LiveMaskReg != Exec)
1586 LIS->createAndComputeVirtRegInterval(LiveMaskReg);
1594 if (!KillInstrs.empty())
1595 LIS->removeAllRegUnitsForPhysReg(AMDGPU::EXEC);