75#define DEBUG_TYPE "arm-low-overhead-loops"
76#define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
80 cl::desc(
"Disable tail-predication in the ARM LowOverheadLoop pass"),
85 cl::desc(
"Disable omitting 'dls lr, lr' instructions"),
90 return PIdx != -1 &&
MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
94 return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
98 return MI.findRegisterUseOperandIdx(ARM::VPR) != -1;
113 if (
MI.isDebugInstr())
122 class PostOrderLoopTraversal {
130 :
ML(
ML), MLI(MLI) { }
146 if (!
ML.contains(Succ))
155 ML.getExitBlocks(ExitBlocks);
159 Search(
ML.getHeader());
169 if (
auto *Preheader =
ML.getLoopPreheader())
170 GetPredecessor(Preheader);
172 GetPredecessor(Preheader);
176 struct PredicatedMI {
182 assert(
I &&
"Instruction must not be null!");
193 friend struct LowOverheadLoop;
200 std::unique_ptr<PredicatedMI>> PredicatedInsts;
203 assert((CurrentPredicates.
size() ||
MI->getParent()->isLiveIn(ARM::VPR))
204 &&
"Can't begin VPT without predicate");
209 PredicatedInsts.emplace(
210 MI, std::make_unique<PredicatedMI>(
MI, CurrentPredicates));
213 static void reset() {
215 PredicatedInsts.clear();
216 CurrentPredicates.
clear();
221 PredicatedInsts.emplace(
222 MI, std::make_unique<PredicatedMI>(
MI, CurrentPredicates));
232 CurrentPredicates.
clear();
239 static bool hasUniformPredicate(VPTState &
Block) {
240 return getDivergent(
Block) ==
nullptr;
247 for (
unsigned i = 1; i < Insts.
size(); ++i) {
256 static bool isPredicatedOnVCTP(
MachineInstr *
MI,
bool Exclusive =
false) {
258 if (Exclusive && Predicates.
size() != 1)
264 static bool isEntryPredicatedOnVCTP(VPTState &
Block,
265 bool Exclusive =
false) {
267 return isPredicatedOnVCTP(Insts.
front(), Exclusive);
273 static bool hasImplicitlyValidVPT(VPTState &
Block,
278 "Expected VPT block to begin with VPT/VPST");
285 return Op && PredicatedInsts.count(
Op) && isPredicatedOnVCTP(
Op);
298 for (
auto *Def : Defs)
306 return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
307 (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
308 (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
316 if (isEntryPredicatedOnVCTP(
Block,
false) ||
324 "Expected VPT block to start with a VPST or VPT!");
325 if (Insts.
size() == 2 && Insts.
front()->getOpcode() != ARM::MVE_VPST &&
329 for (
auto *
MI : Insts) {
339 if (!isPredicatedOnVCTP(
MI)) {
353 assert(Insts.size() <= 5 &&
"Too many instructions in VPT block!");
356 bool containsVCTP()
const {
360 unsigned size()
const {
return Insts.size(); }
364 struct LowOverheadLoop {
385 bool CannotTailPredicate =
false;
392 MF =
ML.getHeader()->getParent();
393 if (
auto *
MBB =
ML.getLoopPreheader())
406 CannotTailPredicate = !ValidateMVEInst(
MI);
409 bool IsTailPredicationLegal()
const {
412 return !Revert && FoundAllComponents() && !VCTPs.
empty() &&
413 !CannotTailPredicate &&
ML.getNumBlocks() == 1;
423 bool ValidateTailPredicate();
427 bool ValidateLiveOuts();
433 bool FoundAllComponents()
const {
434 return Start && Dec &&
End;
438 return VPTState::Blocks;
444 if (IsTailPredicationLegal())
445 return TPNumElements;
446 return Start->getOperand(1);
449 unsigned getStartOpcode()
const {
451 if (!IsTailPredicationLegal())
452 return IsDo ? ARM::t2DLS : ARM::t2WLS;
458 if (Start)
dbgs() <<
"ARM Loops: Found Loop Start: " << *Start;
459 if (Dec)
dbgs() <<
"ARM Loops: Found Loop Dec: " << *Dec;
460 if (End)
dbgs() <<
"ARM Loops: Found Loop End: " << *
End;
461 if (!VCTPs.
empty()) {
462 dbgs() <<
"ARM Loops: Found VCTP(s):\n";
463 for (
auto *
MI : VCTPs)
466 if (!FoundAllComponents())
467 dbgs() <<
"ARM Loops: Not a low-overhead loop.\n";
468 else if (!(Start && Dec && End))
469 dbgs() <<
"ARM Loops: Failed to find all loop components.\n";
480 std::unique_ptr<ARMBasicBlockUtils> BBUtils =
nullptr;
498 MachineFunctionProperties::Property::NoVRegs).
set(
499 MachineFunctionProperties::Property::TracksLiveness);
509 bool RevertNonLoops();
520 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
524 void Expand(LowOverheadLoop &LoLoop);
526 void IterationCountDCE(LowOverheadLoop &LoLoop);
530char ARMLowOverheadLoops::ID = 0;
535 std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
548 for (
auto *Dead : Killed)
549 BasicBlocks.
insert(Dead->getParent());
552 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
553 for (
auto *
MBB : BasicBlocks) {
554 for (
auto &
IT : *
MBB) {
555 if (
IT.getOpcode() != ARM::t2IT)
566 for (
auto *Dead : Killed) {
567 if (
MachineOperand *MO = Dead->findRegisterUseOperand(ARM::ITSTATE)) {
570 auto &CurrentBlock = ITBlocks[
IT];
571 CurrentBlock.erase(Dead);
572 if (CurrentBlock.empty())
578 if (!ModifiedITs.
empty())
580 Killed.insert(RemoveITs.
begin(), RemoveITs.
end());
591 <<
" - can also remove:\n";
597 if (WontCorruptITs(Killed,
RDA)) {
600 dbgs() <<
" - " << *Dead);
607bool LowOverheadLoop::ValidateTailPredicate() {
608 if (!IsTailPredicationLegal()) {
610 dbgs() <<
"ARM Loops: Didn't find a VCTP instruction.\n";
611 dbgs() <<
"ARM Loops: Tail-predication is not valid.\n");
615 assert(!VCTPs.
empty() &&
"VCTP instruction expected but is not set");
616 assert(
ML.getBlocks().size() == 1 &&
617 "Shouldn't be processing a loop with more than one block");
620 LLVM_DEBUG(
dbgs() <<
"ARM Loops: tail-predication is disabled\n");
624 if (!VPTState::isValid(
RDA)) {
629 if (!ValidateLiveOuts()) {
639 if (Start->getOpcode() == ARM::t2DoLoopStartTP ||
640 Start->getOpcode() == ARM::t2WhileLoopStartTP) {
641 TPNumElements = Start->getOperand(2);
642 StartInsertPt = Start;
643 StartInsertBB = Start->getParent();
652 LLVM_DEBUG(
dbgs() <<
"ARM Loops: VCTP operand is defined in the loop.\n");
660 if (StartInsertPt != StartInsertBB->
end() &&
665 ElemDef->removeFromParent();
666 StartInsertBB->
insert(StartInsertPt, ElemDef);
668 <<
"ARM Loops: Moved element count def: " << *ElemDef);
670 StartInsertPt->removeFromParent();
673 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Moved start past: " << *ElemDef);
683 TPNumElements = Operand;
684 NumElements = TPNumElements.
getReg();
687 <<
"ARM Loops: Unable to move element count to loop "
688 <<
"start instruction.\n");
715 while (
MBB &&
MBB != StartInsertBB) {
716 if (CannotProvideElements(
MBB, NumElements)) {
717 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unable to provide element count.\n");
734 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Instruction blocks [W|D]LSTP\n");
745 if (InstrVecSize > VCTPVecSize) {
746 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Double width result larger than VCTP "
747 <<
"VecSize:\n" << *
MI);
773 if (TryRemove(Def,
RDA, ElementChain,
Ignore)) {
774 bool FoundSub =
false;
776 for (
auto *
MI : ElementChain) {
781 if (FoundSub || !IsValidSub(
MI, ExpectedVectorWidth)) {
782 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
788 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
793 ToRemove.insert(ElementChain.begin(), ElementChain.end());
800 if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||
801 Start->getOpcode() == ARM::t2WhileLoopStartTP) &&
802 Preheader && !Preheader->
empty() &&
855 switch (
MI.getOpcode()) {
864 case ARM::MVE_VCLZs8:
865 case ARM::MVE_VCLZs16:
866 case ARM::MVE_VCLZs32:
880 InstSet &FalseLanesZero) {
892 Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
893 Def->getOperand(1).getImm() == 0;
897 for (
auto &MO :
MI.operands()) {
915 for (
auto *Def : Defs) {
916 if (Def == &
MI || FalseLanesZero.count(Def) || IsZeroInit(Def))
918 if (MO.
isUse() && isPredicated)
927bool LowOverheadLoop::ValidateLiveOuts() {
957 for (
auto &
MI : *Header) {
965 bool retainsOrReduces =
972 else if (
MI.getNumDefs() == 0)
974 else if (!isPredicated && retainsOrReduces) {
975 LLVM_DEBUG(
dbgs() <<
" Unpredicated instruction that retainsOrReduces: " <<
MI);
977 }
else if (!isPredicated &&
MI.getOpcode() != ARM::MQPRCopy)
982 dbgs() <<
" Predicated:\n";
983 for (
auto *
I : Predicated)
985 dbgs() <<
" FalseLanesZero:\n";
986 for (
auto *
I : FalseLanesZero)
988 dbgs() <<
" FalseLanesUnknown:\n";
989 for (
auto *
I : FalseLanesUnknown)
998 if (
Use !=
MI && !Predicated.count(
Use))
1011 for (
auto *
MI :
reverse(FalseLanesUnknown)) {
1012 for (
auto &MO :
MI->operands()) {
1015 if (!HasPredicatedUsers(
MI, MO, Predicated)) {
1017 <<
TRI.getRegAsmName(MO.getReg()) <<
" at " << *
MI);
1024 Predicated.insert(
MI);
1029 ML.getExitBlocks(ExitBlocks);
1030 assert(
ML.getNumBlocks() == 1 &&
"Expected single block loop!");
1031 assert(ExitBlocks.
size() == 1 &&
"Expected a single exit block");
1036 if (
RegMask.PhysReg == ARM::VPR) {
1055 while (!Worklist.empty()) {
1057 if (
MI->getOpcode() == ARM::MQPRCopy) {
1062 Worklist.push_back(CopySrc);
1082 ?
End->getOperand(1).getMBB()
1083 :
End->getOperand(2).getMBB();
1086 if (TgtBB !=
ML.getHeader()) {
1087 LLVM_DEBUG(
dbgs() <<
"ARM Loops: LoopEnd is not targeting header.\n");
1093 if (BBUtils->getOffsetOf(
End) < BBUtils->getOffsetOf(
ML.getHeader()) ||
1094 !BBUtils->isBBInRange(
End,
ML.getHeader(), 4094)) {
1101 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||
1102 !BBUtils->isBBInRange(Start, TargetBB, 4094)) {
1103 LLVM_DEBUG(
dbgs() <<
"ARM Loops: WLS offset is out-of-range!\n");
1111 StartInsertBB = Start->getParent();
1115 Revert = !ValidateRanges(Start,
End, BBUtils,
ML);
1116 CannotTailPredicate = !ValidateTailPredicate();
1121 if (VCTPs.
empty()) {
1131 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Found VCTP with a different reaching "
1132 "definition from the main VCTP");
1144 if (
const auto *FS = dyn_cast<FixedStackPseudoSourceValue>(PseudoValue)) {
1145 return FS->getFrameIndex();
1152 switch (
I->getOpcode()) {
1153 case ARM::MVE_VSTRWU32:
1154 case ARM::MVE_VLDRWU32: {
1155 return I->getOperand(1).getReg() == ARM::SP &&
1156 I->memoperands().size() == 1 &&
1157 GetFrameIndex(
I->memoperands().front()) >= 0;
1166 if (
MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(
MI))
1172 if (
MI->memoperands().size() == 0)
1174 int FI = GetFrameIndex(
MI->memoperands().front());
1176 auto &FrameInfo =
MI->getParent()->getParent()->getFrameInfo();
1177 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
1181 ML->getExitBlocks(Frontier);
1184 while (
Idx < Frontier.
size()) {
1186 bool LookAtSuccessors =
true;
1187 for (
auto &
I : *BB) {
1188 if (!IsStackOp(&
I) ||
I.memoperands().size() == 0)
1190 if (GetFrameIndex(
I.memoperands().front()) != FI)
1194 if (
I.getOpcode() == ARM::MVE_VSTRWU32) {
1195 LookAtSuccessors =
false;
1200 if (
I.getOpcode() == ARM::MVE_VLDRWU32)
1204 if (LookAtSuccessors) {
1218 if (CannotTailPredicate)
1224 if (
MI->getOpcode() == ARM::MVE_VPSEL ||
1225 MI->getOpcode() == ARM::MVE_VPNOT) {
1245 unsigned LastOpIdx =
MI->getNumOperands() - 1;
1252 VPTState::addInst(
MI);
1254 }
else if (
MI->getOpcode() != ARM::MVE_VPST) {
1263 bool RequiresExplicitPredication =
1266 if (
MI->getOpcode() == ARM::MQPRCopy)
1269 DoubleWidthResultInstrs.insert(
MI);
1274 <<
"ARM Loops: Can't tail predicate: " << *
MI);
1289 VPTState::resetPredicate(
MI);
1291 VPTState::addPredicate(
MI);
1297 VPTState::CreateVPTBlock(
MI);
1310 MLI = &getAnalysis<MachineLoopInfo>();
1311 RDA = &getAnalysis<ReachingDefAnalysis>();
1312 MF->
getProperties().
set(MachineFunctionProperties::Property::TracksLiveness);
1315 TRI =
ST.getRegisterInfo();
1317 BBUtils->computeAllBlockSizes();
1318 BBUtils->adjustBBOffsetsAfter(&MF->
front());
1320 bool Changed =
false;
1321 for (
auto *
ML : *MLI) {
1322 if (
ML->isOutermost())
1323 Changed |= ProcessLoop(
ML);
1325 Changed |= RevertNonLoops();
1331 bool Changed =
false;
1335 Changed |= ProcessLoop(L);
1338 dbgs() <<
"ARM Loops: Processing loop containing:\n";
1339 if (
auto *Preheader =
ML->getLoopPreheader())
1341 else if (
auto *Preheader = MLI->findLoopPreheader(
ML,
true,
true))
1343 for (
auto *
MBB :
ML->getBlocks())
1351 for (
auto &
MI : *
MBB) {
1360 LowOverheadLoop LoLoop(*
ML, *MLI, *
RDA, *
TRI, *
TII);
1364 if (LoLoop.Preheader)
1365 LoLoop.Start = SearchForStart(LoLoop.Preheader);
1373 for (
auto &
MI : *
MBB) {
1374 if (
MI.isDebugValue())
1376 else if (
MI.getOpcode() == ARM::t2LoopDec)
1378 else if (
MI.getOpcode() == ARM::t2LoopEnd)
1380 else if (
MI.getOpcode() == ARM::t2LoopEndDec)
1381 LoLoop.End = LoLoop.Dec = &
MI;
1384 else if (
MI.getDesc().isCall()) {
1388 LoLoop.Revert =
true;
1393 LoLoop.AnalyseMVEInst(&
MI);
1399 if (!LoLoop.FoundAllComponents()) {
1400 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't find loop start, update, end\n");
1404 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&
1405 "Expected t2WhileLoopStart to be removed before regalloc!");
1410 if (LoLoop.Dec != LoLoop.End) {
1413 if (
Uses.size() > 1 || !
Uses.count(LoLoop.End)) {
1415 LoLoop.Revert =
true;
1418 LoLoop.Validate(BBUtils.get());
1430 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1431 ARM::tBcc : ARM::t2Bcc;
1446 if (
I->getOpcode() == ARM::t2LoopEnd) {
1461void ARMLowOverheadLoops::RevertLoopEnd(
MachineInstr *
MI,
bool SkipCmp)
const {
1465 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1466 ARM::tBcc : ARM::t2Bcc;
1472void ARMLowOverheadLoops::RevertLoopEndDec(
MachineInstr *
MI)
const {
1474 assert(
MI->getOpcode() == ARM::t2LoopEndDec &&
"Expected a t2LoopEndDec!");
1480 MIB.
add(
MI->getOperand(1));
1483 MIB.
addReg(ARM::NoRegister);
1489 BBUtils->isBBInRange(
MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
1493 MIB.
add(
MI->getOperand(2));
1497 MI->eraseFromParent();
1524void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
1525 if (!LoLoop.IsTailPredicationLegal())
1528 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Trying DCE on loop iteration count.\n");
1532 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Couldn't find iteration count.\n");
1539 if (!TryRemove(Def, *
RDA, LoLoop.ToRemove, Killed))
1540 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unsafe to remove loop iteration count.\n");
1543MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
1547 IterationCountDCE(LoLoop);
1552 unsigned Opc = LoLoop.getStartOpcode();
1558 Count.
getReg() == ARM::LR) {
1559 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't insert start: DLS lr, lr");
1563 BuildMI(*
MBB, InsertPt, Start->getDebugLoc(),
TII->get(Opc));
1574 LoLoop.ToRemove.insert(Start);
1578void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1580 if (
MI->isDebugInstr())
1584 assert(PIdx >= 1 &&
"Trying to unpredicate a non-predicated instruction");
1586 "Expected Then predicate!");
1588 MI->getOperand(PIdx + 1).setReg(0);
1591 for (
auto &
Block : LoLoop.getVPTBlocks()) {
1595 assert(TheVCMP &&
"Replacing a removed or non-existent VCMP");
1598 BuildMI(*At->getParent(), At, At->getDebugLoc(),
1607 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Combining with VCMP to VPT: " << *MIB);
1608 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1609 LoLoop.ToRemove.insert(TheVCMP);
1613 if (VPTState::isEntryPredicatedOnVCTP(
Block,
true)) {
1615 if (VPTState::hasUniformPredicate(
Block)) {
1621 for (
unsigned i = 1; i < Insts.
size(); ++i)
1622 RemovePredicate(Insts[i]);
1634 while (DivergentNext !=
MBB->
end() && DivergentNext->isDebugInstr())
1637 bool DivergentNextIsPredicated =
1638 DivergentNext !=
MBB->
end() &&
1643 RemovePredicate(&*
I);
1650 if (DivergentNextIsPredicated) {
1662 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1666 ReplaceVCMPWithVPT(VCMP, VCMP);
1671 LoLoop.ToRemove.insert(VPST);
1672 }
else if (
Block.containsVCTP()) {
1676 if (
Block.size() == 2) {
1678 "Found a VPST in an otherwise empty vpt block");
1679 LoLoop.ToRemove.insert(VPST);
1681 LoLoop.BlockMasksToRecompute.insert(VPST);
1682 }
else if (Insts.
front()->getOpcode() == ARM::MVE_VPST) {
1689 "The instruction after a VPST must be predicated");
1693 !LoLoop.ToRemove.contains(VprDef)) {
1704 ReplaceVCMPWithVPT(VCMP, VPST);
1706 LoLoop.ToRemove.insert(VPST);
1712 LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
1715void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1718 auto ExpandLoopEnd = [
this](LowOverheadLoop &LoLoop) {
1721 unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1722 ARM::MVE_LETP : ARM::t2LEUpdate;
1726 unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
1727 MIB.
add(
End->getOperand(Off + 0));
1728 MIB.
add(
End->getOperand(Off + 1));
1730 LoLoop.ToRemove.insert(LoLoop.Dec);
1731 LoLoop.ToRemove.insert(
End);
1746 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Removing branch: " << *Terminator);
1755 for (
auto *
MI : VMOVCopies) {
1757 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1762 ARM::D0 + (Dst - ARM::Q0) * 2)
1763 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2)
1768 ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
1769 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
1773 MI->eraseFromParent();
1777 if (LoLoop.Revert) {
1779 RevertWhile(LoLoop.Start);
1781 RevertDo(LoLoop.Start);
1782 if (LoLoop.Dec == LoLoop.End)
1783 RevertLoopEndDec(LoLoop.End);
1787 ExpandVMOVCopies(LoLoop.VMOVCopies);
1788 LoLoop.Start = ExpandLoopStart(LoLoop);
1790 RemoveDeadBranch(LoLoop.Start);
1791 LoLoop.End = ExpandLoopEnd(LoLoop);
1792 RemoveDeadBranch(LoLoop.End);
1793 if (LoLoop.IsTailPredicationLegal())
1794 ConvertVPTBlocks(LoLoop);
1795 for (
auto *
I : LoLoop.ToRemove) {
1797 I->eraseFromParent();
1799 for (
auto *
I : LoLoop.BlockMasksToRecompute) {
1800 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Recomputing VPT/VPST Block Mask: " << *
I);
1806 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1809 for (
auto *
MBB : PostOrder) {
1823bool ARMLowOverheadLoops::RevertNonLoops() {
1824 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Reverting any remaining pseudos...\n");
1825 bool Changed =
false;
1827 for (
auto &
MBB : *MF) {
1834 for (
auto &
I :
MBB) {
1837 else if (
I.getOpcode() == ARM::t2LoopDec)
1839 else if (
I.getOpcode() == ARM::t2LoopEnd)
1841 else if (
I.getOpcode() == ARM::t2LoopEndDec)
1843 else if (
I.getOpcode() == ARM::MQPRCopy)
1853 for (
auto *Start : Starts) {
1859 for (
auto *Dec : Decs)
1862 for (
auto *
End : Ends)
1864 for (
auto *
End : EndDecs)
1865 RevertLoopEndDec(
End);
1866 for (
auto *
MI : MQPRCopies) {
1868 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1871 MI->getOperand(0).getReg())
1872 .
add(
MI->getOperand(1))
1873 .
add(
MI->getOperand(1));
1875 MI->eraseFromParent();
1882 return new ARMLowOverheadLoops();
unsigned const MachineRegisterInfo * MRI
static bool isDomainMVE(MachineInstr *MI)
SmallPtrSet< MachineInstr *, 2 > Uses
static bool isVectorPredicated(MachineInstr *MI)
ReachingDefAnalysis & RDA
static bool canGenerateNonZeros(const MachineInstr &MI)
static bool isHorizontalReduction(const MachineInstr &MI)
ReachingDefAnalysis InstSet & ToRemove
static bool producesDoubleWidthResult(const MachineInstr &MI)
static bool hasVPRUse(MachineInstr &MI)
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)
static bool isVectorPredicate(MachineInstr *MI)
static bool retainsPreviousHalfElement(const MachineInstr &MI)
static bool shouldInspect(MachineInstr &MI)
static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))
static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero)
static int getVecSize(const MachineInstr &MI)
#define ARM_LOW_OVERHEAD_LOOPS_NAME
static cl::opt< bool > DisableOmitDLS("arm-disable-omit-dls", cl::Hidden, cl::desc("Disable omitting 'dls lr, lr' instructions"), cl::init(false))
ReachingDefAnalysis InstSet InstSet & Ignore
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
DenseMap< Block *, BlockRelaxAux > Blocks
const HexagonInstrInfo * TII
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static constexpr uint32_t RegMask
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
Describe properties that are true of each instruction in the target description file.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
unsigned pred_size() const
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
iterator_range< livein_iterator > liveins() const
MachineInstr & instr_back()
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
pred_iterator pred_begin()
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
iterator_range< succ_iterator > successors()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineBasicBlock & front() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false, bool FindMultiLoopPreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader.
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getOperandNo() const
Returns the index of this operand in the instruction that it belongs to.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Special value supplied for machine level alias analysis.
This class provides the reaching def analysis.
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved forwards to just before To.
bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const
Return whether a MachineInstr could be inserted at MI and safely define the given register without af...
bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const
Return whether removing this instruction will have no effect on the program, returning the redundant ...
MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const
Return the local MI that produces the live out value for PhysReg, or nullptr for a non-live out or no...
MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const
If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.
void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Provides the uses, in the same block as MI, of register that MI defines.
void reset()
Re-run the analysis.
bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const
Provide whether the register has been defined in the same basic block as, and before,...
bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, MCRegister PhysReg) const
Return whether A and B use the same def of PhysReg.
void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Collect the users of the value stored in PhysReg, which is defined by MI.
void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const
Assuming MI is dead, recursively search the incoming operands which are killed by MI and collect thos...
bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved backwards to just after To.
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, MCRegister PhysReg) const
If a single MachineInstr creates the reaching definition, then return it.
bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const
Return whether the reaching def for MI also is live out of its parent block.
Wrapper class representing virtual and physical registers.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
iterator end()
Get an iterator to the end of the SetVector.
void clear()
Completely clear the SetVector.
iterator begin()
Get an iterator to the beginning of the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
A Use represents the edge between a Value definition and its users.
@ ValidForTailPredication
@ RetainsPreviousHalfElement
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
bool isVpred(OperandType op)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
NodeAddr< DefNode * > Def
This is an optimization pass for GlobalISel generic memory operations.
static bool isDoLoopStart(const MachineInstr &MI)
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isVCTP(const MachineInstr *MI)
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are are tuples (A,...
static bool isVPTOpcode(int Opc)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
static void recomputeLiveIns(MachineBasicBlock &MBB)
Convenience function for recomputing live-in's for MBB.
static unsigned getTailPredVectorWidth(unsigned Opcode)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
FunctionPass * createARMLowOverheadLoopsPass()
static bool isMovRegOpcode(int Opc)
static bool isSubImmOpcode(int Opc)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
static bool isLoopStart(const MachineInstr &MI)
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
static bool isWhileLoopStart(const MachineInstr &MI)
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
int getAddSubImmediate(MachineInstr &MI)
void recomputeVPTBlockMask(MachineInstr &Instr)
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Pair of physical register and lane mask.