Go to the documentation of this file.
76 #define DEBUG_TYPE "arm-low-overhead-loops"
77 #define ARM_LOW_OVERHEAD_LOOPS_NAME "ARM Low Overhead Loops pass"
81 cl::desc(
"Disable tail-predication in the ARM LowOverheadLoop pass"),
86 return PIdx != -1 &&
MI->getOperand(PIdx + 1).getReg() == ARM::VPR;
90 return MI->findRegisterDefOperandIdx(ARM::VPR) != -1;
94 return MI.findRegisterUseOperandIdx(ARM::VPR) != -1;
109 if (
MI.isDebugInstr())
118 class PostOrderLoopTraversal {
126 : ML(ML), MLI(MLI) { }
146 Order.push_back(
MBB);
160 Order.push_back(
MBB);
166 GetPredecessor(Preheader);
168 GetPredecessor(Preheader);
172 struct PredicatedMI {
178 assert(
I &&
"Instruction must not be null!");
189 friend struct LowOverheadLoop;
196 std::unique_ptr<PredicatedMI>> PredicatedInsts;
199 assert((CurrentPredicates.
size() ||
MI->getParent()->isLiveIn(ARM::VPR))
200 &&
"Can't begin VPT without predicate");
205 PredicatedInsts.emplace(
206 MI, std::make_unique<PredicatedMI>(
MI, CurrentPredicates));
209 static void reset() {
211 PredicatedInsts.clear();
212 CurrentPredicates.
clear();
217 PredicatedInsts.emplace(
218 MI, std::make_unique<PredicatedMI>(
MI, CurrentPredicates));
228 CurrentPredicates.
clear();
235 static bool hasUniformPredicate(VPTState &Block) {
236 return getDivergent(Block) ==
nullptr;
243 for (
unsigned i = 1;
i < Insts.size(); ++
i) {
252 static bool isPredicatedOnVCTP(
MachineInstr *
MI,
bool Exclusive =
false) {
254 if (Exclusive && Predicates.
size() != 1)
260 static bool isEntryPredicatedOnVCTP(VPTState &Block,
261 bool Exclusive =
false) {
263 return isPredicatedOnVCTP(Insts.front(), Exclusive);
269 static bool hasImplicitlyValidVPT(VPTState &Block,
274 "Expected VPT block to begin with VPT/VPST");
281 return Op && PredicatedInsts.count(
Op) && isPredicatedOnVCTP(
Op);
294 for (
auto *
Def : Defs)
302 return (IsOperandPredicated(VPT, 1) || IsOperandPredicated(VPT, 2)) &&
303 (IsOperandPredicated(VPT, 1) || IsOperandInvariant(VPT, 1)) &&
304 (IsOperandPredicated(VPT, 2) || IsOperandInvariant(VPT, 2));
311 for (
auto &Block : Blocks) {
312 if (isEntryPredicatedOnVCTP(Block,
false) ||
313 hasImplicitlyValidVPT(Block,
RDA))
320 "Expected VPT block to start with a VPST or VPT!");
321 if (Insts.size() == 2 && Insts.front()->getOpcode() != ARM::MVE_VPST &&
325 for (
auto *
MI : Insts) {
335 if (!isPredicatedOnVCTP(
MI)) {
349 assert(Insts.size() <= 5 &&
"Too many instructions in VPT block!");
352 bool containsVCTP()
const {
356 unsigned size()
const {
return Insts.size(); }
360 struct LowOverheadLoop {
381 bool CannotTailPredicate =
false;
402 CannotTailPredicate = !ValidateMVEInst(
MI);
405 bool IsTailPredicationLegal()
const {
408 return !Revert && FoundAllComponents() && !VCTPs.empty() &&
419 bool ValidateTailPredicate();
423 bool ValidateLiveOuts();
434 bool FoundAllComponents()
const {
435 return Start && Dec && End;
439 return VPTState::Blocks;
445 if (IsTailPredicationLegal())
446 return TPNumElements;
447 return Start->getOperand(1);
450 unsigned getStartOpcode()
const {
452 if (!IsTailPredicationLegal())
453 return IsDo ? ARM::t2DLS : ARM::t2WLS;
459 if (Start)
dbgs() <<
"ARM Loops: Found Loop Start: " << *Start;
460 if (Dec)
dbgs() <<
"ARM Loops: Found Loop Dec: " << *Dec;
461 if (End)
dbgs() <<
"ARM Loops: Found Loop End: " << *End;
462 if (!VCTPs.empty()) {
463 dbgs() <<
"ARM Loops: Found VCTP(s):\n";
464 for (
auto *
MI : VCTPs)
467 if (!FoundAllComponents())
468 dbgs() <<
"ARM Loops: Not a low-overhead loop.\n";
469 else if (!(Start && Dec && End))
470 dbgs() <<
"ARM Loops: Failed to find all loop components.\n";
481 std::unique_ptr<ARMBasicBlockUtils> BBUtils =
nullptr;
510 bool RevertNonLoops();
521 void ConvertVPTBlocks(LowOverheadLoop &LoLoop);
525 void Expand(LowOverheadLoop &LoLoop);
527 void IterationCountDCE(LowOverheadLoop &LoLoop);
536 std::unique_ptr<PredicatedMI>> VPTState::PredicatedInsts;
549 for (
auto *
Dead : Killed)
553 std::map<MachineInstr *, SmallPtrSet<MachineInstr *, 2>> ITBlocks;
554 for (
auto *
MBB : BasicBlocks) {
555 for (
auto &
IT : *
MBB) {
556 if (
IT.getOpcode() != ARM::t2IT)
567 for (
auto *
Dead : Killed) {
571 auto &CurrentBlock = ITBlocks[
IT];
572 CurrentBlock.erase(
Dead);
573 if (CurrentBlock.empty())
579 if (!ModifiedITs.
empty())
581 Killed.insert(RemoveITs.
begin(), RemoveITs.
end());
592 <<
" - can also remove:\n";
598 if (WontCorruptITs(Killed,
RDA)) {
608 bool LowOverheadLoop::ValidateTailPredicate() {
609 if (!IsTailPredicationLegal()) {
611 dbgs() <<
"ARM Loops: Didn't find a VCTP instruction.\n";
612 dbgs() <<
"ARM Loops: Tail-predication is not valid.\n");
616 assert(!VCTPs.empty() &&
"VCTP instruction expected but is not set");
618 "Shouldn't be processing a loop with more than one block");
621 LLVM_DEBUG(
dbgs() <<
"ARM Loops: tail-predication is disabled\n");
630 if (!ValidateLiveOuts()) {
640 if (Start->getOpcode() == ARM::t2DoLoopStartTP ||
641 Start->getOpcode() == ARM::t2WhileLoopStartTP) {
642 TPNumElements = Start->getOperand(2);
643 StartInsertPt = Start;
644 StartInsertBB = Start->getParent();
653 LLVM_DEBUG(
dbgs() <<
"ARM Loops: VCTP operand is defined in the loop.\n");
661 if (StartInsertPt != StartInsertBB->
end() &&
666 ElemDef->removeFromParent();
667 StartInsertBB->
insert(StartInsertPt, ElemDef);
669 <<
"ARM Loops: Moved element count def: " << *ElemDef);
671 StartInsertPt->removeFromParent();
674 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Moved start past: " << *ElemDef);
684 TPNumElements = Operand;
685 NumElements = TPNumElements.
getReg();
688 <<
"ARM Loops: Unable to move element count to loop "
689 <<
"start instruction.\n");
716 while (
MBB &&
MBB != StartInsertBB) {
717 if (CannotProvideElements(
MBB, NumElements)) {
718 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unable to provide element count.\n");
735 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Instruction blocks [W|D]LSTP\n");
746 if (InstrVecSize > VCTPVecSize) {
747 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Double width result larger than VCTP "
748 <<
"VecSize:\n" << *
MI);
772 Ignore.insert(VCTPs.begin(), VCTPs.end());
775 bool FoundSub =
false;
777 for (
auto *
MI : ElementChain) {
782 if (FoundSub || !IsValidSub(
MI, ExpectedVectorWidth)) {
783 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
789 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unexpected instruction in element"
794 ToRemove.insert(ElementChain.begin(), ElementChain.end());
801 if ((Start->getOpcode() == ARM::t2DoLoopStartTP ||
802 Start->getOpcode() == ARM::t2WhileLoopStartTP) &&
803 Preheader && !Preheader->
empty() &&
808 Ignore.insert(VCTPs.begin(), VCTPs.end());
856 switch (
MI.getOpcode()) {
865 case ARM::MVE_VCLZs8:
866 case ARM::MVE_VCLZs16:
867 case ARM::MVE_VCLZs32:
893 Def->getOpcode() == ARM::MVE_VMOVimmi32 &&
894 Def->getOperand(1).getImm() == 0;
898 for (
auto &MO :
MI.operands()) {
905 if (PIdx != -1 && (
int)
MI.getOperandNo(&MO) == PIdx + 2)
914 for (
auto *
Def : Defs) {
926 bool LowOverheadLoop::ValidateLiveOuts() {
956 for (
auto &
MI : *Header) {
964 bool retainsOrReduces =
971 else if (
MI.getNumDefs() == 0)
974 LLVM_DEBUG(
dbgs() <<
" Unpredicated instruction that retainsOrReduces: " <<
MI);
981 dbgs() <<
" Predicated:\n";
982 for (
auto *
I : Predicated)
984 dbgs() <<
" FalseLanesZero:\n";
987 dbgs() <<
" FalseLanesUnknown:\n";
988 for (
auto *
I : FalseLanesUnknown)
997 if (
Use !=
MI && !Predicated.count(
Use))
1010 for (
auto *
MI :
reverse(FalseLanesUnknown)) {
1011 for (
auto &MO :
MI->operands()) {
1014 if (!HasPredicatedUsers(
MI, MO, Predicated)) {
1023 Predicated.insert(
MI);
1030 assert(ExitBlocks.size() == 1 &&
"Expected a single exit block");
1035 if (RegMask.PhysReg == ARM::VPR) {
1041 if (QPRs->
contains(RegMask.PhysReg))
1054 while (!Worklist.empty()) {
1056 if (
MI->getOpcode() == ARM::MQPRCopy) {
1061 Worklist.push_back(CopySrc);
1081 ? End->getOperand(1).getMBB()
1082 : End->getOperand(2).getMBB();
1086 LLVM_DEBUG(
dbgs() <<
"ARM Loops: LoopEnd is not targeting header.\n");
1092 if (BBUtils->getOffsetOf(End) < BBUtils->getOffsetOf(ML.
getHeader()) ||
1093 !BBUtils->isBBInRange(End, ML.
getHeader(), 4094)) {
1100 if (BBUtils->getOffsetOf(Start) > BBUtils->getOffsetOf(TargetBB) ||
1101 !BBUtils->isBBInRange(Start, TargetBB, 4094)) {
1102 LLVM_DEBUG(
dbgs() <<
"ARM Loops: WLS offset is out-of-range!\n");
1110 StartInsertBB = Start->getParent();
1114 Revert = !ValidateRanges(Start, End, BBUtils, ML);
1115 CannotTailPredicate = !ValidateTailPredicate();
1120 if (VCTPs.empty()) {
1121 VCTPs.push_back(
MI);
1130 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Found VCTP with a different reaching "
1131 "definition from the main VCTP");
1134 VCTPs.push_back(
MI);
1143 if (
const auto *
FS = dyn_cast<FixedStackPseudoSourceValue>(PseudoValue)) {
1144 return FS->getFrameIndex();
1151 switch (
I->getOpcode()) {
1152 case ARM::MVE_VSTRWU32:
1153 case ARM::MVE_VLDRWU32: {
1154 return I->getOperand(1).getReg() == ARM::SP &&
1155 I->memoperands().size() == 1 &&
1156 GetFrameIndex(
I->memoperands().front()) >= 0;
1165 if (
MI->getOpcode() != ARM::MVE_VSTRWU32 || !IsStackOp(
MI))
1171 if (
MI->memoperands().size() == 0)
1173 int FI = GetFrameIndex(
MI->memoperands().front());
1175 auto &FrameInfo =
MI->getParent()->getParent()->getFrameInfo();
1176 if (FI == -1 || !FrameInfo.isSpillSlotObjectIndex(FI))
1183 while (Idx < Frontier.size()) {
1185 bool LookAtSuccessors =
true;
1186 for (
auto &
I : *
BB) {
1187 if (!IsStackOp(&
I) ||
I.memoperands().size() == 0)
1189 if (GetFrameIndex(
I.memoperands().front()) != FI)
1193 if (
I.getOpcode() == ARM::MVE_VSTRWU32) {
1194 LookAtSuccessors =
false;
1199 if (
I.getOpcode() == ARM::MVE_VLDRWU32)
1203 if (LookAtSuccessors) {
1204 for (
auto Succ :
BB->successors()) {
1206 Frontier.push_back(Succ);
1217 if (CannotTailPredicate)
1223 if (
MI->getOpcode() == ARM::MVE_VPSEL ||
1224 MI->getOpcode() == ARM::MVE_VPNOT) {
1244 unsigned LastOpIdx =
MI->getNumOperands() - 1;
1251 VPTState::addInst(
MI);
1253 }
else if (
MI->getOpcode() != ARM::MVE_VPST) {
1262 bool RequiresExplicitPredication =
1265 if (
MI->getOpcode() == ARM::MQPRCopy)
1268 DoubleWidthResultInstrs.insert(
MI);
1273 <<
"ARM Loops: Can't tail predicate: " << *
MI);
1288 VPTState::resetPredicate(
MI);
1290 VPTState::addPredicate(
MI);
1309 MLI = &getAnalysis<MachineLoopInfo>();
1310 RDA = &getAnalysis<ReachingDefAnalysis>();
1314 TRI =
ST.getRegisterInfo();
1316 BBUtils->computeAllBlockSizes();
1317 BBUtils->adjustBBOffsetsAfter(&MF->
front());
1319 bool Changed =
false;
1320 for (
auto ML : *MLI) {
1322 Changed |= ProcessLoop(ML);
1324 Changed |= RevertNonLoops();
1328 bool ARMLowOverheadLoops::ProcessLoop(
MachineLoop *ML) {
1330 bool Changed =
false;
1334 Changed |= ProcessLoop(L);
1337 dbgs() <<
"ARM Loops: Processing loop containing:\n";
1338 if (
auto *Preheader = ML->getLoopPreheader())
1340 else if (
auto *Preheader = MLI->findLoopPreheader(ML,
true,
true))
1342 for (
auto *
MBB : ML->getBlocks())
1350 for (
auto &
MI : *
MBB) {
1359 LowOverheadLoop LoLoop(*ML, *MLI, *
RDA, *
TRI, *
TII);
1363 if (LoLoop.Preheader)
1364 LoLoop.Start = SearchForStart(LoLoop.Preheader);
1372 for (
auto &
MI : *
MBB) {
1373 if (
MI.isDebugValue())
1375 else if (
MI.getOpcode() == ARM::t2LoopDec)
1377 else if (
MI.getOpcode() == ARM::t2LoopEnd)
1379 else if (
MI.getOpcode() == ARM::t2LoopEndDec)
1380 LoLoop.End = LoLoop.Dec = &
MI;
1383 else if (
MI.getDesc().isCall()) {
1387 LoLoop.Revert =
true;
1392 LoLoop.AnalyseMVEInst(&
MI);
1398 if (!LoLoop.FoundAllComponents()) {
1399 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't find loop start, update, end\n");
1403 assert(LoLoop.Start->getOpcode() != ARM::t2WhileLoopStart &&
1404 "Expected t2WhileLoopStart to be removed before regalloc!");
1409 if (LoLoop.Dec != LoLoop.End) {
1412 if (
Uses.size() > 1 || !
Uses.count(LoLoop.End)) {
1414 LoLoop.Revert =
true;
1417 LoLoop.Validate(BBUtils.get());
1429 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1430 ARM::tBcc : ARM::t2Bcc;
1445 if (
I->getOpcode() == ARM::t2LoopEnd) {
1464 unsigned BrOpc = BBUtils->isBBInRange(
MI, DestBB, 254) ?
1465 ARM::tBcc : ARM::t2Bcc;
1471 void ARMLowOverheadLoops::RevertLoopEndDec(
MachineInstr *
MI)
const {
1473 assert(
MI->getOpcode() == ARM::t2LoopEndDec &&
"Expected a t2LoopEndDec!");
1479 MIB.
add(
MI->getOperand(1));
1482 MIB.
addReg(ARM::NoRegister);
1488 BBUtils->isBBInRange(
MI, DestBB, 254) ? ARM::tBcc : ARM::t2Bcc;
1492 MIB.
add(
MI->getOperand(2));
1496 MI->eraseFromParent();
1523 void ARMLowOverheadLoops::IterationCountDCE(LowOverheadLoop &LoLoop) {
1524 if (!LoLoop.IsTailPredicationLegal())
1527 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Trying DCE on loop iteration count.\n");
1531 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Couldn't find iteration count.\n");
1538 if (!TryRemove(
Def, *
RDA, LoLoop.ToRemove, Killed))
1539 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Unsafe to remove loop iteration count.\n");
1542 MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) {
1546 IterationCountDCE(LoLoop);
1551 unsigned Opc = LoLoop.getStartOpcode();
1556 if (Opc == ARM::t2DLS && Count.
isReg() && Count.
getReg() == ARM::LR) {
1557 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Didn't insert start: DLS lr, lr");
1561 BuildMI(*
MBB, InsertPt, Start->getDebugLoc(),
TII->get(Opc));
1572 LoLoop.ToRemove.insert(Start);
1576 void ARMLowOverheadLoops::ConvertVPTBlocks(LowOverheadLoop &LoLoop) {
1578 if (
MI->isDebugInstr())
1582 assert(PIdx >= 1 &&
"Trying to unpredicate a non-predicated instruction");
1584 "Expected Then predicate!");
1586 MI->getOperand(PIdx + 1).setReg(0);
1589 for (
auto &Block : LoLoop.getVPTBlocks()) {
1593 assert(TheVCMP &&
"Replacing a removed or non-existent VCMP");
1596 BuildMI(*At->getParent(), At, At->getDebugLoc(),
1605 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Combining with VCMP to VPT: " << *MIB);
1606 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1607 LoLoop.ToRemove.insert(TheVCMP);
1611 if (VPTState::isEntryPredicatedOnVCTP(Block,
true)) {
1613 if (VPTState::hasUniformPredicate(Block)) {
1619 for (
unsigned i = 1;
i < Insts.size(); ++
i)
1620 RemovePredicate(Insts[
i]);
1629 MachineInstr *Divergent = VPTState::getDivergent(Block);
1632 while (DivergentNext !=
MBB->
end() && DivergentNext->isDebugInstr())
1635 bool DivergentNextIsPredicated =
1636 DivergentNext !=
MBB->
end() &&
1641 RemovePredicate(&*
I);
1648 if (DivergentNextIsPredicated) {
1660 LoLoop.BlockMasksToRecompute.insert(MIB.
getInstr());
1669 LoLoop.ToRemove.insert(VPST);
1670 }
else if (
Block.containsVCTP()) {
1674 if (
Block.size() == 2) {
1676 "Found a VPST in an otherwise empty vpt block");
1677 LoLoop.ToRemove.insert(VPST);
1679 LoLoop.BlockMasksToRecompute.insert(VPST);
1680 }
else if (Insts.front()->getOpcode() == ARM::MVE_VPST) {
1687 "The instruction after a VPST must be predicated");
1691 !LoLoop.ToRemove.contains(VprDef)) {
1702 ReplaceVCMPWithVPT(
VCMP, VPST);
1704 LoLoop.ToRemove.insert(VPST);
1710 LoLoop.ToRemove.insert(LoLoop.VCTPs.begin(), LoLoop.VCTPs.end());
1713 void ARMLowOverheadLoops::Expand(LowOverheadLoop &LoLoop) {
1716 auto ExpandLoopEnd = [
this](LowOverheadLoop &LoLoop) {
1719 unsigned Opc = LoLoop.IsTailPredicationLegal() ?
1720 ARM::MVE_LETP : ARM::t2LEUpdate;
1724 unsigned Off = LoLoop.Dec == LoLoop.End ? 1 : 0;
1725 MIB.
add(End->getOperand(Off + 0));
1726 MIB.
add(End->getOperand(Off + 1));
1728 LoLoop.ToRemove.insert(LoLoop.Dec);
1729 LoLoop.ToRemove.insert(End);
1743 if (
BB->isLayoutSuccessor(Succ)) {
1753 for (
auto *
MI : VMOVCopies) {
1755 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1760 ARM::D0 + (Dst - ARM::Q0) * 2)
1761 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2)
1766 ARM::D0 + (Dst - ARM::Q0) * 2 + 1)
1767 .
addReg(ARM::D0 + (Src - ARM::Q0) * 2 + 1)
1771 MI->eraseFromParent();
1775 if (LoLoop.Revert) {
1777 RevertWhile(LoLoop.Start);
1779 RevertDo(LoLoop.Start);
1780 if (LoLoop.Dec == LoLoop.End)
1781 RevertLoopEndDec(LoLoop.End);
1785 ExpandVMOVCopies(LoLoop.VMOVCopies);
1786 LoLoop.Start = ExpandLoopStart(LoLoop);
1788 RemoveDeadBranch(LoLoop.Start);
1789 LoLoop.End = ExpandLoopEnd(LoLoop);
1790 RemoveDeadBranch(LoLoop.End);
1791 if (LoLoop.IsTailPredicationLegal())
1792 ConvertVPTBlocks(LoLoop);
1793 for (
auto *
I : LoLoop.ToRemove) {
1795 I->eraseFromParent();
1797 for (
auto *
I : LoLoop.BlockMasksToRecompute) {
1798 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Recomputing VPT/VPST Block Mask: " << *
I);
1804 PostOrderLoopTraversal DFS(LoLoop.ML, *MLI);
1807 for (
auto *
MBB : PostOrder) {
1821 bool ARMLowOverheadLoops::RevertNonLoops() {
1822 LLVM_DEBUG(
dbgs() <<
"ARM Loops: Reverting any remaining pseudos...\n");
1823 bool Changed =
false;
1825 for (
auto &
MBB : *MF) {
1832 for (
auto &
I :
MBB) {
1834 Starts.push_back(&
I);
1835 else if (
I.getOpcode() == ARM::t2LoopDec)
1837 else if (
I.getOpcode() == ARM::t2LoopEnd)
1839 else if (
I.getOpcode() == ARM::t2LoopEndDec)
1840 EndDecs.push_back(&
I);
1841 else if (
I.getOpcode() == ARM::MQPRCopy)
1842 MQPRCopies.push_back(&
I);
1845 if (Starts.empty() && Decs.empty() && Ends.empty() && EndDecs.empty() &&
1851 for (
auto *Start : Starts) {
1857 for (
auto *Dec : Decs)
1860 for (
auto *End : Ends)
1862 for (
auto *End : EndDecs)
1863 RevertLoopEndDec(End);
1864 for (
auto *
MI : MQPRCopies) {
1866 assert(
MI->getOpcode() == ARM::MQPRCopy &&
"Only expected MQPRCOPY!");
1869 MI->getOperand(0).getReg())
1870 .
add(
MI->getOperand(1))
1871 .
add(
MI->getOperand(1));
1873 MI->eraseFromParent();
1880 return new ARMLowOverheadLoops();
static bool isVectorPredicated(MachineInstr *MI)
virtual StringRef getRegAsmName(MCRegister Reg) const
Return the assembly name for Reg.
INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) static bool TryRemove(MachineInstr *MI
static bool producesFalseLanesZero(MachineInstr &MI, const TargetRegisterClass *QPRs, const ReachingDefAnalysis &RDA, InstSet &FalseLanesZero)
void reset()
Re-run the analysis.
pred_iterator pred_begin()
bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, MCRegister PhysReg) const
Return whether A and B use the same def of PhysReg.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
This is an optimization pass for GlobalISel generic memory operations.
bool isSafeToRemove(MachineInstr *MI, InstSet &ToRemove) const
Return whether removing this instruction will have no effect on the program, returning the redundant ...
bool isVpred(OperandType op)
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
void getExitBlocks(SmallVectorImpl< BlockT * > &ExitBlocks) const
Return all of the successor blocks of this loop.
MachineInstr * getUniqueReachingMIDef(MachineInstr *MI, MCRegister PhysReg) const
If a single MachineInstr creates the reaching definition, then return it.
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
This class provides the reaching def analysis.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
const MachineInstrBuilder & add(const MachineOperand &MO) const
FunctionPass * createARMLowOverheadLoopsPass()
void recomputeLivenessFlags(MachineBasicBlock &MBB)
Recomputes dead and kill flags in MBB.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within this loop.
MachineBasicBlock * findLoopPreheader(MachineLoop *L, bool SpeculativePreheader=false, bool FindMultiLoopPreheader=false) const
Find the block that either is the loop preheader, or could speculatively be used as the preheader.
size_type size() const
Determine the number of elements in the SetVector.
static bool isDoLoopStart(const MachineInstr &MI)
ReachingDefAnalysis InstSet InstSet & Ignore
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
detail::enumerator< R > enumerate(R &&TheRange)
Given an input range, returns a new range whose values are pairs (A,B) such that A is the 0-based ...
ReachingDefAnalysis InstSet & ToRemove
static void recomputeLiveIns(MachineBasicBlock &MBB)
Convenience function for recomputing live-in's for MBB.
static bool isDomainMVE(MachineInstr *MI)
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
static bool hasVPRUse(MachineInstr &MI)
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
iterator_range< livein_iterator > liveins() const
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
A description of a memory reference used in the backend.
static bool shouldInspect(MachineInstr &MI)
Properties which a MachineFunction may have at a given point in time.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
static ARM::PredBlockMask CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, MachineBasicBlock::instr_iterator EndIter, SmallVectorImpl< MachineInstr * > &DeadInstructions)
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
unsigned const TargetRegisterInfo * TRI
static unsigned VCTPOpcodeToLSTP(unsigned Opcode, bool IsDoLoop)
@ ValidForTailPredication
unsigned getNumBlocks() const
Get the number of blocks in this loop in constant time.
bool hasLocalDefBefore(MachineInstr *MI, MCRegister PhysReg) const
Provide whether the register has been defined in the same basic block as, and before,...
MachineInstr * getLocalLiveOutMIDef(MachineBasicBlock *MBB, MCRegister PhysReg) const
Return the local MI that produces the live out value for PhysReg, or nullptr for a non-live out or no...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
SmallPtrSet< MachineInstr *, 2 > Uses
bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved backwards to just after To.
int getAddSubImmediate(MachineInstr &MI)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
unsigned pred_size() const
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
const MachineBasicBlock & front() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
iterator begin()
Get an iterator to the beginning of the SetVector.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
bool isPredicated(const MCInst &MI, const MCInstrInfo *MCII)
void getGlobalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Collect the users of the value stored in PhysReg, which is defined by MI.
static bool producesDoubleWidthResult(const MachineInstr &MI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const MachineOperand & getOperand(unsigned i) const
void RevertLoopDec(MachineInstr *MI, const TargetInstrInfo *TII, bool SetFlags=false)
int findFirstVPTPredOperandIdx(const MachineInstr &MI)
void RevertLoopEnd(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool SkipCmp=false)
Represent the analysis usage information of a pass.
static int getVecSize(const MachineInstr &MI)
const MachineFunctionProperties & getProperties() const
Get the function properties.
ARMVCC::VPTCodes getVPTInstrPredicate(const MachineInstr &MI, Register &PredReg)
const HexagonInstrInfo * TII
Describe properties that are true of each instruction in the target description file.
MachineOperand class - Representation of each machine instruction operand.
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
MachineFunctionProperties & set(Property P)
Pair of physical register and lane mask.
static cl::opt< bool > DisableTailPredication("arm-loloops-disable-tailpred", cl::Hidden, cl::desc("Disable tail-predication in the ARM LowOverheadLoop pass"), cl::init(false))
Special value supplied for machine level alias analysis.
static bool isVectorPredicate(MachineInstr *MI)
static unsigned VCMPOpcodeToVPT(unsigned Opcode)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
static bool isVPTOpcode(int Opc)
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
void RevertDoLoopStart(MachineInstr *MI, const TargetInstrInfo *TII)
@ RetainsPreviousHalfElement
initializer< Ty > init(const Ty &Val)
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::ZeroOrMore, cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isVCTP(const MachineInstr *MI)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
print Print MemDeps of function
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
static bool isRegInClass(const MachineOperand &MO, const TargetRegisterClass *Class)
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool insert(const value_type &X)
Insert a new element into the SetVector.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
MachineInstrBundleIterator< MachineInstr > iterator
static bool isHorizontalReduction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
iterator_range< succ_iterator > successors()
StringRef - Represent a constant reference to a string, i.e.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
void getReachingLocalUses(MachineInstr *MI, MCRegister PhysReg, InstSet &Uses) const
Provides the uses, in the same block as MI, of register that MI defines.
bool isOutermost() const
Return true if the loop does not have a parent (natural) loop.
void RevertWhileLoopStartLR(MachineInstr *MI, const TargetInstrInfo *TII, unsigned BrOpc=ARM::t2Bcc, bool UseCmp=false)
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
static bool isWhileLoopStart(const MachineInstr &MI)
MachineBasicBlock * getWhileLoopStartTargetBB(const MachineInstr &MI)
const MachineBasicBlock * getParent() const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
void collectKilledOperands(MachineInstr *MI, InstSet &Dead) const
Assuming MI is dead, recursively search the incoming operands which are killed by MI and collect thos...
void clear()
Completely clear the SetVector.
static bool ValidateMVEStore(MachineInstr *MI, MachineLoop *ML)
bool isSafeToDefRegAt(MachineInstr *MI, MCRegister PhysReg) const
Return whether a MachineInstr could be inserted at MI and safely define the given register without af...
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
BlockT * getHeader() const
void recomputeVPTBlockMask(MachineInstr &Instr)
void getGlobalReachingDefs(MachineInstr *MI, MCRegister PhysReg, InstSet &Defs) const
Collect all possible definitions of the value stored in PhysReg, which is used by MI.
static bool canGenerateNonZeros(const MachineInstr &MI)
iterator end()
Get an iterator to the end of the SetVector.
#define ARM_LOW_OVERHEAD_LOOPS_NAME
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
ReachingDefAnalysis & RDA
LLVM_NODISCARD bool empty() const
bool isReachingDefLiveOut(MachineInstr *MI, MCRegister PhysReg) const
Return whether the reaching def for MI also is live out of its parent block.
void addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB, Register DestReg)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
FunctionPass class - This class is used to implement most global optimizations.
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
MachineInstr * getMIOperand(MachineInstr *MI, unsigned Idx) const
If a single MachineInstr creates the reaching definition, for MIs operand at Idx, then return it.
AnalysisUsage & addRequired()
A vector that has set insertion semantics.
bool contains(ConstPtrType Ptr) const
static bool retainsPreviousHalfElement(const MachineInstr &MI)
static bool isMovRegOpcode(int Opc)
iterator_range< const_opInfo_iterator > operands() const
static bool isSubImmOpcode(int Opc)
static unsigned getTailPredVectorWidth(unsigned Opcode)
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
A Use represents the edge between a Value definition and its users.
reference emplace_back(ArgTypes &&... Args)
bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To) const
Return whether From can be moved forwards to just before To.
Wrapper class representing physical registers. Should be passed by value.
static bool isLoopStart(const MachineInstr &MI)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
iterator insert(iterator I, T &&Elt)