42#define DEBUG_TYPE "aarch64-ccmp"
48 cl::desc(
"Maximum number of instructions per speculated block."));
54STATISTIC(NumConsidered,
"Number of ccmps considered");
55STATISTIC(NumPhiRejs,
"Number of ccmps rejected (PHI)");
56STATISTIC(NumPhysRejs,
"Number of ccmps rejected (Physregs)");
57STATISTIC(NumPhi2Rejs,
"Number of ccmps rejected (PHI2)");
58STATISTIC(NumHeadBranchRejs,
"Number of ccmps rejected (Head branch)");
59STATISTIC(NumCmpBranchRejs,
"Number of ccmps rejected (CmpBB branch)");
60STATISTIC(NumCmpTermRejs,
"Number of ccmps rejected (CmpBB is cbz...)");
61STATISTIC(NumImmRangeRejs,
"Number of ccmps rejected (Imm out of range)");
62STATISTIC(NumLiveDstRejs,
"Number of ccmps rejected (Cmp dest live)");
63STATISTIC(NumMultNZCVUses,
"Number of ccmps rejected (NZCV used)");
64STATISTIC(NumUnknNZCVDefs,
"Number of ccmps rejected (NZCV def unknown)");
66STATISTIC(NumSpeculateRejs,
"Number of ccmps rejected (Can't speculate)");
68STATISTIC(NumConverted,
"Number of ccmp instructions created");
69STATISTIC(NumCompBranches,
"Number of cbz/cbnz branches converted");
172 bool trivialTailPHIs();
175 void updateTailPHIs();
178 bool isDeadDef(
unsigned DstReg);
201 bool canConvert(MachineBasicBlock *
MBB);
205 void convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks);
209 int expectedCodeSizeDelta()
const;
216 MI->getOpcode() == TargetOpcode::COPY) {
217 if (
MI->getOperand(1).getReg().isPhysical())
219 Reg =
MI->getOperand(1).getReg();
226bool SSACCmpConv::trivialTailPHIs() {
227 for (
auto &
I : *
Tail) {
230 unsigned HeadReg = 0, CmpBBReg = 0;
232 for (
unsigned oi = 1, oe =
I.getNumOperands(); oi != oe; oi += 2) {
233 MachineBasicBlock *
MBB =
I.getOperand(oi + 1).getMBB();
236 assert((!HeadReg || HeadReg ==
Reg) &&
"Inconsistent PHI operands");
240 assert((!CmpBBReg || CmpBBReg ==
Reg) &&
"Inconsistent PHI operands");
244 if (HeadReg != CmpBBReg)
252void SSACCmpConv::updateTailPHIs() {
253 for (
auto &
I : *
Tail) {
257 for (
unsigned oi =
I.getNumOperands(); oi > 2; oi -= 2) {
259 if (
I.getOperand(oi - 1).getMBB() == CmpBB) {
260 I.removeOperand(oi - 1);
261 I.removeOperand(oi - 2);
269bool SSACCmpConv::isDeadDef(
unsigned DstReg) {
271 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
273 if (!Register::isVirtualRegister(DstReg))
286 assert(
Cond.size() == 1 &&
"Unknown Cond array format");
298 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
303 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
309MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *
MBB) {
314 if (!
I->readsRegister(AArch64::NZCV,
nullptr)) {
315 switch (
I->getOpcode()) {
331 assert(!
I->isTerminator() &&
"Spurious terminator");
332 switch (
I->getOpcode()) {
334 case AArch64::SUBSWri:
335 case AArch64::SUBSXri:
337 case AArch64::ADDSWri:
338 case AArch64::ADDSXri:
341 if (
I->getOperand(3).getImm() || !
isUInt<5>(
I->getOperand(2).getImm())) {
347 case AArch64::SUBSWrr:
348 case AArch64::SUBSXrr:
349 case AArch64::ADDSWrr:
350 case AArch64::ADDSXrr:
351 if (isDeadDef(
I->getOperand(0).getReg()))
353 LLVM_DEBUG(
dbgs() <<
"Can't convert compare with live destination: "
357 case AArch64::FCMPSrr:
358 case AArch64::FCMPDrr:
359 case AArch64::FCMPESrr:
360 case AArch64::FCMPEDrr:
392bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *
MBB,
393 const MachineInstr *CmpMI) {
406 if (
I.isDebugInstr())
430 bool DontMoveAcrossStore =
true;
431 if (!
I.isSafeToMove(DontMoveAcrossStore)) {
437 if (&
I != CmpMI &&
I.modifiesRegister(AArch64::NZCV,
TRI)) {
448bool SSACCmpConv::canConvert(MachineBasicBlock *
MBB) {
450 Tail = CmpBB =
nullptr;
454 MachineBasicBlock *Succ0 = Head->
succ_begin()[0];
455 MachineBasicBlock *Succ1 = Head->
succ_begin()[1];
483 if (!trivialTailPHIs()) {
489 if (!
Tail->livein_empty()) {
504 LLVM_DEBUG(
dbgs() <<
"Can't handle live-in physregs in CmpBB.\n");
511 MachineBasicBlock *
TBB =
nullptr, *FBB =
nullptr;
522 dbgs() <<
"analyzeBranch didn't find conditional branch in Head.\n");
549 dbgs() <<
"analyzeBranch didn't find conditional branch in CmpBB.\n");
554 if (!
parseCond(CmpBBCond, CmpBBTailCC)) {
565 <<
", CmpBB->Tail on "
568 CmpMI = findConvertibleCompare(CmpBB);
572 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
579void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
608 Head2Tail + Head2CmpBB * CmpBB2Tail);
627 if (HeadCond[0].
getImm() == -1) {
630 switch (HeadCond[1].
getImm()) {
633 Opc = AArch64::SUBSWri;
637 Opc = AArch64::SUBSXri;
642 const MCInstrDesc &MCID =
TII->get(
Opc);
647 .
addReg(DestReg, RegState::Define | RegState::Dead)
660 unsigned FirstOp = 1;
661 bool isZBranch =
false;
665 case AArch64::SUBSWri:
Opc = AArch64::CCMPWi;
break;
666 case AArch64::SUBSWrr:
Opc = AArch64::CCMPWr;
break;
667 case AArch64::SUBSXri:
Opc = AArch64::CCMPXi;
break;
668 case AArch64::SUBSXrr:
Opc = AArch64::CCMPXr;
break;
669 case AArch64::ADDSWri:
Opc = AArch64::CCMNWi;
break;
670 case AArch64::ADDSWrr:
Opc = AArch64::CCMNWr;
break;
671 case AArch64::ADDSXri:
Opc = AArch64::CCMNXi;
break;
672 case AArch64::ADDSXrr:
Opc = AArch64::CCMNXr;
break;
673 case AArch64::FCMPSrr:
Opc = AArch64::FCCMPSrr; FirstOp = 0;
break;
674 case AArch64::FCMPDrr:
Opc = AArch64::FCCMPDrr; FirstOp = 0;
break;
675 case AArch64::FCMPESrr:
Opc = AArch64::FCCMPESrr; FirstOp = 0;
break;
676 case AArch64::FCMPEDrr:
Opc = AArch64::FCCMPEDrr; FirstOp = 0;
break;
679 Opc = AArch64::CCMPWi;
685 Opc = AArch64::CCMPXi;
697 const MCInstrDesc &MCID =
TII->get(
Opc);
699 TII->getRegClass(MCID, 0));
702 TII->getRegClass(MCID, 1));
714 bool isNZ = CmpMI->
getOpcode() == AArch64::CBNZW ||
728int SSACCmpConv::expectedCodeSizeDelta()
const {
733 if (HeadCond[0].
getImm() == -1) {
734 switch (HeadCond[1].
getImm()) {
768class AArch64ConditionalComparesImpl {
769 const MachineBranchProbabilityInfo *MBPI;
770 const TargetInstrInfo *
TII;
771 const TargetRegisterInfo *
TRI;
772 MCSchedModel SchedModel;
775 MachineRegisterInfo *MRI;
776 MachineDominatorTree *DomTree;
777 MachineLoopInfo *
Loops;
778 MachineTraceMetrics *Traces;
783 AArch64ConditionalComparesImpl(
const MachineBranchProbabilityInfo *MBPI,
784 MachineDominatorTree *DomTree,
785 MachineLoopInfo *
Loops,
786 MachineTraceMetrics *Traces)
787 : MBPI(MBPI), DomTree(DomTree),
Loops(
Loops), Traces(Traces) {}
789 bool run(MachineFunction &MF);
792 bool tryConvert(MachineBasicBlock *);
795 void invalidateTraces();
799class AArch64ConditionalComparesLegacy :
public MachineFunctionPass {
802 AArch64ConditionalComparesLegacy() : MachineFunctionPass(
ID) {
806 void getAnalysisUsage(AnalysisUsage &AU)
const override;
807 bool runOnMachineFunction(MachineFunction &MF)
override;
808 StringRef getPassName()
const override {
809 return "AArch64 Conditional Compares";
814char AArch64ConditionalComparesLegacy::ID = 0;
817 "AArch64 CCMP Pass",
false,
false)
825 return new AArch64ConditionalComparesLegacy();
828void AArch64ConditionalComparesLegacy::getAnalysisUsage(
841void AArch64ConditionalComparesImpl::updateDomTree(
846 for (MachineBasicBlock *RemovedMBB : Removed) {
848 assert(Node != HeadNode &&
"Cannot erase the head node");
849 assert(
Node->getIDom() == HeadNode &&
"CmpBB should be dominated by Head");
850 while (!
Node->isLeaf())
857void AArch64ConditionalComparesImpl::updateLoops(
861 for (MachineBasicBlock *RemovedMBB : Removed)
862 Loops->removeBlock(RemovedMBB);
866void AArch64ConditionalComparesImpl::invalidateTraces() {
874bool AArch64ConditionalComparesImpl::shouldConvert() {
879 MinInstr = Traces->
getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
886 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
887 LLVM_DEBUG(
dbgs() <<
"Code size delta: " << CodeSizeDelta <<
'\n');
890 if (CodeSizeDelta < 0)
892 if (CodeSizeDelta > 0) {
893 LLVM_DEBUG(
dbgs() <<
"Code size is increasing, give up on this one.\n");
910 unsigned CmpBBDepth =
913 <<
"\nCmpBB depth: " << CmpBBDepth <<
'\n');
914 if (CmpBBDepth > HeadDepth + DelayLimit) {
915 LLVM_DEBUG(
dbgs() <<
"Branch delay would be larger than " << DelayLimit
928 if (ResDepth > HeadDepth) {
935bool AArch64ConditionalComparesImpl::tryConvert(MachineBasicBlock *
MBB) {
939 SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
940 CmpConv.convert(RemovedBlocks);
942 updateDomTree(RemovedBlocks);
943 updateLoops(RemovedBlocks);
944 for (MachineBasicBlock *
MBB : RemovedBlocks)
950bool AArch64ConditionalComparesImpl::run(MachineFunction &MF) {
951 LLVM_DEBUG(
dbgs() <<
"********** AArch64 Conditional Compares **********\n"
952 <<
"********** Function: " << MF.
getName() <<
'\n');
962 CmpConv.runOnMachineFunction(MF, MBPI);
970 if (tryConvert(
I->getBlock()))
976bool AArch64ConditionalComparesLegacy::runOnMachineFunction(
977 MachineFunction &MF) {
981 const MachineBranchProbabilityInfo *MBPI =
982 &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
983 MachineDominatorTree *DomTree =
984 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
985 MachineLoopInfo *
Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
986 MachineTraceMetrics *Traces =
987 &getAnalysis<MachineTraceMetricsWrapperPass>().getMTM();
989 AArch64ConditionalComparesImpl Impl(MBPI, DomTree,
Loops, Traces);
1004 AArch64ConditionalComparesImpl Impl(MBPI, DomTree,
Loops, Traces);
static Register lookThroughCopies(Register Reg, MachineRegisterInfo *MRI)
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static unsigned InstrCount
This file builds on the ADT/GraphTraits.h file to build a generic depth-first graph iterator.
static cl::opt< bool > Stress("stress-early-ifcvt", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
Promote Memory to Register
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
void eraseNode(NodeT *BB)
eraseNode - Removes a node from the dominator tree.
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
FunctionPass class - This class is used to implement most global optimizations.
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
unsigned pred_size() const
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
LLVM_ABI void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
succ_iterator succ_begin()
bool livein_empty() const
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
LLVM_ABI void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
LLVM_ABI bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Analysis pass which computes a MachineDominatorTree.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
Analysis pass that exposes the MachineLoopInfo for a machine function.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
Trace getTrace(const MachineBasicBlock *MBB)
Get the trace that passes through MBB.
InstrCycles getInstrCycles(const MachineInstr &MI) const
Return the depth and height of MI.
unsigned getResourceDepth(bool Bottom) const
Return the resource depth of the top/bottom of the trace center block.
Ensemble * getEnsemble(MachineTraceStrategy)
Get the trace ensemble representing the given trace selection strategy.
void invalidate(const MachineBasicBlock *MBB)
Invalidate cached information about MBB.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Wrapper class representing virtual and physical registers.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< NodeBase * > Node
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI)
AnalyzePhysRegInBundle - Analyze how the current instruction or bundle uses a physical register.
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
FunctionPass * createAArch64ConditionalCompares()
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
DomTreeNodeBase< MachineBasicBlock > MachineDomTreeNode
ArrayRef(const T &OneElt) -> ArrayRef< T >
iterator_range< df_iterator< T > > depth_first(const T &G)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
void initializeAArch64ConditionalComparesLegacyPass(PassRegistry &)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
unsigned MispredictPenalty
unsigned Depth
Earliest issue cycle as determined by data dependencies and instruction latencies from the beginning ...
bool Read
Reg or one of its aliases is read.
bool Defined
Reg or one of its aliases is defined.
bool Clobbered
There is a regmask operand indicating Reg is clobbered.