Go to the documentation of this file.
43 #define DEBUG_TYPE "aarch64-ccmp"
49 cl::desc(
"Maximum number of instructions per speculated block."));
55 STATISTIC(NumConsidered,
"Number of ccmps considered");
56 STATISTIC(NumPhiRejs,
"Number of ccmps rejected (PHI)");
57 STATISTIC(NumPhysRejs,
"Number of ccmps rejected (Physregs)");
58 STATISTIC(NumPhi2Rejs,
"Number of ccmps rejected (PHI2)");
59 STATISTIC(NumHeadBranchRejs,
"Number of ccmps rejected (Head branch)");
60 STATISTIC(NumCmpBranchRejs,
"Number of ccmps rejected (CmpBB branch)");
61 STATISTIC(NumCmpTermRejs,
"Number of ccmps rejected (CmpBB is cbz...)");
62 STATISTIC(NumImmRangeRejs,
"Number of ccmps rejected (Imm out of range)");
63 STATISTIC(NumLiveDstRejs,
"Number of ccmps rejected (Cmp dest live)");
64 STATISTIC(NumMultNZCVUses,
"Number of ccmps rejected (NZCV used)");
65 STATISTIC(NumUnknNZCVDefs,
"Number of ccmps rejected (NZCV def unknown)");
67 STATISTIC(NumSpeculateRejs,
"Number of ccmps rejected (Can't speculate)");
69 STATISTIC(NumConverted,
"Number of ccmp instructions created");
70 STATISTIC(NumCompBranches,
"Number of cbz/cbnz branches converted");
173 bool trivialTailPHIs();
176 void updateTailPHIs();
179 bool isDeadDef(
unsigned DstReg);
210 int expectedCodeSizeDelta()
const;
216 bool SSACCmpConv::trivialTailPHIs() {
217 for (
auto &
I : *
Tail) {
220 unsigned HeadReg = 0, CmpBBReg = 0;
222 for (
unsigned oi = 1, oe =
I.getNumOperands(); oi != oe; oi += 2) {
226 assert((!HeadReg || HeadReg ==
Reg) &&
"Inconsistent PHI operands");
230 assert((!CmpBBReg || CmpBBReg ==
Reg) &&
"Inconsistent PHI operands");
234 if (HeadReg != CmpBBReg)
242 void SSACCmpConv::updateTailPHIs() {
243 for (
auto &
I : *
Tail) {
247 for (
unsigned oi =
I.getNumOperands(); oi > 2; oi -= 2) {
249 if (
I.getOperand(oi - 1).getMBB() == CmpBB) {
250 I.removeOperand(oi - 1);
251 I.removeOperand(oi - 2);
259 bool SSACCmpConv::isDeadDef(
unsigned DstReg) {
261 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
275 if (
Cond[0].getImm() != -1) {
276 assert(
Cond.size() == 1 &&
"Unknown Cond array format");
281 switch (
Cond[1].getImm()) {
288 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
293 assert(
Cond.size() == 3 &&
"Unknown Cond array format");
304 if (!
I->readsRegister(AArch64::NZCV)) {
305 switch (
I->getOpcode()) {
321 assert(!
I->isTerminator() &&
"Spurious terminator");
322 switch (
I->getOpcode()) {
324 case AArch64::SUBSWri:
325 case AArch64::SUBSXri:
327 case AArch64::ADDSWri:
328 case AArch64::ADDSXri:
331 if (
I->getOperand(3).getImm() || !isUInt<5>(
I->getOperand(2).getImm())) {
337 case AArch64::SUBSWrr:
338 case AArch64::SUBSXrr:
339 case AArch64::ADDSWrr:
340 case AArch64::ADDSXrr:
341 if (isDeadDef(
I->getOperand(0).getReg()))
343 LLVM_DEBUG(
dbgs() <<
"Can't convert compare with live destination: "
347 case AArch64::FCMPSrr:
348 case AArch64::FCMPDrr:
349 case AArch64::FCMPESrr:
350 case AArch64::FCMPEDrr:
396 if (
I.isDebugInstr())
420 bool DontMoveAcrossStore =
true;
421 if (!
I.isSafeToMove(
nullptr, DontMoveAcrossStore)) {
427 if (&
I != CmpMI &&
I.modifiesRegister(AArch64::NZCV,
TRI)) {
440 Tail = CmpBB =
nullptr;
473 if (!trivialTailPHIs()) {
479 if (!
Tail->livein_empty()) {
494 LLVM_DEBUG(
dbgs() <<
"Can't handle live-in physregs in CmpBB.\n");
510 if (!TBB || HeadCond.empty()) {
512 dbgs() <<
"analyzeBranch didn't find conditional branch in Head.\n");
537 if (!TBB || CmpBBCond.empty()) {
539 dbgs() <<
"analyzeBranch didn't find conditional branch in CmpBB.\n");
544 if (!
parseCond(CmpBBCond, CmpBBTailCC)) {
555 <<
", CmpBB->Tail on "
558 CmpMI = findConvertibleCompare(CmpBB);
562 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
598 Head2Tail + Head2CmpBB * CmpBB2Tail);
617 if (HeadCond[0].getImm() == -1) {
620 switch (HeadCond[1].getImm()) {
623 Opc = AArch64::SUBSWri;
627 Opc = AArch64::SUBSXri;
644 TII->getRegClass(MCID, 1,
TRI, *MF));
652 unsigned FirstOp = 1;
653 bool isZBranch =
false;
657 case AArch64::SUBSWri: Opc = AArch64::CCMPWi;
break;
658 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr;
break;
659 case AArch64::SUBSXri: Opc = AArch64::CCMPXi;
break;
660 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr;
break;
661 case AArch64::ADDSWri: Opc = AArch64::CCMNWi;
break;
662 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr;
break;
663 case AArch64::ADDSXri: Opc = AArch64::CCMNXi;
break;
664 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr;
break;
665 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0;
break;
666 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0;
break;
667 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0;
break;
668 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0;
break;
671 Opc = AArch64::CCMPWi;
677 Opc = AArch64::CCMPXi;
691 TII->getRegClass(MCID, 0,
TRI, *MF));
694 TII->getRegClass(MCID, 1,
TRI, *MF));
706 bool isNZ = CmpMI->
getOpcode() == AArch64::CBNZW ||
715 RemovedBlocks.push_back(CmpBB);
721 int SSACCmpConv::expectedCodeSizeDelta()
const {
726 if (HeadCond[0].getImm() == -1) {
727 switch (HeadCond[1].getImm()) {
783 return "AArch64 Conditional Compares";
790 void invalidateTraces();
798 "AArch64 CCMP Pass",
false,
false)
806 return new AArch64ConditionalCompares();
809 void AArch64ConditionalCompares::getAnalysisUsage(
AnalysisUsage &AU)
const {
821 void AArch64ConditionalCompares::updateDomTree(
828 assert(Node != HeadNode &&
"Cannot erase the head node");
829 assert(Node->getIDom() == HeadNode &&
"CmpBB should be dominated by Head");
830 while (Node->getNumChildren())
831 DomTree->changeImmediateDominator(Node->back(), HeadNode);
832 DomTree->eraseNode(RemovedMBB);
842 Loops->removeBlock(RemovedMBB);
846 void AArch64ConditionalCompares::invalidateTraces() {
847 Traces->invalidate(CmpConv.Head);
848 Traces->invalidate(CmpConv.CmpBB);
866 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
867 LLVM_DEBUG(
dbgs() <<
"Code size delta: " << CodeSizeDelta <<
'\n');
870 if (CodeSizeDelta < 0)
872 if (CodeSizeDelta > 0) {
873 LLVM_DEBUG(
dbgs() <<
"Code size is increasing, give up on this one.\n");
885 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
889 Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
890 unsigned CmpBBDepth =
891 Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
893 <<
"\nCmpBB depth: " << CmpBBDepth <<
'\n');
894 if (CmpBBDepth > HeadDepth + DelayLimit) {
895 LLVM_DEBUG(
dbgs() <<
"Branch delay would be larger than " << DelayLimit
902 unsigned ResDepth =
Trace.getResourceDepth(
true);
908 if (ResDepth > HeadDepth) {
916 bool Changed =
false;
920 CmpConv.convert(RemovedBlocks);
922 updateDomTree(RemovedBlocks);
923 updateLoops(RemovedBlocks);
928 bool AArch64ConditionalCompares::runOnMachineFunction(
MachineFunction &MF) {
929 LLVM_DEBUG(
dbgs() <<
"********** AArch64 Conditional Compares **********\n"
930 <<
"********** Function: " << MF.
getName() <<
'\n');
938 DomTree = &getAnalysis<MachineDominatorTree>();
939 Loops = getAnalysisIfAvailable<MachineLoopInfo>();
940 MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
941 Traces = &getAnalysis<MachineTraceMetrics>();
945 bool Changed =
false;
946 CmpConv.runOnMachineFunction(MF, MBPI);
954 if (tryConvert(
I->getBlock()))
unsigned succ_size() const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
This is an optimization pass for GlobalISel generic memory operations.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
@ Define
Register definition.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
const MachineInstrBuilder & add(const MachineOperand &MO) const
virtual const TargetInstrInfo * getInstrInfo() const
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
Reg
All possible values of the reg field in the ModR/M byte.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
@ TS_MinInstrCount
Select the trace through a block that has the fewest instructions.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static unsigned InstrCount
@ Tail
Tail - This calling convention attempts to make calls as fast as possible while guaranteeing that tail...
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp", "AArch64 CCMP Pass", false, false) INITIALIZE_PASS_END(AArch64ConditionalCompares
unsigned const TargetRegisterInfo * TRI
PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI)
AnalyzePhysRegInBundle - Analyze how the current instruction or bundle uses a physical register.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool Clobbered
There is a regmask operand indicating Reg is clobbered.
unsigned pred_size() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
TargetInstrInfo - Interface to description of machine instruction set.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const MachineOperand & getOperand(unsigned i) const
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
Represent the analysis usage information of a pass.
bool Read
Reg or one of its aliases is read.
const HexagonInstrInfo * TII
Describe properties that are true of each instruction in the target description file.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
STATISTIC(NumFunctions, "Total number of functions")
static const char * getCondCodeName(CondCode Code)
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
void initializeAArch64ConditionalComparesPass(PassRegistry &)
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Representation of each machine instruction.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
initializer< Ty > init(const Ty &Val)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
succ_iterator succ_begin()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Information about how a physical register Reg is used by a set of operands.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Register getReg() const
getReg - Returns the register number.
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
SmallVector< MachineOperand, 4 > Cond
StringRef - Represent a constant reference to a string, i.e.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool Defined
Reg or one of its aliases is defined.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
Base class for the actual dominator tree node.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
unsigned const MachineRegisterInfo * MRI
Wrapper class representing virtual and physical registers.
iterator_range< df_iterator< T > > depth_first(const T &G)
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
Function & getFunction()
Return the LLVM function that this machine code represents.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
Machine model for scheduling, bundling, and heuristics.
static CondCode getInvertedCondCode(CondCode Code)
bool livein_empty() const
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
Pass interface - Implemented by all 'passes'.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
FunctionPass class - This class is used to implement most global optimizations.
AnalysisUsage & addRequired()
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
FunctionPass * createAArch64ConditionalCompares()
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))