LLVM 20.0.0git
AArch64ConditionalCompares.cpp
Go to the documentation of this file.
1//===-- AArch64ConditionalCompares.cpp --- CCMP formation for AArch64 -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64ConditionalCompares pass which reduces
10// branching and code size by using the conditional compare instructions CCMP,
11// CCMN, and FCMP.
12//
13// The CFG transformations for forming conditional compares are very similar to
14// if-conversion, and this pass should run immediately before the early
15// if-conversion pass.
16//
17//===----------------------------------------------------------------------===//
18
19#include "AArch64.h"
21#include "llvm/ADT/Statistic.h"
30#include "llvm/CodeGen/Passes.h"
36#include "llvm/Support/Debug.h"
38
39using namespace llvm;
40
41#define DEBUG_TYPE "aarch64-ccmp"
42
43// Absolute maximum number of instructions allowed per speculated block.
44// This bypasses all other heuristics, so it should be set fairly high.
46 "aarch64-ccmp-limit", cl::init(30), cl::Hidden,
47 cl::desc("Maximum number of instructions per speculated block."));
48
49// Stress testing mode - disable heuristics.
50static cl::opt<bool> Stress("aarch64-stress-ccmp", cl::Hidden,
51 cl::desc("Turn all knobs to 11"));
52
53STATISTIC(NumConsidered, "Number of ccmps considered");
54STATISTIC(NumPhiRejs, "Number of ccmps rejected (PHI)");
55STATISTIC(NumPhysRejs, "Number of ccmps rejected (Physregs)");
56STATISTIC(NumPhi2Rejs, "Number of ccmps rejected (PHI2)");
57STATISTIC(NumHeadBranchRejs, "Number of ccmps rejected (Head branch)");
58STATISTIC(NumCmpBranchRejs, "Number of ccmps rejected (CmpBB branch)");
59STATISTIC(NumCmpTermRejs, "Number of ccmps rejected (CmpBB is cbz...)");
60STATISTIC(NumImmRangeRejs, "Number of ccmps rejected (Imm out of range)");
61STATISTIC(NumLiveDstRejs, "Number of ccmps rejected (Cmp dest live)");
62STATISTIC(NumMultNZCVUses, "Number of ccmps rejected (NZCV used)");
63STATISTIC(NumUnknNZCVDefs, "Number of ccmps rejected (NZCV def unknown)");
64
65STATISTIC(NumSpeculateRejs, "Number of ccmps rejected (Can't speculate)");
66
67STATISTIC(NumConverted, "Number of ccmp instructions created");
68STATISTIC(NumCompBranches, "Number of cbz/cbnz branches converted");
69
70//===----------------------------------------------------------------------===//
71// SSACCmpConv
72//===----------------------------------------------------------------------===//
73//
74// The SSACCmpConv class performs ccmp-conversion on SSA form machine code
75// after determining if it is possible. The class contains no heuristics;
76// external code should be used to determine when ccmp-conversion is a good
77// idea.
78//
79// CCmp-formation works on a CFG representing chained conditions, typically
80// from C's short-circuit || and && operators:
81//
82// From: Head To: Head
83// / | CmpBB
84// / | / |
85// | CmpBB / |
86// | / | Tail |
87// | / | | |
88// Tail | | |
89// | | | |
90// ... ... ... ...
91//
92// The Head block is terminated by a br.cond instruction, and the CmpBB block
93// contains compare + br.cond. Tail must be a successor of both.
94//
95// The cmp-conversion turns the compare instruction in CmpBB into a conditional
96// compare, and merges CmpBB into Head, speculatively executing its
97// instructions. The AArch64 conditional compare instructions have an immediate
98// operand that specifies the NZCV flag values when the condition is false and
99// the compare isn't executed. This makes it possible to chain compares with
100// different condition codes.
101//
102// Example:
103//
104// if (a == 5 || b == 17)
105// foo();
106//
107// Head:
108// cmp w0, #5
109// b.eq Tail
110// CmpBB:
111// cmp w1, #17
112// b.eq Tail
113// ...
114// Tail:
115// bl _foo
116//
117// Becomes:
118//
119// Head:
120// cmp w0, #5
121// ccmp w1, #17, 4, ne ; 4 = nZcv
122// b.eq Tail
123// ...
124// Tail:
125// bl _foo
126//
127// The ccmp condition code is the one that would cause the Head terminator to
128// branch to CmpBB.
129//
130// FIXME: It should also be possible to speculate a block on the critical edge
131// between Head and Tail, just like if-converting a diamond.
132//
133// FIXME: Handle PHIs in Tail by turning them into selects (if-conversion).
134
135namespace {
136class SSACCmpConv {
137 MachineFunction *MF;
138 const TargetInstrInfo *TII;
139 const TargetRegisterInfo *TRI;
142
143public:
144 /// The first block containing a conditional branch, dominating everything
145 /// else.
146 MachineBasicBlock *Head;
147
148 /// The block containing cmp+br.cond with a successor shared with Head.
149 MachineBasicBlock *CmpBB;
150
151 /// The common successor for Head and CmpBB.
153
154 /// The compare instruction in CmpBB that can be converted to a ccmp.
155 MachineInstr *CmpMI;
156
157private:
158 /// The branch condition in Head as determined by analyzeBranch.
160
161 /// The condition code that makes Head branch to CmpBB.
162 AArch64CC::CondCode HeadCmpBBCC;
163
164 /// The branch condition in CmpBB.
166
167 /// The condition code that makes CmpBB branch to Tail.
168 AArch64CC::CondCode CmpBBTailCC;
169
170 /// Check if the Tail PHIs are trivially convertible.
171 bool trivialTailPHIs();
172
173 /// Remove CmpBB from the Tail PHIs.
174 void updateTailPHIs();
175
176 /// Check if an operand defining DstReg is dead.
177 bool isDeadDef(unsigned DstReg);
178
179 /// Find the compare instruction in MBB that controls the conditional branch.
180 /// Return NULL if a convertible instruction can't be found.
181 MachineInstr *findConvertibleCompare(MachineBasicBlock *MBB);
182
183 /// Return true if all non-terminator instructions in MBB can be safely
184 /// speculated.
185 bool canSpeculateInstrs(MachineBasicBlock *MBB, const MachineInstr *CmpMI);
186
187public:
188 /// runOnMachineFunction - Initialize per-function data structures.
189 void runOnMachineFunction(MachineFunction &MF,
190 const MachineBranchProbabilityInfo *MBPI) {
191 this->MF = &MF;
192 this->MBPI = MBPI;
195 MRI = &MF.getRegInfo();
196 }
197
198 /// If the sub-CFG headed by MBB can be cmp-converted, initialize the
199 /// internal state, and return true.
200 bool canConvert(MachineBasicBlock *MBB);
201
202 /// Cmo-convert the last block passed to canConvertCmp(), assuming
203 /// it is possible. Add any erased blocks to RemovedBlocks.
205
206 /// Return the expected code size delta if the conversion into a
207 /// conditional compare is performed.
208 int expectedCodeSizeDelta() const;
209};
210} // end anonymous namespace
211
212// Check that all PHIs in Tail are selecting the same value from Head and CmpBB.
213// This means that no if-conversion is required when merging CmpBB into Head.
214bool SSACCmpConv::trivialTailPHIs() {
215 for (auto &I : *Tail) {
216 if (!I.isPHI())
217 break;
218 unsigned HeadReg = 0, CmpBBReg = 0;
219 // PHI operands come in (VReg, MBB) pairs.
220 for (unsigned oi = 1, oe = I.getNumOperands(); oi != oe; oi += 2) {
221 MachineBasicBlock *MBB = I.getOperand(oi + 1).getMBB();
222 Register Reg = I.getOperand(oi).getReg();
223 if (MBB == Head) {
224 assert((!HeadReg || HeadReg == Reg) && "Inconsistent PHI operands");
225 HeadReg = Reg;
226 }
227 if (MBB == CmpBB) {
228 assert((!CmpBBReg || CmpBBReg == Reg) && "Inconsistent PHI operands");
229 CmpBBReg = Reg;
230 }
231 }
232 if (HeadReg != CmpBBReg)
233 return false;
234 }
235 return true;
236}
237
238// Assuming that trivialTailPHIs() is true, update the Tail PHIs by simply
239// removing the CmpBB operands. The Head operands will be identical.
240void SSACCmpConv::updateTailPHIs() {
241 for (auto &I : *Tail) {
242 if (!I.isPHI())
243 break;
244 // I is a PHI. It can have multiple entries for CmpBB.
245 for (unsigned oi = I.getNumOperands(); oi > 2; oi -= 2) {
246 // PHI operands are (Reg, MBB) at (oi-2, oi-1).
247 if (I.getOperand(oi - 1).getMBB() == CmpBB) {
248 I.removeOperand(oi - 1);
249 I.removeOperand(oi - 2);
250 }
251 }
252 }
253}
254
255// This pass runs before the AArch64DeadRegisterDefinitions pass, so compares
256// are still writing virtual registers without any uses.
257bool SSACCmpConv::isDeadDef(unsigned DstReg) {
258 // Writes to the zero register are dead.
259 if (DstReg == AArch64::WZR || DstReg == AArch64::XZR)
260 return true;
261 if (!Register::isVirtualRegister(DstReg))
262 return false;
263 // A virtual register def without any uses will be marked dead later, and
264 // eventually replaced by the zero register.
265 return MRI->use_nodbg_empty(DstReg);
266}
267
268// Parse a condition code returned by analyzeBranch, and compute the CondCode
269// corresponding to TBB.
270// Return
272 // A normal br.cond simply has the condition code.
273 if (Cond[0].getImm() != -1) {
274 assert(Cond.size() == 1 && "Unknown Cond array format");
275 CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
276 return true;
277 }
278 // For tbz and cbz instruction, the opcode is next.
279 switch (Cond[1].getImm()) {
280 default:
281 // This includes tbz / tbnz branches which can't be converted to
282 // ccmp + br.cond.
283 return false;
284 case AArch64::CBZW:
285 case AArch64::CBZX:
286 assert(Cond.size() == 3 && "Unknown Cond array format");
288 return true;
289 case AArch64::CBNZW:
290 case AArch64::CBNZX:
291 assert(Cond.size() == 3 && "Unknown Cond array format");
293 return true;
294 }
295}
296
297MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
299 if (I == MBB->end())
300 return nullptr;
301 // The terminator must be controlled by the flags.
302 if (!I->readsRegister(AArch64::NZCV, /*TRI=*/nullptr)) {
303 switch (I->getOpcode()) {
304 case AArch64::CBZW:
305 case AArch64::CBZX:
306 case AArch64::CBNZW:
307 case AArch64::CBNZX:
308 // These can be converted into a ccmp against #0.
309 return &*I;
310 }
311 ++NumCmpTermRejs;
312 LLVM_DEBUG(dbgs() << "Flags not used by terminator: " << *I);
313 return nullptr;
314 }
315
316 // Now find the instruction controlling the terminator.
317 for (MachineBasicBlock::iterator B = MBB->begin(); I != B;) {
318 I = prev_nodbg(I, MBB->begin());
319 assert(!I->isTerminator() && "Spurious terminator");
320 switch (I->getOpcode()) {
321 // cmp is an alias for subs with a dead destination register.
322 case AArch64::SUBSWri:
323 case AArch64::SUBSXri:
324 // cmn is an alias for adds with a dead destination register.
325 case AArch64::ADDSWri:
326 case AArch64::ADDSXri:
327 // Check that the immediate operand is within range, ccmp wants a uimm5.
328 // Rd = SUBSri Rn, imm, shift
329 if (I->getOperand(3).getImm() || !isUInt<5>(I->getOperand(2).getImm())) {
330 LLVM_DEBUG(dbgs() << "Immediate out of range for ccmp: " << *I);
331 ++NumImmRangeRejs;
332 return nullptr;
333 }
334 [[fallthrough]];
335 case AArch64::SUBSWrr:
336 case AArch64::SUBSXrr:
337 case AArch64::ADDSWrr:
338 case AArch64::ADDSXrr:
339 if (isDeadDef(I->getOperand(0).getReg()))
340 return &*I;
341 LLVM_DEBUG(dbgs() << "Can't convert compare with live destination: "
342 << *I);
343 ++NumLiveDstRejs;
344 return nullptr;
345 case AArch64::FCMPSrr:
346 case AArch64::FCMPDrr:
347 case AArch64::FCMPESrr:
348 case AArch64::FCMPEDrr:
349 return &*I;
350 }
351
352 // Check for flag reads and clobbers.
353 PhysRegInfo PRI = AnalyzePhysRegInBundle(*I, AArch64::NZCV, TRI);
354
355 if (PRI.Read) {
356 // The ccmp doesn't produce exactly the same flags as the original
357 // compare, so reject the transform if there are uses of the flags
358 // besides the terminators.
359 LLVM_DEBUG(dbgs() << "Can't create ccmp with multiple uses: " << *I);
360 ++NumMultNZCVUses;
361 return nullptr;
362 }
363
364 if (PRI.Defined || PRI.Clobbered) {
365 LLVM_DEBUG(dbgs() << "Not convertible compare: " << *I);
366 ++NumUnknNZCVDefs;
367 return nullptr;
368 }
369 }
370 LLVM_DEBUG(dbgs() << "Flags not defined in " << printMBBReference(*MBB)
371 << '\n');
372 return nullptr;
373}
374
375/// Determine if all the instructions in MBB can safely
376/// be speculated. The terminators are not considered.
377///
378/// Only CmpMI is allowed to clobber the flags.
379///
380bool SSACCmpConv::canSpeculateInstrs(MachineBasicBlock *MBB,
381 const MachineInstr *CmpMI) {
382 // Reject any live-in physregs. It's probably NZCV/EFLAGS, and very hard to
383 // get right.
384 if (!MBB->livein_empty()) {
385 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
386 return false;
387 }
388
389 unsigned InstrCount = 0;
390
391 // Check all instructions, except the terminators. It is assumed that
392 // terminators never have side effects or define any used register values.
393 for (auto &I : make_range(MBB->begin(), MBB->getFirstTerminator())) {
394 if (I.isDebugInstr())
395 continue;
396
397 if (++InstrCount > BlockInstrLimit && !Stress) {
398 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
399 << BlockInstrLimit << " instructions.\n");
400 return false;
401 }
402
403 // There shouldn't normally be any phis in a single-predecessor block.
404 if (I.isPHI()) {
405 LLVM_DEBUG(dbgs() << "Can't hoist: " << I);
406 return false;
407 }
408
409 // Don't speculate loads. Note that it may be possible and desirable to
410 // speculate GOT or constant pool loads that are guaranteed not to trap,
411 // but we don't support that for now.
412 if (I.mayLoad()) {
413 LLVM_DEBUG(dbgs() << "Won't speculate load: " << I);
414 return false;
415 }
416
417 // We never speculate stores, so an AA pointer isn't necessary.
418 bool DontMoveAcrossStore = true;
419 if (!I.isSafeToMove(DontMoveAcrossStore)) {
420 LLVM_DEBUG(dbgs() << "Can't speculate: " << I);
421 return false;
422 }
423
424 // Only CmpMI is allowed to clobber the flags.
425 if (&I != CmpMI && I.modifiesRegister(AArch64::NZCV, TRI)) {
426 LLVM_DEBUG(dbgs() << "Clobbers flags: " << I);
427 return false;
428 }
429 }
430 return true;
431}
432
433/// Analyze the sub-cfg rooted in MBB, and return true if it is a potential
434/// candidate for cmp-conversion. Fill out the internal state.
435///
436bool SSACCmpConv::canConvert(MachineBasicBlock *MBB) {
437 Head = MBB;
438 Tail = CmpBB = nullptr;
439
440 if (Head->succ_size() != 2)
441 return false;
442 MachineBasicBlock *Succ0 = Head->succ_begin()[0];
443 MachineBasicBlock *Succ1 = Head->succ_begin()[1];
444
445 // CmpBB can only have a single predecessor. Tail is allowed many.
446 if (Succ0->pred_size() != 1)
447 std::swap(Succ0, Succ1);
448
449 // Succ0 is our candidate for CmpBB.
450 if (Succ0->pred_size() != 1 || Succ0->succ_size() != 2)
451 return false;
452
453 CmpBB = Succ0;
454 Tail = Succ1;
455
456 if (!CmpBB->isSuccessor(Tail))
457 return false;
458
459 // The CFG topology checks out.
460 LLVM_DEBUG(dbgs() << "\nTriangle: " << printMBBReference(*Head) << " -> "
461 << printMBBReference(*CmpBB) << " -> "
462 << printMBBReference(*Tail) << '\n');
463 ++NumConsidered;
464
465 // Tail is allowed to have many predecessors, but we can't handle PHIs yet.
466 //
467 // FIXME: Real PHIs could be if-converted as long as the CmpBB values are
468 // defined before The CmpBB cmp clobbers the flags. Alternatively, it should
469 // always be safe to sink the ccmp down to immediately before the CmpBB
470 // terminators.
471 if (!trivialTailPHIs()) {
472 LLVM_DEBUG(dbgs() << "Can't handle phis in Tail.\n");
473 ++NumPhiRejs;
474 return false;
475 }
476
477 if (!Tail->livein_empty()) {
478 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in Tail.\n");
479 ++NumPhysRejs;
480 return false;
481 }
482
483 // CmpBB should never have PHIs since Head is its only predecessor.
484 // FIXME: Clean them up if it happens.
485 if (!CmpBB->empty() && CmpBB->front().isPHI()) {
486 LLVM_DEBUG(dbgs() << "Can't handle phis in CmpBB.\n");
487 ++NumPhi2Rejs;
488 return false;
489 }
490
491 if (!CmpBB->livein_empty()) {
492 LLVM_DEBUG(dbgs() << "Can't handle live-in physregs in CmpBB.\n");
493 ++NumPhysRejs;
494 return false;
495 }
496
497 // The branch we're looking to eliminate must be analyzable.
498 HeadCond.clear();
499 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
500 if (TII->analyzeBranch(*Head, TBB, FBB, HeadCond)) {
501 LLVM_DEBUG(dbgs() << "Head branch not analyzable.\n");
502 ++NumHeadBranchRejs;
503 return false;
504 }
505
506 // This is weird, probably some sort of degenerate CFG, or an edge to a
507 // landing pad.
508 if (!TBB || HeadCond.empty()) {
510 dbgs() << "analyzeBranch didn't find conditional branch in Head.\n");
511 ++NumHeadBranchRejs;
512 return false;
513 }
514
515 if (!parseCond(HeadCond, HeadCmpBBCC)) {
516 LLVM_DEBUG(dbgs() << "Unsupported branch type on Head\n");
517 ++NumHeadBranchRejs;
518 return false;
519 }
520
521 // Make sure the branch direction is right.
522 if (TBB != CmpBB) {
523 assert(TBB == Tail && "Unexpected TBB");
524 HeadCmpBBCC = AArch64CC::getInvertedCondCode(HeadCmpBBCC);
525 }
526
527 CmpBBCond.clear();
528 TBB = FBB = nullptr;
529 if (TII->analyzeBranch(*CmpBB, TBB, FBB, CmpBBCond)) {
530 LLVM_DEBUG(dbgs() << "CmpBB branch not analyzable.\n");
531 ++NumCmpBranchRejs;
532 return false;
533 }
534
535 if (!TBB || CmpBBCond.empty()) {
537 dbgs() << "analyzeBranch didn't find conditional branch in CmpBB.\n");
538 ++NumCmpBranchRejs;
539 return false;
540 }
541
542 if (!parseCond(CmpBBCond, CmpBBTailCC)) {
543 LLVM_DEBUG(dbgs() << "Unsupported branch type on CmpBB\n");
544 ++NumCmpBranchRejs;
545 return false;
546 }
547
548 if (TBB != Tail)
549 CmpBBTailCC = AArch64CC::getInvertedCondCode(CmpBBTailCC);
550
551 LLVM_DEBUG(dbgs() << "Head->CmpBB on "
552 << AArch64CC::getCondCodeName(HeadCmpBBCC)
553 << ", CmpBB->Tail on "
554 << AArch64CC::getCondCodeName(CmpBBTailCC) << '\n');
555
556 CmpMI = findConvertibleCompare(CmpBB);
557 if (!CmpMI)
558 return false;
559
560 if (!canSpeculateInstrs(CmpBB, CmpMI)) {
561 ++NumSpeculateRejs;
562 return false;
563 }
564 return true;
565}
566
567void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
568 LLVM_DEBUG(dbgs() << "Merging " << printMBBReference(*CmpBB) << " into "
569 << printMBBReference(*Head) << ":\n"
570 << *CmpBB);
571
572 // All CmpBB instructions are moved into Head, and CmpBB is deleted.
573 // Update the CFG first.
574 updateTailPHIs();
575
576 // Save successor probabilties before removing CmpBB and Tail from their
577 // parents.
578 BranchProbability Head2CmpBB = MBPI->getEdgeProbability(Head, CmpBB);
579 BranchProbability CmpBB2Tail = MBPI->getEdgeProbability(CmpBB, Tail);
580
581 Head->removeSuccessor(CmpBB);
582 CmpBB->removeSuccessor(Tail);
583
584 // If Head and CmpBB had successor probabilties, udpate the probabilities to
585 // reflect the ccmp-conversion.
587
588 // Head is allowed two successors. We've removed CmpBB, so the remaining
589 // successor is Tail. We need to increase the successor probability for
590 // Tail to account for the CmpBB path we removed.
591 //
592 // Pr(Tail|Head) += Pr(CmpBB|Head) * Pr(Tail|CmpBB).
593 assert(*Head->succ_begin() == Tail && "Head successor is not Tail");
594 BranchProbability Head2Tail = MBPI->getEdgeProbability(Head, Tail);
595 Head->setSuccProbability(Head->succ_begin(),
596 Head2Tail + Head2CmpBB * CmpBB2Tail);
597
598 // We will transfer successors of CmpBB to Head in a moment without
599 // normalizing the successor probabilities. Set the successor probabilites
600 // before doing so.
601 //
602 // Pr(I|Head) = Pr(CmpBB|Head) * Pr(I|CmpBB).
603 for (auto I = CmpBB->succ_begin(), E = CmpBB->succ_end(); I != E; ++I) {
604 BranchProbability CmpBB2I = MBPI->getEdgeProbability(CmpBB, *I);
605 CmpBB->setSuccProbability(I, Head2CmpBB * CmpBB2I);
606 }
607 }
608
610 DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
611 TII->removeBranch(*Head);
612
613 // If the Head terminator was one of the cbz / tbz branches with built-in
614 // compare, we need to insert an explicit compare instruction in its place.
615 if (HeadCond[0].getImm() == -1) {
616 ++NumCompBranches;
617 unsigned Opc = 0;
618 switch (HeadCond[1].getImm()) {
619 case AArch64::CBZW:
620 case AArch64::CBNZW:
621 Opc = AArch64::SUBSWri;
622 break;
623 case AArch64::CBZX:
624 case AArch64::CBNZX:
625 Opc = AArch64::SUBSXri;
626 break;
627 default:
628 llvm_unreachable("Cannot convert Head branch");
629 }
630 const MCInstrDesc &MCID = TII->get(Opc);
631 // Create a dummy virtual register for the SUBS def.
632 Register DestReg =
633 MRI->createVirtualRegister(TII->getRegClass(MCID, 0, TRI, *MF));
634 // Insert a SUBS Rn, #0 instruction instead of the cbz / cbnz.
635 BuildMI(*Head, Head->end(), TermDL, MCID)
637 .add(HeadCond[2])
638 .addImm(0)
639 .addImm(0);
640 // SUBS uses the GPR*sp register classes.
641 MRI->constrainRegClass(HeadCond[2].getReg(),
642 TII->getRegClass(MCID, 1, TRI, *MF));
643 }
644
645 Head->splice(Head->end(), CmpBB, CmpBB->begin(), CmpBB->end());
646
647 // Now replace CmpMI with a ccmp instruction that also considers the incoming
648 // flags.
649 unsigned Opc = 0;
650 unsigned FirstOp = 1; // First CmpMI operand to copy.
651 bool isZBranch = false; // CmpMI is a cbz/cbnz instruction.
652 switch (CmpMI->getOpcode()) {
653 default:
654 llvm_unreachable("Unknown compare opcode");
655 case AArch64::SUBSWri: Opc = AArch64::CCMPWi; break;
656 case AArch64::SUBSWrr: Opc = AArch64::CCMPWr; break;
657 case AArch64::SUBSXri: Opc = AArch64::CCMPXi; break;
658 case AArch64::SUBSXrr: Opc = AArch64::CCMPXr; break;
659 case AArch64::ADDSWri: Opc = AArch64::CCMNWi; break;
660 case AArch64::ADDSWrr: Opc = AArch64::CCMNWr; break;
661 case AArch64::ADDSXri: Opc = AArch64::CCMNXi; break;
662 case AArch64::ADDSXrr: Opc = AArch64::CCMNXr; break;
663 case AArch64::FCMPSrr: Opc = AArch64::FCCMPSrr; FirstOp = 0; break;
664 case AArch64::FCMPDrr: Opc = AArch64::FCCMPDrr; FirstOp = 0; break;
665 case AArch64::FCMPESrr: Opc = AArch64::FCCMPESrr; FirstOp = 0; break;
666 case AArch64::FCMPEDrr: Opc = AArch64::FCCMPEDrr; FirstOp = 0; break;
667 case AArch64::CBZW:
668 case AArch64::CBNZW:
669 Opc = AArch64::CCMPWi;
670 FirstOp = 0;
671 isZBranch = true;
672 break;
673 case AArch64::CBZX:
674 case AArch64::CBNZX:
675 Opc = AArch64::CCMPXi;
676 FirstOp = 0;
677 isZBranch = true;
678 break;
679 }
680
681 // The ccmp instruction should set the flags according to the comparison when
682 // Head would have branched to CmpBB.
683 // The NZCV immediate operand should provide flags for the case where Head
684 // would have branched to Tail. These flags should cause the new Head
685 // terminator to branch to tail.
686 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CmpBBTailCC);
687 const MCInstrDesc &MCID = TII->get(Opc);
688 MRI->constrainRegClass(CmpMI->getOperand(FirstOp).getReg(),
689 TII->getRegClass(MCID, 0, TRI, *MF));
690 if (CmpMI->getOperand(FirstOp + 1).isReg())
691 MRI->constrainRegClass(CmpMI->getOperand(FirstOp + 1).getReg(),
692 TII->getRegClass(MCID, 1, TRI, *MF));
693 MachineInstrBuilder MIB = BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), MCID)
694 .add(CmpMI->getOperand(FirstOp)); // Register Rn
695 if (isZBranch)
696 MIB.addImm(0); // cbz/cbnz Rn -> ccmp Rn, #0
697 else
698 MIB.add(CmpMI->getOperand(FirstOp + 1)); // Register Rm / Immediate
699 MIB.addImm(NZCV).addImm(HeadCmpBBCC);
700
701 // If CmpMI was a terminator, we need a new conditional branch to replace it.
702 // This now becomes a Head terminator.
703 if (isZBranch) {
704 bool isNZ = CmpMI->getOpcode() == AArch64::CBNZW ||
705 CmpMI->getOpcode() == AArch64::CBNZX;
706 BuildMI(*Head, CmpMI, CmpMI->getDebugLoc(), TII->get(AArch64::Bcc))
708 .add(CmpMI->getOperand(1)); // Branch target.
709 }
710 CmpMI->eraseFromParent();
711 Head->updateTerminator(CmpBB->getNextNode());
712
713 RemovedBlocks.push_back(CmpBB);
714 LLVM_DEBUG(dbgs() << "Result:\n" << *Head);
715 ++NumConverted;
716}
717
718int SSACCmpConv::expectedCodeSizeDelta() const {
719 int delta = 0;
720 // If the Head terminator was one of the cbz / tbz branches with built-in
721 // compare, we need to insert an explicit compare instruction in its place
722 // plus a branch instruction.
723 if (HeadCond[0].getImm() == -1) {
724 switch (HeadCond[1].getImm()) {
725 case AArch64::CBZW:
726 case AArch64::CBNZW:
727 case AArch64::CBZX:
728 case AArch64::CBNZX:
729 // Therefore delta += 1
730 delta = 1;
731 break;
732 default:
733 llvm_unreachable("Cannot convert Head branch");
734 }
735 }
736 // If the Cmp terminator was one of the cbz / tbz branches with
737 // built-in compare, it will be turned into a compare instruction
738 // into Head, but we do not save any instruction.
739 // Otherwise, we save the branch instruction.
740 switch (CmpMI->getOpcode()) {
741 default:
742 --delta;
743 break;
744 case AArch64::CBZW:
745 case AArch64::CBNZW:
746 case AArch64::CBZX:
747 case AArch64::CBNZX:
748 break;
749 }
750 return delta;
751}
752
753//===----------------------------------------------------------------------===//
754// AArch64ConditionalCompares Pass
755//===----------------------------------------------------------------------===//
756
757namespace {
758class AArch64ConditionalCompares : public MachineFunctionPass {
760 const TargetInstrInfo *TII;
761 const TargetRegisterInfo *TRI;
762 MCSchedModel SchedModel;
763 // Does the proceeded function has Oz attribute.
764 bool MinSize;
766 MachineDominatorTree *DomTree;
768 MachineTraceMetrics *Traces;
770 SSACCmpConv CmpConv;
771
772public:
773 static char ID;
774 AArch64ConditionalCompares() : MachineFunctionPass(ID) {
776 }
777 void getAnalysisUsage(AnalysisUsage &AU) const override;
778 bool runOnMachineFunction(MachineFunction &MF) override;
779 StringRef getPassName() const override {
780 return "AArch64 Conditional Compares";
781 }
782
783private:
784 bool tryConvert(MachineBasicBlock *);
785 void updateDomTree(ArrayRef<MachineBasicBlock *> Removed);
786 void updateLoops(ArrayRef<MachineBasicBlock *> Removed);
787 void invalidateTraces();
788 bool shouldConvert();
789};
790} // end anonymous namespace
791
792char AArch64ConditionalCompares::ID = 0;
793
794INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
795 "AArch64 CCMP Pass", false, false)
799INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
800 "AArch64 CCMP Pass", false, false)
801
803 return new AArch64ConditionalCompares();
804}
805
806void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
815}
816
817/// Update the dominator tree after if-conversion erased some blocks.
818void AArch64ConditionalCompares::updateDomTree(
820 // convert() removes CmpBB which was previously dominated by Head.
821 // CmpBB children should be transferred to Head.
822 MachineDomTreeNode *HeadNode = DomTree->getNode(CmpConv.Head);
823 for (MachineBasicBlock *RemovedMBB : Removed) {
824 MachineDomTreeNode *Node = DomTree->getNode(RemovedMBB);
825 assert(Node != HeadNode && "Cannot erase the head node");
826 assert(Node->getIDom() == HeadNode && "CmpBB should be dominated by Head");
827 while (Node->getNumChildren())
828 DomTree->changeImmediateDominator(Node->back(), HeadNode);
829 DomTree->eraseNode(RemovedMBB);
830 }
831}
832
833/// Update LoopInfo after if-conversion.
834void
835AArch64ConditionalCompares::updateLoops(ArrayRef<MachineBasicBlock *> Removed) {
836 if (!Loops)
837 return;
838 for (MachineBasicBlock *RemovedMBB : Removed)
839 Loops->removeBlock(RemovedMBB);
840}
841
842/// Invalidate MachineTraceMetrics before if-conversion.
843void AArch64ConditionalCompares::invalidateTraces() {
844 Traces->invalidate(CmpConv.Head);
845 Traces->invalidate(CmpConv.CmpBB);
846}
847
848/// Apply cost model and heuristics to the if-conversion in IfConv.
849/// Return true if the conversion is a good idea.
850///
851bool AArch64ConditionalCompares::shouldConvert() {
852 // Stress testing mode disables all cost considerations.
853 if (Stress)
854 return true;
855 if (!MinInstr)
856 MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
857
858 // Head dominates CmpBB, so it is always included in its trace.
859 MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
860
861 // If code size is the main concern
862 if (MinSize) {
863 int CodeSizeDelta = CmpConv.expectedCodeSizeDelta();
864 LLVM_DEBUG(dbgs() << "Code size delta: " << CodeSizeDelta << '\n');
865 // If we are minimizing the code size, do the conversion whatever
866 // the cost is.
867 if (CodeSizeDelta < 0)
868 return true;
869 if (CodeSizeDelta > 0) {
870 LLVM_DEBUG(dbgs() << "Code size is increasing, give up on this one.\n");
871 return false;
872 }
873 // CodeSizeDelta == 0, continue with the regular heuristics
874 }
875
876 // Heuristic: The compare conversion delays the execution of the branch
877 // instruction because we must wait for the inputs to the second compare as
878 // well. The branch has no dependent instructions, but delaying it increases
879 // the cost of a misprediction.
880 //
881 // Set a limit on the delay we will accept.
882 unsigned DelayLimit = SchedModel.MispredictPenalty * 3 / 4;
883
884 // Instruction depths can be computed for all trace instructions above CmpBB.
885 unsigned HeadDepth =
886 Trace.getInstrCycles(*CmpConv.Head->getFirstTerminator()).Depth;
887 unsigned CmpBBDepth =
888 Trace.getInstrCycles(*CmpConv.CmpBB->getFirstTerminator()).Depth;
889 LLVM_DEBUG(dbgs() << "Head depth: " << HeadDepth
890 << "\nCmpBB depth: " << CmpBBDepth << '\n');
891 if (CmpBBDepth > HeadDepth + DelayLimit) {
892 LLVM_DEBUG(dbgs() << "Branch delay would be larger than " << DelayLimit
893 << " cycles.\n");
894 return false;
895 }
896
897 // Check the resource depth at the bottom of CmpBB - these instructions will
898 // be speculated.
899 unsigned ResDepth = Trace.getResourceDepth(true);
900 LLVM_DEBUG(dbgs() << "Resources: " << ResDepth << '\n');
901
902 // Heuristic: The speculatively executed instructions must all be able to
903 // merge into the Head block. The Head critical path should dominate the
904 // resource cost of the speculated instructions.
905 if (ResDepth > HeadDepth) {
906 LLVM_DEBUG(dbgs() << "Too many instructions to speculate.\n");
907 return false;
908 }
909 return true;
910}
911
912bool AArch64ConditionalCompares::tryConvert(MachineBasicBlock *MBB) {
913 bool Changed = false;
914 while (CmpConv.canConvert(MBB) && shouldConvert()) {
915 invalidateTraces();
917 CmpConv.convert(RemovedBlocks);
918 Changed = true;
919 updateDomTree(RemovedBlocks);
920 for (MachineBasicBlock *MBB : RemovedBlocks)
922 updateLoops(RemovedBlocks);
923 }
924 return Changed;
925}
926
927bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
928 LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
929 << "********** Function: " << MF.getName() << '\n');
930 if (skipFunction(MF.getFunction()))
931 return false;
932
935 SchedModel = MF.getSubtarget().getSchedModel();
936 MRI = &MF.getRegInfo();
937 DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
938 Loops = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
939 MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
940 Traces = &getAnalysis<MachineTraceMetrics>();
941 MinInstr = nullptr;
942 MinSize = MF.getFunction().hasMinSize();
943
944 bool Changed = false;
945 CmpConv.runOnMachineFunction(MF, MBPI);
946
947 // Visit blocks in dominator tree pre-order. The pre-order enables multiple
948 // cmp-conversions from the same head block.
949 // Note that updateDomTree() modifies the children of the DomTree node
950 // currently being visited. The df_iterator supports that; it doesn't look at
951 // child_begin() / child_end() until after a node has been visited.
952 for (auto *I : depth_first(DomTree))
953 if (tryConvert(I->getBlock()))
954 Changed = true;
955
956 return Changed;
957}
unsigned const MachineRegisterInfo * MRI
static cl::opt< bool > Stress("aarch64-stress-ccmp", cl::Hidden, cl::desc("Turn all knobs to 11"))
static cl::opt< unsigned > BlockInstrLimit("aarch64-ccmp-limit", cl::init(30), cl::Hidden, cl::desc("Maximum number of instructions per speculated block."))
static bool parseCond(ArrayRef< MachineOperand > Cond, AArch64CC::CondCode &CC)
static bool shouldConvert(Constant &C, AArch64PromoteConstant::PromotionCacheTy &PromotionCache)
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static unsigned InstrCount
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
expand large fp convert
const HexagonInstrInfo * TII
Hexagon Hardware Loops
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A debug info location.
Definition: DebugLoc.h:33
Base class for the actual dominator tree node.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
Remove the branching code at the end of the specific MBB.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify) const override
Analyze the branching code at the end of MBB, returning true if it cannot be understood (e....
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void updateTerminator(MachineBasicBlock *PreviousLayoutSuccessor)
Update the terminator instructions in block to account for changes to block layout which may have bee...
void setSuccProbability(succ_iterator I, BranchProbability Prob)
Set successor probability of a given iterator.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned succ_size() const
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
bool hasSuccessorProbabilities() const
Return true if any of the successors have probabilities attached to them.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
bool isPHI() const
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A trace ensemble is a collection of traces selected using the same strategy, for example 'minimum res...
A trace represents a plausible sequence of executed basic blocks that passes through the current basi...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
bool empty() const
Definition: SmallVector.h:95
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TargetInstrInfo - Interface to description of machine instruction set.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static const char * getCondCodeName(CondCode Code)
static CondCode getInvertedCondCode(CondCode Code)
static unsigned getNZCVToSatisfyCondCode(CondCode Code)
Given a condition code, return NZCV flags that would satisfy that condition.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ Dead
Unused definition.
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
PhysRegInfo AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg, const TargetRegisterInfo *TRI)
AnalyzePhysRegInBundle - Analyze how the current instruction or bundle uses a physical register.
FunctionPass * createAArch64ConditionalCompares()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
iterator_range< df_iterator< T > > depth_first(const T &G)
void initializeAArch64ConditionalComparesPass(PassRegistry &)
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:253
Information about how a physical register Reg is used by a set of operands.
bool Read
Reg or one of its aliases is read.
bool Defined
Reg or one of its aliases is defined.
bool Clobbered
There is a regmask operand indicating Reg is clobbered.