LLVM 23.0.0git
AArch64CodeLayoutOpt.cpp
Go to the documentation of this file.
//===-- AArch64CodeLayoutOpt.cpp - Code Layout Optimizations --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass runs after instruction scheduling and employs code layout
// optimizations for certain patterns.
//
// Option -aarch64-code-layout-opt-enable selects instruction pairs to optimize:
//   cmp-csel:   Enable CMP/CMN-CSEL code layout optimization
//   fcmp-fcsel: Enable FCMP-FCSEL code layout optimization
//
// The initial implementation induces function alignment when a supported
// pattern is detected, and possibly instruction alignment when a pair would
// straddle cache lines.
//===----------------------------------------------------------------------===//
20
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "aarch64-code-layout-opt"
36#define DBG(...) LLVM_DEBUG(dbgs() << DEBUG_TYPE ": " << __VA_ARGS__)
37#define AARCH64_CODE_LAYOUT_OPT_NAME "AArch64 Code Layout Optimization"
38
// Kinds of fusible instruction pairs this pass can align. Used as bit
// indices in the EnableCodeAlignment cl::bits option below.
enum CodeLayoutOpt {
  CmpCsel,   // Align CMP/CMN-CSEL pairs
  FcmpFcsel, // Align FCMP-FCSEL pairs
};
43
45 "aarch64-code-layout-opt-enable", cl::Hidden, cl::CommaSeparated,
46 cl::desc("Enable code alignment optimization for instruction pairs"),
48 clEnumValN(CmpCsel, "cmp-csel", "CMP/CMN-CSEL pair alignment (32-bit)"),
49 clEnumValN(FcmpFcsel, "fcmp-fcsel", "FCMP-FCSEL pair alignment")));
50
52 "aarch64-code-layout-opt-align-functions", cl::Hidden,
53 cl::desc("Function alignment in bytes for code layout optimization "
54 "(must be a power of 2)"),
55 cl::init(64), cl::callback([](const unsigned &Val) {
56 if (!isPowerOf2_32(Val))
58 "aarch64-code-layout-opt-align must be a power of 2");
59 }));
60
61STATISTIC(NumFunctionsAligned,
62 "Number of functions with aligned (to 64-bytes by default)");
63STATISTIC(NumCmpCselPairsDetected,
64 "Number of CMP/CMN-CSEL pairs detected for alignment");
65STATISTIC(NumFcmpFcselPairsDetected,
66 "Number of FCMP-FCSEL pairs detected for alignment");
67
68namespace {
69
70class AArch64CodeLayoutOpt : public MachineFunctionPass {
71public:
72 static char ID;
73 AArch64CodeLayoutOpt() : MachineFunctionPass(ID) {}
74 void getAnalysisUsage(AnalysisUsage &AU) const override;
75 bool runOnMachineFunction(MachineFunction &MF) override;
76 StringRef getPassName() const override {
78 }
79
80private:
81 const AArch64InstrInfo *TII = nullptr;
82
83 /// Align each fusible CMP/CMN-CSEL or FCMP-FCSEL pair in \p MBB by emitting
84 /// .p2align before the lead instruction (splitting the block if needed).
85 /// \returns true iff at least one pair was found and aligned.
86 bool alignLayoutSensitivePatterns(MachineBasicBlock *MBB);
87
88 /// Emit .p2align before MI. Splits the block if MI is not at its start.
89 void emitP2Align(MachineInstr &MI, Align DesiredAlign,
90 unsigned MaxSkipBytes = 4);
91
92 bool optimizeForCodeLayout(MachineFunction &MF);
93};
94
95} // end anonymous namespace
96
97char AArch64CodeLayoutOpt::ID = 0;
98
99INITIALIZE_PASS(AArch64CodeLayoutOpt, "aarch64-code-layout-opt",
100 AARCH64_CODE_LAYOUT_OPT_NAME, false, false)
101
102void AArch64CodeLayoutOpt::getAnalysisUsage(AnalysisUsage &AU) const {
103 AU.setPreservesAll();
105}
106
108 return new AArch64CodeLayoutOpt();
109}
110
111/// \returns true iff Opc is a floating-point comparison (FCMP/FCMPE).
112static bool isFloatingPointCompare(unsigned Opc) {
113 switch (Opc) {
114 case AArch64::FCMPSrr:
115 case AArch64::FCMPDrr:
116 case AArch64::FCMPESrr:
117 case AArch64::FCMPEDrr:
118 case AArch64::FCMPHrr:
119 case AArch64::FCMPEHrr:
120 return true;
121 default:
122 return false;
123 }
124}
125
126/// \returns true iff Opc is a floating-point conditional select (FCSEL).
128 switch (Opc) {
129 case AArch64::FCSELSrrr:
130 case AArch64::FCSELDrrr:
131 case AArch64::FCSELHrrr:
132 return true;
133 default:
134 return false;
135 }
136}
137
138/// \returns true if MI is a qualifying 32-bit CMP or CMN instruction.
139/// CMP is encoded as SUBS with WZR destination, CMN as ADDS with WZR.
140/// Only simple variants (no shifted/extended reg) qualify, and immediate
141/// variants require no LSL shift and small immediates (<=15).
143 switch (MI.getOpcode()) {
144 case AArch64::SUBSWrr:
145 case AArch64::ADDSWrr:
146 return MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr);
147 case AArch64::SUBSWri:
148 case AArch64::ADDSWri:
149 return MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) &&
150 MI.getOperand(3).getImm() == 0 && MI.getOperand(2).getImm() <= 15;
151 case AArch64::SUBSWrs:
152 case AArch64::ADDSWrs:
153 return MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) &&
154 !AArch64InstrInfo::hasShiftedReg(MI);
155 case AArch64::SUBSWrx:
156 return MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) &&
157 !AArch64InstrInfo::hasExtendedReg(MI);
158 default:
159 return false;
160 }
161}
162
163bool AArch64CodeLayoutOpt::runOnMachineFunction(MachineFunction &MF) {
164 const Function &F = MF.getFunction();
165 // hasOptSize() returns true for both -Os and -Oz.
166 if (F.hasOptSize())
167 return false;
168
169 const auto *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
170 TII = Subtarget->getInstrInfo();
171
172 // Default: enable when the subtarget opts in via FeatureAlignCmpCSelPairs.
173 if (!EnableCodeAlignment.getBits() && Subtarget->hasAlignCmpCSelPairs()) {
174 if (Subtarget->hasFuseCmpCSel())
176 if (Subtarget->hasFuseFCmpFCSel())
178 }
179
180 if (!(EnableCodeAlignment.isSet(CmpCsel) && Subtarget->hasFuseCmpCSel()) &&
181 !(EnableCodeAlignment.isSet(FcmpFcsel) && Subtarget->hasFuseFCmpFCSel()))
182 return false;
183
184 return optimizeForCodeLayout(MF);
185}
186
187void AArch64CodeLayoutOpt::emitP2Align(MachineInstr &MI, Align DesiredAlign,
188 unsigned MaxSkipBytes) {
189 MachineBasicBlock *MBB = MI.getParent();
190
191 auto FirstReal =
193 if (&*FirstReal != &MI) {
194 auto PrevIt = prev_nodbg(MI.getIterator(), MBB->instr_begin());
195 MBB = MBB->splitAt(*PrevIt, /*UpdateLiveIns=*/true);
196 }
197
198 MBB->setAlignment(DesiredAlign);
199 MBB->setMaxBytesForAlignment(MaxSkipBytes);
200}
201
202// Align each fusible CMP/CMN-CSEL or FCMP-FCSEL pair in MBB by emitting
203// .p2align before the lead instruction (splitting the block if needed).
204// A pair is: a qualifying lead instruction immediately followed by its
205// consumer (CMP/CMN→CSEL or FCMP→FCSEL), with no intervening instructions.
206// Returns true iff at least one pair was found and aligned.
207bool AArch64CodeLayoutOpt::alignLayoutSensitivePatterns(
208 MachineBasicBlock *MBB) {
209 auto End = MBB->instr_end();
211
212 for (auto &MI : instructionsWithoutDebug(MBB->begin(), MBB->end())) {
213 auto NextIt =
214 skipDebugInstructionsForward(std::next(MI.getIterator()), End);
215 if (NextIt == End)
216 break;
217
218 // --- CMP/CMN-CSEL detection ---
220 NextIt->getOpcode() == AArch64::CSELWr) {
221 Pairs.push_back({&MI, true});
222 continue;
223 }
224
225 // --- FCMP-FCSEL detection ---
226 if (EnableCodeAlignment.isSet(FcmpFcsel) &&
227 isFloatingPointCompare(MI.getOpcode()) &&
228 isFloatingPointConditionalSelect(NextIt->getOpcode())) {
229 Pairs.push_back({&MI, false});
230 continue;
231 }
232 }
233
234 for (auto &[MI, IsCmpCsel] : Pairs) {
235 emitP2Align(*MI, Align(64));
236 DBG(".p2align 6, , 4 before " << *MI);
237 ++(IsCmpCsel ? NumCmpCselPairsDetected : NumFcmpFcselPairsDetected);
238 }
239
240 return !Pairs.empty();
241}
242
243bool AArch64CodeLayoutOpt::optimizeForCodeLayout(MachineFunction &MF) {
244 DBG("optimizeForCodeLayout: " << MF.getName() << "\n");
245
246 bool Changed = false;
247 for (auto &MBB : MF)
248 Changed |= alignLayoutSensitivePatterns(&MBB);
249
250 if (!Changed)
251 return false;
252
253 if (MF.getAlignment() < Align(FunctionAlignBytes)) {
254 MF.setAlignment(Align(FunctionAlignBytes));
255 ++NumFunctionsAligned;
256 DBG("Set " << FunctionAlignBytes << "-byte alignment for function "
257 << MF.getName() << "\n");
258 } else {
259 DBG("Function " << MF.getName() << " already has sufficient alignment\n");
260 }
261 return true;
262}
static bool isFloatingPointConditionalSelect(unsigned Opc)
#define AARCH64_CODE_LAYOUT_OPT_NAME
static cl::bits< CodeLayoutOpt > EnableCodeAlignment("aarch64-code-layout-opt-enable", cl::Hidden, cl::CommaSeparated, cl::desc("Enable code alignment optimization for instruction pairs"), cl::values(clEnumValN(CmpCsel, "cmp-csel", "CMP/CMN-CSEL pair alignment (32-bit)"), clEnumValN(FcmpFcsel, "fcmp-fcsel", "FCMP-FCSEL pair alignment")))
static cl::opt< unsigned > FunctionAlignBytes("aarch64-code-layout-opt-align-functions", cl::Hidden, cl::desc("Function alignment in bytes for code layout optimization " "(must be a power of 2)"), cl::init(64), cl::callback([](const unsigned &Val) { if(!isPowerOf2_32(Val)) report_fatal_error("aarch64-code-layout-opt-align must be a power of 2");}))
static bool isFloatingPointCompare(unsigned Opc)
#define DBG(...)
static bool isQualifyingIntCompare(const MachineInstr &MI)
MachineBasicBlock & MBB
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
void setMaxBytesForAlignment(unsigned MaxBytes)
Set the maximum amount of padding allowed for aligning the basic block.
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
void push_back(const T &Elt)
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
cb< typename detail::callback_traits< F >::result_type, typename detail::callback_traits< F >::arg_type > callback(F CB)
This is an optimization pass for GlobalISel generic memory operations.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
FunctionPass * createAArch64CodeLayoutOptPass()
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.