35#define DEBUG_TYPE "aarch64-code-layout-opt"
36#define DBG(...) LLVM_DEBUG(dbgs() << DEBUG_TYPE ": " << __VA_ARGS__)
37#define AARCH64_CODE_LAYOUT_OPT_NAME "AArch64 Code Layout Optimization"
46 cl::desc(
"Enable code alignment optimization for instruction pairs"),
52 "aarch64-code-layout-opt-align-functions",
cl::Hidden,
53 cl::desc(
"Function alignment in bytes for code layout optimization "
54 "(must be a power of 2)"),
58 "aarch64-code-layout-opt-align must be a power of 2");
62 "Number of functions with aligned (to 64-bytes by default)");
64 "Number of CMP/CMN-CSEL pairs detected for alignment");
66 "Number of FCMP-FCSEL pairs detected for alignment");
74 void getAnalysisUsage(AnalysisUsage &AU)
const override;
75 bool runOnMachineFunction(MachineFunction &MF)
override;
76 StringRef getPassName()
const override {
81 const AArch64InstrInfo *TII =
nullptr;
86 bool alignLayoutSensitivePatterns(MachineBasicBlock *
MBB);
89 void emitP2Align(MachineInstr &
MI, Align DesiredAlign,
90 unsigned MaxSkipBytes = 4);
92 bool optimizeForCodeLayout(MachineFunction &MF);
97char AArch64CodeLayoutOpt::ID = 0;
102void AArch64CodeLayoutOpt::getAnalysisUsage(
AnalysisUsage &AU)
const {
103 AU.setPreservesAll();
108 return new AArch64CodeLayoutOpt();
114 case AArch64::FCMPSrr:
115 case AArch64::FCMPDrr:
116 case AArch64::FCMPESrr:
117 case AArch64::FCMPEDrr:
118 case AArch64::FCMPHrr:
119 case AArch64::FCMPEHrr:
129 case AArch64::FCSELSrrr:
130 case AArch64::FCSELDrrr:
131 case AArch64::FCSELHrrr:
143 switch (
MI.getOpcode()) {
144 case AArch64::SUBSWrr:
145 case AArch64::ADDSWrr:
146 return MI.definesRegister(AArch64::WZR,
nullptr);
147 case AArch64::SUBSWri:
148 case AArch64::ADDSWri:
149 return MI.definesRegister(AArch64::WZR,
nullptr) &&
150 MI.getOperand(3).getImm() == 0 &&
MI.getOperand(2).getImm() <= 15;
151 case AArch64::SUBSWrs:
152 case AArch64::ADDSWrs:
153 return MI.definesRegister(AArch64::WZR,
nullptr) &&
154 !AArch64InstrInfo::hasShiftedReg(
MI);
155 case AArch64::SUBSWrx:
156 return MI.definesRegister(AArch64::WZR,
nullptr) &&
157 !AArch64InstrInfo::hasExtendedReg(
MI);
169 const auto *Subtarget = &MF.
getSubtarget<AArch64Subtarget>();
170 TII = Subtarget->getInstrInfo();
174 if (Subtarget->hasFuseCmpCSel())
176 if (Subtarget->hasFuseFCmpFCSel())
184 return optimizeForCodeLayout(MF);
187void AArch64CodeLayoutOpt::emitP2Align(MachineInstr &
MI, Align DesiredAlign,
188 unsigned MaxSkipBytes) {
189 MachineBasicBlock *
MBB =
MI.getParent();
193 if (&*FirstReal != &
MI) {
207bool AArch64CodeLayoutOpt::alignLayoutSensitivePatterns(
208 MachineBasicBlock *
MBB) {
220 NextIt->getOpcode() == AArch64::CSELWr) {
234 for (
auto &[
MI, IsCmpCsel] : Pairs) {
236 DBG(
".p2align 6, , 4 before " << *
MI);
237 ++(IsCmpCsel ? NumCmpCselPairsDetected : NumFcmpFcselPairsDetected);
240 return !Pairs.empty();
243bool AArch64CodeLayoutOpt::optimizeForCodeLayout(MachineFunction &MF) {
244 DBG(
"optimizeForCodeLayout: " << MF.
getName() <<
"\n");
248 Changed |= alignLayoutSensitivePatterns(&
MBB);
255 ++NumFunctionsAligned;
257 << MF.getName() <<
"\n");
259 DBG(
"Function " << MF.getName() <<
" already has sufficient alignment\n");
static bool isFloatingPointConditionalSelect(unsigned Opc)
#define AARCH64_CODE_LAYOUT_OPT_NAME
static cl::bits< CodeLayoutOpt > EnableCodeAlignment("aarch64-code-layout-opt-enable", cl::Hidden, cl::CommaSeparated, cl::desc("Enable code alignment optimization for instruction pairs"), cl::values(clEnumValN(CmpCsel, "cmp-csel", "CMP/CMN-CSEL pair alignment (32-bit)"), clEnumValN(FcmpFcsel, "fcmp-fcsel", "FCMP-FCSEL pair alignment")))
static cl::opt< unsigned > FunctionAlignBytes("aarch64-code-layout-opt-align-functions", cl::Hidden, cl::desc("Function alignment in bytes for code layout optimization " "(must be a power of 2)"), cl::init(64), cl::callback([](const unsigned &Val) { if(!isPowerOf2_32(Val)) report_fatal_error("aarch64-code-layout-opt-align must be a power of 2");}))
static bool isFloatingPointCompare(unsigned Opc)
static bool isQualifyingIntCompare(const MachineInstr &MI)
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
instr_iterator instr_begin()
void setMaxBytesForAlignment(unsigned MaxBytes)
Set the maximum amount of padding allowed for aligning the basic block.
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
instr_iterator instr_end()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
void push_back(const T &Elt)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
cb< typename detail::callback_traits< F >::result_type, typename detail::callback_traits< F >::arg_type > callback(F CB)
This is an optimization pass for GlobalISel generic memory operations.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
FunctionPass * createAArch64CodeLayoutOptPass()
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.