LLVM 23.0.0git
AArch64CodeLayoutOpt.cpp
Go to the documentation of this file.
//===-- AArch64CodeLayoutOpt.cpp - Code Layout Optimizations --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass runs after instruction scheduling and employs code layout
// optimizations for certain patterns.
//
// Option -aarch64-code-layout-opt-enable selects instruction pairs to optimize:
//   cmp-csel:   Enable CMP/CMN-CSEL code layout optimization
//   fcmp-fcsel: Enable FCMP-FCSEL code layout optimization
//
// The initial implementation induces function alignment when a supported
// pattern is detected, and possibly instruction alignment when a pair would
// straddle cache lines.
//===----------------------------------------------------------------------===//
20
#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
32
33using namespace llvm;
34
35#define DEBUG_TYPE "aarch64-code-layout-opt"
36#define DBG(...) LLVM_DEBUG(dbgs() << DEBUG_TYPE ": " << __VA_ARGS__)
37#define AARCH64_CODE_LAYOUT_OPT_NAME "AArch64 Code Layout Optimization"
38
// Kinds of fusible instruction pairs this pass can align. Used as bit
// indices in the EnableCodeAlignment cl::bits option below.
enum CodeLayoutOpt {
  CmpCsel,   // Align CMP/CMN-CSEL pairs
  FcmpFcsel, // Align FCMP-FCSEL pairs
};
43
45 "aarch64-code-layout-opt-enable", cl::Hidden, cl::CommaSeparated,
46 cl::desc("Enable code alignment optimization for instruction pairs"),
48 clEnumValN(CmpCsel, "cmp-csel", "CMP/CMN-CSEL pair alignment (32-bit)"),
49 clEnumValN(FcmpFcsel, "fcmp-fcsel", "FCMP-FCSEL pair alignment")));
50
52 "aarch64-code-layout-opt-align-functions", cl::Hidden,
53 cl::desc("Function alignment in bytes for code layout optimization "
54 "(must be a power of 2)"),
55 cl::init(64), cl::callback([](const unsigned &Val) {
56 if (!isPowerOf2_32(Val))
58 "aarch64-code-layout-opt-align must be a power of 2");
59 }));
60
61STATISTIC(NumFunctionsAligned,
62 "Number of functions with aligned (to 64-bytes by default)");
63STATISTIC(NumCmpCselPairsDetected,
64 "Number of CMP/CMN-CSEL pairs detected for alignment");
65STATISTIC(NumFcmpFcselPairsDetected,
66 "Number of FCMP-FCSEL pairs detected for alignment");
67
68namespace {
69
70class AArch64CodeLayoutOpt : public MachineFunctionPass {
71public:
72 static char ID;
73 AArch64CodeLayoutOpt() : MachineFunctionPass(ID) {}
74 void getAnalysisUsage(AnalysisUsage &AU) const override;
75 bool runOnMachineFunction(MachineFunction &MF) override;
76 StringRef getPassName() const override {
78 }
79
80private:
81 const AArch64InstrInfo *TII = nullptr;
82
83 /// Align each fusible CMP/CMN-CSEL or FCMP-FCSEL pair in \p MBB by emitting
84 /// .p2align before the lead instruction (splitting the block if needed).
85 /// \returns true iff at least one pair was found and aligned.
86 bool alignLayoutSensitivePatterns(MachineBasicBlock *MBB);
87
88 /// Emit .p2align before MI. Splits the block if MI is not at its start.
89 void emitP2Align(MachineInstr &MI, Align DesiredAlign,
90 unsigned MaxSkipBytes = 4);
91
92 bool optimizeForCodeLayout(MachineFunction &MF);
93};
94
95} // end anonymous namespace
96
97char AArch64CodeLayoutOpt::ID = 0;
98
99INITIALIZE_PASS(AArch64CodeLayoutOpt, "aarch64-code-layout-opt",
100 AARCH64_CODE_LAYOUT_OPT_NAME, false, false)
101
102void AArch64CodeLayoutOpt::getAnalysisUsage(AnalysisUsage &AU) const {
103 AU.setPreservesAll();
105}
106
108 return new AArch64CodeLayoutOpt();
109}
110
111/// \returns true iff Opc is a floating-point comparison (FCMP/FCMPE).
112static bool isFloatingPointCompare(unsigned Opc) {
113 switch (Opc) {
114 case AArch64::FCMPSrr:
115 case AArch64::FCMPDrr:
116 case AArch64::FCMPESrr:
117 case AArch64::FCMPEDrr:
118 case AArch64::FCMPHrr:
119 case AArch64::FCMPEHrr:
120 return true;
121 default:
122 return false;
123 }
124}
125
126/// \returns true iff Opc is a floating-point conditional select (FCSEL).
128 switch (Opc) {
129 case AArch64::FCSELSrrr:
130 case AArch64::FCSELDrrr:
131 case AArch64::FCSELHrrr:
132 return true;
133 default:
134 return false;
135 }
136}
137
138/// \returns true if MI is a qualifying 32-bit CMP or CMN instruction.
139/// CMP is encoded as SUBS with WZR destination, CMN as ADDS with WZR.
140/// Only simple variants (no shifted/extended reg) qualify, and immediate
141/// variants require no LSL shift and small immediates (<=15).
143 switch (MI.getOpcode()) {
144 case AArch64::SUBSWrr:
145 case AArch64::ADDSWrr:
146 return MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr);
147 case AArch64::SUBSWri:
148 case AArch64::ADDSWri:
149 return MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) &&
150 MI.getOperand(3).getImm() == 0 && MI.getOperand(2).getImm() <= 15;
151 case AArch64::SUBSWrs:
152 case AArch64::ADDSWrs:
153 return MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) &&
154 !AArch64InstrInfo::hasShiftedReg(MI);
155 case AArch64::SUBSWrx:
156 return MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) &&
157 !AArch64InstrInfo::hasExtendedReg(MI);
158 default:
159 return false;
160 }
161}
162
163bool AArch64CodeLayoutOpt::runOnMachineFunction(MachineFunction &MF) {
164 const Function &F = MF.getFunction();
165 // hasOptSize() returns true for both -Os and -Oz.
166 if (F.hasOptSize())
167 return false;
168
169 const auto *Subtarget = &MF.getSubtarget<AArch64Subtarget>();
170 TII = Subtarget->getInstrInfo();
171
172 // Default: enable when the subtarget opts in via FeatureAlignCmpCSelPairs.
173 if (!EnableCodeAlignment.getBits() && Subtarget->hasAlignCmpCSelPairs()) {
174 if (Subtarget->hasFuseCmpCSel())
176 if (Subtarget->hasFuseFCmpFCSel())
178 }
179
180 if (!(EnableCodeAlignment.isSet(CmpCsel) && Subtarget->hasFuseCmpCSel()) &&
181 !(EnableCodeAlignment.isSet(FcmpFcsel) && Subtarget->hasFuseFCmpFCSel()))
182 return false;
183
184 return optimizeForCodeLayout(MF);
185}
186
187void AArch64CodeLayoutOpt::emitP2Align(MachineInstr &MI, Align DesiredAlign,
188 unsigned MaxSkipBytes) {
189 MachineBasicBlock *MBB = MI.getParent();
190
191 auto FirstReal =
193 if (&*FirstReal != &MI) {
194 auto PrevIt = prev_nodbg(MI.getIterator(), MBB->instr_begin());
195 MBB = MBB->splitAt(*PrevIt, /*UpdateLiveIns=*/true);
196 }
197
198 MBB->setAlignment(DesiredAlign);
199 MBB->setMaxBytesForAlignment(MaxSkipBytes);
200}
201
202// Align each fusible CMP/CMN-CSEL or FCMP-FCSEL pair in MBB by emitting
203// .p2align before the lead instruction (splitting the block if needed).
204// A pair is: a qualifying lead instruction immediately followed by its
205// consumer (CMP/CMN→CSEL or FCMP→FCSEL), with no intervening instructions.
206// Returns true iff at least one pair was found and aligned.
207bool AArch64CodeLayoutOpt::alignLayoutSensitivePatterns(
208 MachineBasicBlock *MBB) {
209 auto End = MBB->instr_end();
211
212 for (auto &MI : instructionsWithoutDebug(MBB->begin(), MBB->end())) {
213 auto NextIt =
214 skipDebugInstructionsForward(std::next(MI.getIterator()), End);
215 if (NextIt == End)
216 break;
217
218 // --- CMP/CMN-CSEL detection ---
220 NextIt->getOpcode() == AArch64::CSELWr) {
221 Pairs.push_back({&MI, true});
222 continue;
223 }
224
225 // --- FCMP-FCSEL detection ---
226 if (EnableCodeAlignment.isSet(FcmpFcsel) &&
227 isFloatingPointCompare(MI.getOpcode()) &&
228 isFloatingPointConditionalSelect(NextIt->getOpcode())) {
229 Pairs.push_back({&MI, false});
230 continue;
231 }
232 }
233
234 for (auto &[MI, IsCmpCsel] : Pairs) {
235 emitP2Align(*MI, Align(64));
236 DBG(".p2align 6, , 4 before " << *MI);
237 ++(IsCmpCsel ? NumCmpCselPairsDetected : NumFcmpFcselPairsDetected);
238 }
239
240 return !Pairs.empty();
241}
242
243bool AArch64CodeLayoutOpt::optimizeForCodeLayout(MachineFunction &MF) {
244 DBG("optimizeForCodeLayout: " << MF.getName() << "\n");
245
246 bool Changed = false;
247 for (auto &MBB : MF)
248 Changed |= alignLayoutSensitivePatterns(&MBB);
249
250 if (!Changed)
251 return false;
252
253 if (MF.getAlignment() < Align(FunctionAlignBytes)) {
254 MF.setAlignment(Align(FunctionAlignBytes));
255 ++NumFunctionsAligned;
256 DBG("Set " << FunctionAlignBytes << "-byte alignment for function "
257 << MF.getName() << "\n");
258 } else {
259 DBG("Function " << MF.getName() << " already has sufficient alignment\n");
260 }
261 return true;
262}
static bool isFloatingPointConditionalSelect(unsigned Opc)
#define AARCH64_CODE_LAYOUT_OPT_NAME
static cl::bits< CodeLayoutOpt > EnableCodeAlignment("aarch64-code-layout-opt-enable", cl::Hidden, cl::CommaSeparated, cl::desc("Enable code alignment optimization for instruction pairs"), cl::values(clEnumValN(CmpCsel, "cmp-csel", "CMP/CMN-CSEL pair alignment (32-bit)"), clEnumValN(FcmpFcsel, "fcmp-fcsel", "FCMP-FCSEL pair alignment")))
static cl::opt< unsigned > FunctionAlignBytes("aarch64-code-layout-opt-align-functions", cl::Hidden, cl::desc("Function alignment in bytes for code layout optimization " "(must be a power of 2)"), cl::init(64), cl::callback([](const unsigned &Val) { if(!isPowerOf2_32(Val)) report_fatal_error("aarch64-code-layout-opt-align must be a power of 2");}))
static bool isFloatingPointCompare(unsigned Opc)
#define DBG(...)
static bool isQualifyingIntCompare(const MachineInstr &MI)
MachineBasicBlock & MBB
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
Represent the analysis usage information of a pass.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
void setMaxBytesForAlignment(unsigned MaxBytes)
Set the maximum amount of padding allowed for aligning the basic block.
void setAlignment(Align A)
Set alignment of the basic block.
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
void push_back(const T &Elt)
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
cb< typename detail::callback_traits< F >::result_type, typename detail::callback_traits< F >::arg_type > callback(F CB)
This is an optimization pass for GlobalISel generic memory operations.
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
FunctionPass * createAArch64CodeLayoutOptPass()
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.