doxygen/GCNVOPDUtils_8cpp_source.html

//===- GCNVOPDUtils.cpp - GCN VOPD Utils  ------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file This file contains the AMDGPU DAG scheduling

/// mutation to pair VOPD instructions back to back. It also contains

/// subroutines useful in the creation of VOPD instructions.

//

//===----------------------------------------------------------------------===//


#include "GCNVOPDUtils.h"

#include "AMDGPUSubtarget.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "SIInstrInfo.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/ADT/STLExtras.h"

#include "llvm/ADT/SmallVector.h"

#include "llvm/CodeGen/MachineBasicBlock.h"

#include "llvm/CodeGen/MachineInstr.h"

#include "llvm/CodeGen/MachineOperand.h"

#include "llvm/CodeGen/MachineRegisterInfo.h"

#include "llvm/CodeGen/MacroFusion.h"

#include "llvm/CodeGen/ScheduleDAG.h"

#include "llvm/CodeGen/ScheduleDAGMutation.h"

#include "llvm/CodeGen/TargetInstrInfo.h"

#include "llvm/MC/MCInst.h"


using namespace llvm;


#define DEBUG_TYPE "gcn-vopd-utils"


bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,

                                   const MachineInstr &FirstMI,

                                   const MachineInstr &SecondMI) {

  namespace VOPD = AMDGPU::VOPD;


  const MachineFunction *MF = FirstMI.getMF();

  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();

  const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());

  const MachineRegisterInfo &MRI = MF->getRegInfo();

  // Literals also count against scalar bus limit

  SmallVector<const MachineOperand *> UniqueLiterals;

  auto addLiteral = [&](const MachineOperand &Op) {

    for (auto &Literal : UniqueLiterals) {

      if (Literal->isIdenticalTo(Op))

        return;

    }

    UniqueLiterals.push_back(&Op);

  };

  SmallVector<Register> UniqueScalarRegs;

  assert([&]() -> bool {

    for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);

         MII != FirstMI.getParent()->instr_end(); ++MII) {

      if (&*MII == &SecondMI)

        return true;

    }

    return false;

  }() && "Expected FirstMI to precede SecondMI");

  // Cannot pair dependent instructions

  for (const auto &Use : SecondMI.uses())

    if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))

      return false;


  auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {

    const MachineInstr &MI = (OpcodeIdx == VOPD::X) ? FirstMI : SecondMI;

    const MachineOperand &Operand = MI.getOperand(OperandIdx);

    if (Operand.isReg() && TRI->isVectorRegister(MRI, Operand.getReg()))

      return Operand.getReg();

    return Register();

  };


  auto InstInfo =

      AMDGPU::getVOPDInstInfo(FirstMI.getDesc(), SecondMI.getDesc());


  for (auto CompIdx : VOPD::COMPONENTS) {

    const MachineInstr &MI = (CompIdx == VOPD::X) ? FirstMI : SecondMI;


    const MachineOperand &Src0 = MI.getOperand(VOPD::Component::SRC0);

    if (Src0.isReg()) {

      if (!TRI->isVectorRegister(MRI, Src0.getReg())) {

        if (!is_contained(UniqueScalarRegs, Src0.getReg()))

          UniqueScalarRegs.push_back(Src0.getReg());

      }

    } else {

      if (!TII.isInlineConstant(MI, VOPD::Component::SRC0))

        addLiteral(Src0);

    }


    if (InstInfo[CompIdx].hasMandatoryLiteral()) {

      auto CompOprIdx = InstInfo[CompIdx].getMandatoryLiteralCompOperandIndex();

      addLiteral(MI.getOperand(CompOprIdx));

    }

    if (MI.getDesc().hasImplicitUseOfPhysReg(AMDGPU::VCC))

      UniqueScalarRegs.push_back(AMDGPU::VCC_LO);

  }


  if (UniqueLiterals.size() > 1)

    return false;

  if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)

    return false;


  // On GFX12 if both OpX and OpY are V_MOV_B32 then OPY uses SRC2 source-cache.

  bool SkipSrc = ST.getGeneration() >= AMDGPUSubtarget::GFX12 &&

                 FirstMI.getOpcode() == AMDGPU::V_MOV_B32_e32 &&

                 SecondMI.getOpcode() == AMDGPU::V_MOV_B32_e32;


  if (InstInfo.hasInvalidOperand(getVRegIdx, SkipSrc))

    return false;


  LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI

                    << "\n\tY: " << SecondMI << "\n");

  return true;

}


/// Check if the instr pair, FirstMI and SecondMI, should be scheduled

/// together. Given SecondMI, when FirstMI is unspecified, then check if

/// SecondMI may be part of a fused pair at all.

static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,

                                       const TargetSubtargetInfo &TSI,

                                       const MachineInstr *FirstMI,

                                       const MachineInstr &SecondMI) {

  const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);

  unsigned Opc2 = SecondMI.getOpcode();

  auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);


  // One instruction case

  if (!FirstMI)

    return SecondCanBeVOPD.Y;


  unsigned Opc = FirstMI->getOpcode();

  auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);


  if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||

        (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))

    return false;


  return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);

}


namespace {

/// Adapts design from MacroFusion

/// Puts valid candidate instructions back-to-back so they can easily

/// be turned into VOPD instructions

/// Greedily pairs instruction candidates. O(n^2) algorithm.

struct VOPDPairingMutation : ScheduleDAGMutation {

  MacroFusionPredTy shouldScheduleAdjacent; // NOLINT: function pointer


  VOPDPairingMutation(

      MacroFusionPredTy shouldScheduleAdjacent) // NOLINT: function pointer

      : shouldScheduleAdjacent(shouldScheduleAdjacent) {}


  void apply(ScheduleDAGInstrs *DAG) override {

    const TargetInstrInfo &TII = *DAG->TII;

    const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();

    if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {

      LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");

      return;

    }


    std::vector<SUnit>::iterator ISUI, JSUI;

    for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {

      const MachineInstr *IMI = ISUI->getInstr();

      if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))

        continue;

      if (!hasLessThanNumFused(*ISUI, 2))

        continue;


      for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {

        if (JSUI->isBoundaryNode())

          continue;

        const MachineInstr *JMI = JSUI->getInstr();

        if (!hasLessThanNumFused(*JSUI, 2) ||

            !shouldScheduleAdjacent(TII, ST, IMI, *JMI))

          continue;

        if (fuseInstructionPair(*DAG, *ISUI, *JSUI))

          break;

      }

    }

    LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");

  }

};

} // namespace


std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {

  return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);

}

MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105

shouldScheduleAdjacent
static bool shouldScheduleAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition: AArch64MacroFusion.cpp:443

AMDGPUBaseInfo.h

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

AMDGPUSubtarget.h
Base class for AMDGPU specific classes of TargetSubtarget.

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

shouldScheduleVOPDAdjacent
static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII, const TargetSubtargetInfo &TSI, const MachineInstr *FirstMI, const MachineInstr &SecondMI)
Check if the instr pair, FirstMI and SecondMI, should be scheduled together.
Definition: GCNVOPDUtils.cpp:123

GCNVOPDUtils.h

TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125

MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:113

MCInst.h

MachineBasicBlock.h

MachineInstr.h

MachineOperand.h

MachineRegisterInfo.h

TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1928

MacroFusion.h

SIInstrInfo.h
Interface definition for SIInstrInfo.

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

STLExtras.h
This file contains some templates that are useful if you are working with the STL at all.

ScheduleDAGMutation.h

ScheduleDAG.h

SmallVector.h
This file defines the SmallVector class.

TargetInstrInfo.h

llvm::AMDGPUSubtarget::GFX12
@ GFX12
Definition: AMDGPUSubtarget.h:43

llvm::DWARFExpression::Operation
This class represents an Operation in the Expression.
Definition: DWARFExpression.h:32

llvm::GCNSubtarget
Definition: GCNSubtarget.h:35

llvm::MachineFunction
Definition: MachineFunction.h:258

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:717

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:727

llvm::MachineInstrBundleIterator< const MachineInstr >

llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:69

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569

llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346

llvm::MachineInstr::uses
iterator_range< mop_iterator > uses()
Returns a range that includes all operands that are register uses.
Definition: MachineInstr.h:733

llvm::MachineInstr::modifiesRegister
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
Definition: MachineInstr.h:1515

llvm::MachineInstr::getDesc
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:566

llvm::MachineInstr::getMF
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
Definition: MachineInstr.cpp:747

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:329

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:369

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:51

llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19

llvm::SIInstrInfo
Definition: SIInstrInfo.h:83

llvm::SIRegisterInfo
Definition: SIRegisterInfo.h:32

llvm::ScheduleDAGInstrs
A ScheduleDAG for scheduling lists of MachineInstr.
Definition: ScheduleDAGInstrs.h:114

llvm::ScheduleDAGMutation
Mutate the DAG as a postpass after normal DAG building.
Definition: ScheduleDAGMutation.h:22

llvm::ScheduleDAG::TII
const TargetInstrInfo * TII
Target instruction information.
Definition: ScheduleDAG.h:575

llvm::ScheduleDAG::SUnits
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:579

llvm::ScheduleDAG::MF
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:577

llvm::SmallVectorBase::size
size_t size() const
Definition: SmallVector.h:91

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:426

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209

llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:111

llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:63

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43

llvm::AMDGPU::getCanBeVOPD
CanBeVOPD getCanBeVOPD(unsigned Opc)
Definition: AMDGPUBaseInfo.cpp:536

llvm::AMDGPU::getVOPDInstInfo
VOPD::InstInfo getVOPDInstInfo(const MCInstrDesc &OpX, const MCInstrDesc &OpY)
Definition: AMDGPUBaseInfo.cpp:751

llvm::AMDGPU::hasVOPD
bool hasVOPD(const MCSubtargetInfo &STI)
Definition: AMDGPUBaseInfo.cpp:2214

llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73

llvm::cl::apply
void apply(Opt *O, const Mod &M, const Mods &... Ms)
Definition: CommandLine.h:1309

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::createVOPDPairingMutation
std::unique_ptr< ScheduleDAGMutation > createVOPDPairingMutation()
Definition: GCNVOPDUtils.cpp:189

llvm::fuseInstructionPair
bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU, SUnit &SecondSU)
Create an artificial edge between FirstSU and SecondSU.
Definition: MacroFusion.cpp:53

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::MacroFusionPredTy
bool(*)(const TargetInstrInfo &TII, const TargetSubtargetInfo &STI, const MachineInstr *FirstMI, const MachineInstr &SecondMI) MacroFusionPredTy
Check if the instr pair, FirstMI and SecondMI, should be fused together.
Definition: MacroFusion.h:35

llvm::Op
DWARFExpression::Operation Op
Definition: DWARFExpression.cpp:22

llvm::ReplacementType::Literal
@ Literal

llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879

llvm::hasLessThanNumFused
bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit)
Checks if the number of cluster edges between SU and its predecessors is less than FuseLimit.
Definition: MacroFusion.cpp:46

llvm::checkVOPDRegConstraints
bool checkVOPDRegConstraints(const SIInstrInfo &TII, const MachineInstr &FirstMI, const MachineInstr &SecondMI)
Definition: GCNVOPDUtils.cpp:37