doxygen/AMDGPUWaitSGPRHazards_8cpp_source.html

//===- AMDGPUWaitSGPRHazards.cpp - Insert waits for SGPR read hazards -----===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// Insert s_wait_alu instructions to mitigate SGPR read hazards on GFX12.

//

//===----------------------------------------------------------------------===//


#include "AMDGPUWaitSGPRHazards.h"

#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#include "SIInstrInfo.h"

#include "llvm/ADT/SetVector.h"


using namespace llvm;


#define DEBUG_TYPE "amdgpu-wait-sgpr-hazards"


// Command-line overrides for this pass. For each option, run() uses the
// command-line value when the option was given explicitly and otherwise
// falls back to the function attribute with the same name.

// Master switch: when the resolved value is false, run() returns without
// touching the function.
static cl::opt<bool> GlobalEnableSGPRHazardWaits(

    "amdgpu-sgpr-hazard-wait", cl::init(true), cl::Hidden,

    cl::desc("Enable required s_wait_alu on SGPR hazards"));


// When set, hazard culls are inserted before calls/returns/indirect branches
// instead of treating every SGPR as tracked across the boundary.
static cl::opt<bool> GlobalCullSGPRHazardsOnFunctionBoundary(

    "amdgpu-sgpr-hazard-boundary-cull", cl::init(false), cl::Hidden,

    cl::desc("Cull hazards on function boundaries"));


// When set, hazard culls may be inserted at zero-count S_WAIT_LOADCNT,
// S_WAIT_SAMPLECNT and S_WAIT_BVHCNT instructions.
static cl::opt<bool>

    GlobalCullSGPRHazardsAtMemWait("amdgpu-sgpr-hazard-mem-wait-cull",

                                   cl::init(false), cl::Hidden,

                                   cl::desc("Cull hazards on memory waits"));


// Minimum number of set bits in HazardState::Tracked required before a
// memory-wait cull is inserted.
static cl::opt<unsigned> GlobalCullSGPRHazardsMemWaitThreshold(

    "amdgpu-sgpr-hazard-mem-wait-cull-threshold", cl::init(8), cl::Hidden,

    cl::desc("Number of tracked SGPRs before initiating hazard cull on memory "

             "wait"));


namespace {


class AMDGPUWaitSGPRHazards {

public:

  // Per-function context. All of these are assigned in run() before any
  // basic block is processed.
  const GCNSubtarget *ST;

  const SIInstrInfo *TII;

  const SIRegisterInfo *TRI;

  const MachineRegisterInfo *MRI;

  // Number of DS_NOPs that make up one hazard cull; run() sets this to
  // WAVE64_NOPS or WAVE32_NOPS depending on the subtarget's wave size.
  unsigned DsNopCount;


  // Effective settings for the current function, resolved in run() from the
  // command-line options and the corresponding function attributes.
  bool EnableSGPRHazardWaits;

  bool CullSGPRHazardsOnFunctionBoundary;

  bool CullSGPRHazardsAtMemWait;

  unsigned CullSGPRHazardsMemWaitThreshold;


  AMDGPUWaitSGPRHazards() = default;


  // Map an SGPR onto its hardware register index in the range 0-127.
  // M0, EXEC/EXEC_LO/EXEC_HI and SGPR_NULL/SGPR_NULL64 are excluded, as is any
  // register whose hardware index is above 127; those yield an empty optional.
  static std::optional<unsigned> sgprNumber(Register Reg,
                                            const SIRegisterInfo &TRI) {
    const bool IsExcluded = Reg == AMDGPU::M0 || Reg == AMDGPU::EXEC ||
                            Reg == AMDGPU::EXEC_LO ||
                            Reg == AMDGPU::EXEC_HI ||
                            Reg == AMDGPU::SGPR_NULL ||
                            Reg == AMDGPU::SGPR_NULL64;
    if (IsExcluded)
      return std::nullopt;

    const unsigned HWIndex = TRI.getHWRegIndex(Reg);
    if (HWIndex <= 127)
      return HWIndex;
    return std::nullopt;
  }


  // True when Reg is VCC or one of its halves (VCC_LO / VCC_HI).
  static inline bool isVCC(Register Reg) {
    switch (Reg) {
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
      return true;
    default:
      return false;
    }
  }


  // After NewMI has been inserted, fix up bundles that start with
  // S_GETPC_B64: every global operand of the bundled instructions following
  // NewMI has its offset increased by the size of the inserted instruction.
  // Does nothing when NewMI is not bundled or the bundle is not an S_GETPC
  // bundle.
  static void updateGetPCBundle(MachineInstr *NewMI) {
    if (!NewMI->isBundled())
      return;

    // Walk back to the first instruction of the bundle, then step over the
    // BUNDLE header instruction if there is one.
    auto Head = NewMI->getIterator();
    for (; Head->isBundledWithPred(); --Head) {
    }
    if (Head->isBundle())
      ++Head;

    // Bail if this is not an S_GETPC bundle.
    if (Head->getOpcode() != AMDGPU::S_GETPC_B64)
      return;

    assert(NewMI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
           "Unexpected instruction insertion in bundle");

    // Amount by which the offsets of later references are bumped.
    const unsigned InsertedBytes = 4;
    const auto BlockEnd = NewMI->getParent()->end();
    for (auto Follower = std::next(NewMI->getIterator());
         Follower != BlockEnd && Follower->isBundledWithPred(); ++Follower) {
      for (auto &MO : Follower->operands()) {
        if (!MO.isGlobal())
          continue;
        MO.setOffset(MO.getOffset() + InsertedBytes);
      }
    }
  }


  // Hazard bookkeeping at one program point. States combine by union
  // (operator|= / merge), so merging can only ever add hazards.
  struct HazardState {
    // Bit flags recording which kind of instruction wrote VCC.
    static constexpr unsigned None = 0;
    static constexpr unsigned SALU = (1 << 0);
    static constexpr unsigned VALU = (1 << 1);

    std::bitset<64> Tracked;      // SGPR banks ever read by VALU
    std::bitset<128> SALUHazards; // SGPRs with uncommitted values from SALU
    std::bitset<128> VALUHazards; // SGPRs with uncommitted values from VALU
    unsigned VCCHazard = None;    // Source of current VCC writes
    bool ActiveFlat = false;      // Has unwaited flat instructions

    // Field-wise equality.
    bool operator==(const HazardState &RHS) const {
      if (Tracked != RHS.Tracked)
        return false;
      if (SALUHazards != RHS.SALUHazards)
        return false;
      if (VALUHazards != RHS.VALUHazards)
        return false;
      if (VCCHazard != RHS.VCCHazard)
        return false;
      return ActiveFlat == RHS.ActiveFlat;
    }

    bool operator!=(const HazardState &RHS) const { return !(*this == RHS); }

    // Field-wise union with RHS.
    void operator|=(const HazardState &RHS) {
      Tracked |= RHS.Tracked;
      SALUHazards |= RHS.SALUHazards;
      VALUHazards |= RHS.VALUHazards;
      VCCHazard |= RHS.VCCHazard;
      ActiveFlat = ActiveFlat || RHS.ActiveFlat;
    }

    // Union RHS into this state; returns true if this state changed.
    bool merge(const HazardState &RHS) {
      const HazardState Before = *this;
      *this |= RHS;
      return !(*this == Before);
    }
  };


  // Hazard state at the two ends of one basic block.
  struct BlockHazardState {

    // State on entry; runOnMachineBasicBlock() starts its scan from a copy.
    HazardState In;

    // State at the end of the block as last computed by the analysis pass.
    HazardState Out;

  };


  // In/Out hazard state per basic block; populated during run() and cleared
  // again before run() returns.
  DenseMap<const MachineBasicBlock *, BlockHazardState> BlockState;


  // Candidate values for DsNopCount: run() picks WAVE64_NOPS when the
  // subtarget is wave64 and WAVE32_NOPS otherwise.
  static constexpr unsigned WAVE32_NOPS = 4;

  static constexpr unsigned WAVE64_NOPS = 8;


  // Emit a hazard cull: DsNopCount DS_NOP instructions built at position MI
  // in MBB, using MI's debug location. MI must not be part of a bundle.
  void insertHazardCull(MachineBasicBlock &MBB,
                        MachineBasicBlock::instr_iterator &MI) {
    assert(!MI->isBundled());
    for (unsigned Remaining = DsNopCount; Remaining != 0; --Remaining)
      BuildMI(MBB, MI, MI->getDebugLoc(), TII->get(AMDGPU::DS_NOP));
  }


  // Combine two DEPCTR wait encodings into one. Each field of the result is
  // the smaller of the corresponding fields in Mask1 and Mask2, written into
  // the subtarget's default encoding.
  unsigned mergeMasks(unsigned Mask1, unsigned Mask2) {
    // Decode every field from both inputs and keep the minimum.
    const unsigned SaSdst = std::min(AMDGPU::DepCtr::decodeFieldSaSdst(Mask1),
                                     AMDGPU::DepCtr::decodeFieldSaSdst(Mask2));
    const unsigned VaVcc = std::min(AMDGPU::DepCtr::decodeFieldVaVcc(Mask1),
                                    AMDGPU::DepCtr::decodeFieldVaVcc(Mask2));
    const unsigned VmVsrc = std::min(AMDGPU::DepCtr::decodeFieldVmVsrc(Mask1),
                                     AMDGPU::DepCtr::decodeFieldVmVsrc(Mask2));
    const unsigned VaSdst = std::min(AMDGPU::DepCtr::decodeFieldVaSdst(Mask1),
                                     AMDGPU::DepCtr::decodeFieldVaSdst(Mask2));
    const unsigned VaVdst = std::min(AMDGPU::DepCtr::decodeFieldVaVdst(Mask1),
                                     AMDGPU::DepCtr::decodeFieldVaVdst(Mask2));
    const unsigned HoldCnt =
        std::min(AMDGPU::DepCtr::decodeFieldHoldCnt(Mask1),
                 AMDGPU::DepCtr::decodeFieldHoldCnt(Mask2));
    const unsigned VaSsrc = std::min(AMDGPU::DepCtr::decodeFieldVaSsrc(Mask1),
                                     AMDGPU::DepCtr::decodeFieldVaSsrc(Mask2));

    // Re-encode on top of the default encoding.
    unsigned Merged = AMDGPU::DepCtr::getDefaultDepCtrEncoding(*ST);
    Merged = AMDGPU::DepCtr::encodeFieldSaSdst(Merged, SaSdst);
    Merged = AMDGPU::DepCtr::encodeFieldVaVcc(Merged, VaVcc);
    Merged = AMDGPU::DepCtr::encodeFieldVmVsrc(Merged, VmVsrc);
    Merged = AMDGPU::DepCtr::encodeFieldVaSdst(Merged, VaSdst);
    Merged = AMDGPU::DepCtr::encodeFieldVaVdst(Merged, VaVdst);
    Merged = AMDGPU::DepCtr::encodeFieldHoldCnt(Merged, HoldCnt);
    Merged = AMDGPU::DepCtr::encodeFieldVaSsrc(Merged, VaSsrc);
    return Merged;
  }


  // Try to fold a wait with encoding Mask into an S_WAITCNT_DEPCTR that
  // precedes MI (as found by prev_nodbg). On success the earlier wait's
  // immediate is replaced by the merged encoding and true is returned;
  // otherwise nothing is changed and false is returned.
  bool mergeConsecutiveWaitAlus(MachineBasicBlock::instr_iterator &MI,
                                unsigned Mask) {
    auto *Block = MI->getParent();
    const auto First = Block->instr_begin();
    if (MI == First)
      return false;

    auto Prev = prev_nodbg(MI, First);
    if (Prev->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
      MachineOperand &Imm = Prev->getOperand(0);
      Imm.setImm(mergeMasks(Mask, Imm.getImm()));
      return true;
    }
    return false;
  }


  // Scan MBB once, updating a local copy of the block's incoming hazard state
  // (BlockState[&MBB].In) instruction by instruction.
  //
  // Emit == false (analysis): no instructions are inserted. The resulting
  // state is stored in BlockState[&MBB].Out and the function returns true if
  // that state differs from the previously stored one.
  //
  // Emit == true (emission): DS_NOP culls and S_WAITCNT_DEPCTR waits are
  // inserted (or merged into a preceding wait). The function returns true if
  // the block was modified in any way. The computed state is expected to
  // match the stored Out state.
  bool runOnMachineBasicBlock(MachineBasicBlock &MBB, bool Emit) {
    // Kinds of wait that may be needed before the current instruction.
    enum { WA_VALU = 0x1, WA_SALU = 0x2, WA_VCC = 0x4 };

    HazardState State = BlockState[&MBB].In;
    SmallSet<Register, 8> SeenRegs;
    bool Emitted = false;
    unsigned DsNops = 0;

    for (MachineBasicBlock::instr_iterator MI = MBB.instr_begin(),
                                           E = MBB.instr_end();
         MI != E; ++MI) {
      if (MI->isMetaInstruction())
        continue;

      // Clear tracked SGPRs if sufficient DS_NOPs occur
      if (MI->getOpcode() == AMDGPU::DS_NOP) {
        if (++DsNops >= DsNopCount)
          State.Tracked.reset();
        continue;
      }
      DsNops = 0;

      // Snoop FLAT instructions to avoid adding culls before scratch/lds loads.
      // Culls could be disproportionate in cost to load time.
      if (SIInstrInfo::isFLAT(*MI) && !SIInstrInfo::isFLATGlobal(*MI))
        State.ActiveFlat = true;

      // SMEM or VMEM clears hazards
      // FIXME: adapt to add FLAT without VALU (so !isLDSDMA())?
      if ((SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI)) ||
          SIInstrInfo::isSMRD(*MI)) {
        State.VCCHazard = HazardState::None;
        State.SALUHazards.reset();
        State.VALUHazards.reset();
        continue;
      }

      // Existing S_WAITALU can clear hazards
      if (MI->getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
        unsigned int Mask = MI->getOperand(0).getImm();
        if (AMDGPU::DepCtr::decodeFieldVaVcc(Mask) == 0)
          State.VCCHazard &= ~HazardState::VALU;
        if (AMDGPU::DepCtr::decodeFieldSaSdst(Mask) == 0) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
        }
        if (AMDGPU::DepCtr::decodeFieldVaSdst(Mask) == 0)
          State.VALUHazards.reset();
        continue;
      }

      // Snoop counter waits to insert culls
      if (CullSGPRHazardsAtMemWait &&
          (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_SAMPLECNT ||
           MI->getOpcode() == AMDGPU::S_WAIT_BVHCNT) &&
          (MI->getOperand(0).isImm() && MI->getOperand(0).getImm() == 0) &&
          (State.Tracked.count() >= CullSGPRHazardsMemWaitThreshold)) {
        if (MI->getOpcode() == AMDGPU::S_WAIT_LOADCNT && State.ActiveFlat) {
          State.ActiveFlat = false;
        } else {
          State.Tracked.reset();
          if (Emit) {
            insertHazardCull(MBB, MI);
            // The inserted DS_NOPs modify the block; report that to the
            // caller even if no s_wait_alu ends up being emitted.
            Emitted = true;
          }
          continue;
        }
      }

      // Process only VALUs and SALUs
      bool IsVALU = SIInstrInfo::isVALU(*MI);
      bool IsSALU = SIInstrInfo::isSALU(*MI);
      if (!IsVALU && !IsSALU)
        continue;

      unsigned Wait = 0;

      // Handle one operand of the current instruction. For uses this
      // accumulates the required waits into Wait; for defs it records new
      // hazards in State. Non-SGPR operands and registers already seen since
      // SeenRegs was last cleared are ignored.
      auto processOperand = [&](const MachineOperand &Op, bool IsUse) {
        if (!Op.isReg())
          return;
        Register Reg = Op.getReg();
        assert(!Op.getSubReg());
        if (!TRI->isSGPRReg(*MRI, Reg))
          return;

        // Only visit each register once
        if (!SeenRegs.insert(Reg).second)
          return;

        auto RegNumber = sgprNumber(Reg, *TRI);
        if (!RegNumber)
          return;

        // Track SGPRs by pair -- numeric ID of an 64b SGPR pair.
        // i.e. SGPR0 = SGPR0_SGPR1 = 0, SGPR3 = SGPR2_SGPR3 = 1, etc
        unsigned RegN = *RegNumber;
        unsigned PairN = (RegN >> 1) & 0x3f;

        // Read/write of untracked register is safe; but must record any new
        // reads.
        if (!State.Tracked[PairN]) {
          if (IsVALU && IsUse)
            State.Tracked.set(PairN);
          return;
        }

        // Number of 32-bit SGPRs covered by this register.
        uint8_t SGPRCount =
            AMDGPU::getRegBitWidth(*TRI->getRegClassForReg(*MRI, Reg)) / 32;

        if (IsUse) {
          // SALU reading SGPR clears VALU hazards
          if (IsSALU) {
            if (isVCC(Reg)) {
              if (State.VCCHazard & HazardState::VALU)
                State.VCCHazard = HazardState::None;
            } else {
              State.VALUHazards.reset();
            }
          }
          // Compute required waits
          for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
            Wait |= State.SALUHazards[RegN + RegIdx] ? WA_SALU : 0;
            Wait |= IsVALU && State.VALUHazards[RegN + RegIdx] ? WA_VALU : 0;
          }
          if (isVCC(Reg) && State.VCCHazard) {
            // Note: it's possible for both SALU and VALU to exist if VCC
            // was updated differently by merged predecessors.
            if (State.VCCHazard & HazardState::SALU)
              Wait |= WA_SALU;
            if (State.VCCHazard & HazardState::VALU)
              Wait |= WA_VCC;
          }
        } else {
          // Update hazards
          if (isVCC(Reg)) {
            State.VCCHazard = IsSALU ? HazardState::SALU : HazardState::VALU;
          } else {
            for (uint8_t RegIdx = 0; RegIdx < SGPRCount; ++RegIdx) {
              if (IsSALU)
                State.SALUHazards.set(RegN + RegIdx);
              else
                State.VALUHazards.set(RegN + RegIdx);
            }
          }
        }
      };

      const bool IsSetPC =
          (MI->isCall() || MI->isReturn() || MI->isIndirectBranch()) &&
          MI->getOpcode() != AMDGPU::S_ENDPGM &&
          MI->getOpcode() != AMDGPU::S_ENDPGM_SAVED;

      // Only consider implicit VCC specified by instruction descriptor.
      const bool HasImplicitVCC =
          llvm::any_of(MI->getDesc().implicit_uses(), isVCC) ||
          llvm::any_of(MI->getDesc().implicit_defs(), isVCC);

      if (IsSetPC) {
        // All SGPR writes before a call/return must be flushed as the
        // callee/caller will not see the hazard chain.
        if (State.VCCHazard & HazardState::VALU)
          Wait |= WA_VCC;
        if (State.SALUHazards.any() || (State.VCCHazard & HazardState::SALU))
          Wait |= WA_SALU;
        if (State.VALUHazards.any())
          Wait |= WA_VALU;
        if (CullSGPRHazardsOnFunctionBoundary && State.Tracked.any()) {
          State.Tracked.reset();
          if (Emit) {
            insertHazardCull(MBB, MI);
            // The inserted DS_NOPs modify the block; report that to the
            // caller even if no s_wait_alu ends up being emitted.
            Emitted = true;
          }
        }
      } else {
        // Process uses to determine required wait.
        SeenRegs.clear();
        for (const MachineOperand &Op : MI->all_uses()) {
          if (Op.isImplicit() &&
              (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
            continue;
          processOperand(Op, true);
        }
      }

      // Apply wait
      if (Wait) {
        unsigned Mask = AMDGPU::DepCtr::getDefaultDepCtrEncoding(*ST);
        if (Wait & WA_VCC) {
          State.VCCHazard &= ~HazardState::VALU;
          Mask = AMDGPU::DepCtr::encodeFieldVaVcc(Mask, 0);
        }
        if (Wait & WA_SALU) {
          State.SALUHazards.reset();
          State.VCCHazard &= ~HazardState::SALU;
          Mask = AMDGPU::DepCtr::encodeFieldSaSdst(Mask, 0);
        }
        if (Wait & WA_VALU) {
          State.VALUHazards.reset();
          Mask = AMDGPU::DepCtr::encodeFieldVaSdst(Mask, 0);
        }
        if (Emit) {
          // Prefer widening a wait that directly precedes MI over emitting a
          // second one.
          if (!mergeConsecutiveWaitAlus(MI, Mask)) {
            auto NewMI = BuildMI(MBB, MI, MI->getDebugLoc(),
                                 TII->get(AMDGPU::S_WAITCNT_DEPCTR))
                             .addImm(Mask);
            updateGetPCBundle(NewMI);
          }
          Emitted = true;
        }
      }

      // On return from a call SGPR state is unknown, so treat all SGPRs as
      // potential hazards.
      if (MI->isCall() && !CullSGPRHazardsOnFunctionBoundary)
        State.Tracked.set();

      // Update hazards based on defs.
      SeenRegs.clear();
      for (const MachineOperand &Op : MI->all_defs()) {
        if (Op.isImplicit() &&
            (!HasImplicitVCC || !Op.isReg() || !isVCC(Op.getReg())))
          continue;
        processOperand(Op, false);
      }
    }

    BlockHazardState &BS = BlockState[&MBB];
    bool Changed = State != BS.Out;
    if (Emit) {
      assert(!Changed && "Hazard state should not change on emit pass");
      return Emitted;
    }
    if (Changed)
      BS.Out = State;
    return Changed;
  }


  // Process one machine function: resolve the settings, compute the hazard
  // state of every basic block by iterating to a fixed point, then make one
  // final pass over all blocks that inserts the required instructions.
  // Returns false without doing anything when the subtarget does not have the
  // VALU-read-SGPR hazard or when the pass is disabled; otherwise returns
  // whether the emission pass reported a change.
  bool run(MachineFunction &MF) {
    ST = &MF.getSubtarget<GCNSubtarget>();
    if (!ST->hasVALUReadSGPRHazard())
      return false;

    const auto &Fn = MF.getFunction();

    // Parse settings: start from the command-line value and, if the option
    // was not given explicitly, take the function attribute instead.
    EnableSGPRHazardWaits = GlobalEnableSGPRHazardWaits;
    if (GlobalEnableSGPRHazardWaits.getNumOccurrences() == 0)
      EnableSGPRHazardWaits = Fn.getFnAttributeAsParsedInteger(
          "amdgpu-sgpr-hazard-wait", EnableSGPRHazardWaits);

    CullSGPRHazardsOnFunctionBoundary = GlobalCullSGPRHazardsOnFunctionBoundary;
    if (GlobalCullSGPRHazardsOnFunctionBoundary.getNumOccurrences() == 0)
      CullSGPRHazardsOnFunctionBoundary =
          Fn.hasFnAttribute("amdgpu-sgpr-hazard-boundary-cull");

    CullSGPRHazardsAtMemWait = GlobalCullSGPRHazardsAtMemWait;
    if (GlobalCullSGPRHazardsAtMemWait.getNumOccurrences() == 0)
      CullSGPRHazardsAtMemWait =
          Fn.hasFnAttribute("amdgpu-sgpr-hazard-mem-wait-cull");

    CullSGPRHazardsMemWaitThreshold = GlobalCullSGPRHazardsMemWaitThreshold;
    if (GlobalCullSGPRHazardsMemWaitThreshold.getNumOccurrences() == 0)
      CullSGPRHazardsMemWaitThreshold = Fn.getFnAttributeAsParsedInteger(
          "amdgpu-sgpr-hazard-mem-wait-cull-threshold",
          CullSGPRHazardsMemWaitThreshold);

    // Bail if disabled
    if (!EnableSGPRHazardWaits)
      return false;

    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();
    MRI = &MF.getRegInfo();
    DsNopCount = ST->isWave64() ? WAVE64_NOPS : WAVE32_NOPS;

    const auto CC = Fn.getCallingConv();
    const bool IsCalledFunction = !AMDGPU::isEntryFunctionCC(CC);
    if (IsCalledFunction && !CullSGPRHazardsOnFunctionBoundary) {
      // Callee must consider all SGPRs as tracked.
      LLVM_DEBUG(dbgs() << "Is called function, track all SGPRs.\n");
      MachineBasicBlock &EntryBlock = MF.front();
      BlockState[&EntryBlock].In.Tracked.set();
    }

    // Calculate the hazard state for each basic block.
    // Iterate until a fixed point is reached.
    // Fixed point is guaranteed as merge function only ever increases
    // the hazard set, and all backedges will cause a merge.
    //
    // Note: we have to take care of the entry block as this technically
    // has an edge from outside the function. Failure to treat this as
    // a merge could prevent fixed point being reached.
    SetVector<MachineBasicBlock *> Worklist;
    for (auto &MBB : reverse(MF))
      Worklist.insert(&MBB);

    while (!Worklist.empty()) {
      MachineBasicBlock &Block = *Worklist.pop_back_val();
      if (!runOnMachineBasicBlock(Block, false))
        continue;

      // Note: take a copy of state here in case it is reallocated by map
      const HazardState OutState = BlockState[&Block].Out;

      // Propagate to all successor blocks
      for (MachineBasicBlock *Succ : Block.successors()) {
        HazardState &SuccIn = BlockState[Succ].In;

        // We only need to merge hazards at CFG merge points; a block with a
        // single predecessor (other than the entry block) simply takes over
        // the predecessor's outgoing state.
        const bool TakesOverState =
            Succ->getSinglePredecessor() && !Succ->isEntryBlock();

        bool Requeue;
        if (TakesOverState) {
          Requeue = SuccIn != OutState;
          if (Requeue)
            SuccIn = OutState;
        } else {
          Requeue = SuccIn.merge(OutState);
        }

        if (Requeue)
          Worklist.insert(Succ);
      }
    }

    LLVM_DEBUG(dbgs() << "Emit s_wait_alu instructions\n");

    // Final pass to emit wait instructions.
    bool Modified = false;
    for (MachineBasicBlock &Block : MF)
      Modified |= runOnMachineBasicBlock(Block, true);

    BlockState.clear();
    return Modified;
  }

};


class AMDGPUWaitSGPRHazardsLegacy : public MachineFunctionPass {

public:

  static char ID;


  AMDGPUWaitSGPRHazardsLegacy() : MachineFunctionPass(ID) {}


  bool runOnMachineFunction(MachineFunction &MF) override {

    return AMDGPUWaitSGPRHazards().run(MF);

  }


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.setPreservesCFG();

    MachineFunctionPass::getAnalysisUsage(AU);

  }

};


} // namespace


// Storage for the legacy pass's ID member.
char AMDGPUWaitSGPRHazardsLegacy::ID = 0;


// Public reference to the legacy pass's ID.
char &llvm::AMDGPUWaitSGPRHazardsLegacyID = AMDGPUWaitSGPRHazardsLegacy::ID;


// Register the legacy pass under the DEBUG_TYPE name.
INITIALIZE_PASS(AMDGPUWaitSGPRHazardsLegacy, DEBUG_TYPE,

                "AMDGPU Insert waits for SGPR read hazards", false, false)


// New pass manager entry point. Runs the implementation on MF and reports
// all analyses as preserved when it made no change; otherwise only the
// default set for machine function passes is preserved.
PreservedAnalyses
AMDGPUWaitSGPRHazardsPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  const bool Modified = AMDGPUWaitSGPRHazards().run(MF);
  if (!Modified)
    return PreservedAnalyses::all();
  return getMachineFunctionPassPreservedAnalyses();
}


MRI
unsigned const MachineRegisterInfo * MRI
Definition AArch64AdvSIMDScalarPass.cpp:103

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

AMDGPUMCTargetDesc.h
Provides AMDGPU specific target descriptions.

GlobalCullSGPRHazardsAtMemWait
static cl::opt< bool > GlobalCullSGPRHazardsAtMemWait("amdgpu-sgpr-hazard-mem-wait-cull", cl::init(false), cl::Hidden, cl::desc("Cull hazards on memory waits"))

GlobalCullSGPRHazardsMemWaitThreshold
static cl::opt< unsigned > GlobalCullSGPRHazardsMemWaitThreshold("amdgpu-sgpr-hazard-mem-wait-cull-threshold", cl::init(8), cl::Hidden, cl::desc("Number of tracked SGPRs before initiating hazard cull on memory " "wait"))

GlobalCullSGPRHazardsOnFunctionBoundary
static cl::opt< bool > GlobalCullSGPRHazardsOnFunctionBoundary("amdgpu-sgpr-hazard-boundary-cull", cl::init(false), cl::Hidden, cl::desc("Cull hazards on function boundaries"))

GlobalEnableSGPRHazardWaits
static cl::opt< bool > GlobalEnableSGPRHazardWaits("amdgpu-sgpr-hazard-wait", cl::init(true), cl::Hidden, cl::desc("Enable required s_wait_alu on SGPR hazards"))

AMDGPUWaitSGPRHazards.h

AMDGPU.h

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

updateGetPCBundle
static void updateGetPCBundle(MachineInstr *NewMI)
Definition GCNHazardRecognizer.cpp:3262

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

DEBUG_TYPE
#define DEBUG_TYPE
Definition GenericCycleImpl.h:31

TII
const HexagonInstrInfo * TII
Definition HexagonCopyToCombine.cpp:118

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

merge
static LoopDeletionResult merge(LoopDeletionResult A, LoopDeletionResult B)
Definition LoopDeletion.cpp:51

I
#define I(x, y, z)
Definition MD5.cpp:57

Reg
Register Reg
Definition MachineSink.cpp:2117

TRI
Register const TargetRegisterInfo * TRI
Definition MachineSink.cpp:2118

Register
Promote Memory to Register
Definition Mem2Reg.cpp:110

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56

SIInstrInfo.h
Interface definition for SIInstrInfo.

SetVector.h
This file implements a set that has insertion order iteration characteristics.

LLVM_DEBUG
#define LLVM_DEBUG(...)
Definition Debug.h:114

RHS
Value * RHS
Definition X86PartialReduction.cpp:81

llvm::AMDGPUWaitSGPRHazardsPass
Definition AMDGPUWaitSGPRHazards.h:17

llvm::AnalysisUsage::setPreservesCFG
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270

llvm::Function::getFnAttributeAsParsedInteger
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:777

llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270

llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730

llvm::GCNSubtarget
Definition GCNSubtarget.h:34

llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition MachineBasicBlock.h:369

llvm::MachineBasicBlock::instr_iterator
Instructions::iterator instr_iterator
Definition MachineBasicBlock.h:344

llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition MachineBasicBlock.h:371

llvm::MachineBasicBlock::end
iterator end()
Definition MachineBasicBlock.h:387

llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition MachineBasicBlock.h:474

llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition MachineFunctionPass.h:31

llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition MachineFunctionPass.cpp:184

llvm::MachineFunction
Definition MachineFunction.h:286

llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition MachineFunction.h:762

llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition MachineFunction.h:772

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition MachineFunction.h:733

llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition MachineFunction.h:996

llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition MachineInstrBuilder.h:175

llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition MachineInstr.h:598

llvm::MachineInstr::getParent
const MachineBasicBlock * getParent() const
Definition MachineInstr.h:370

llvm::MachineInstr::isBundled
bool isBundled() const
Return true if this instruction part of a bundle.
Definition MachineInstr.h:495

llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition MachineRegisterInfo.h:53

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:20

llvm::SIInstrInfo
Definition SIInstrInfo.h:90

llvm::SIInstrInfo::isVMEM
static bool isVMEM(const MachineInstr &MI)
Definition SIInstrInfo.h:482

llvm::SIInstrInfo::isSMRD
static bool isSMRD(const MachineInstr &MI)
Definition SIInstrInfo.h:598

llvm::SIInstrInfo::isSALU
static bool isSALU(const MachineInstr &MI)
Definition SIInstrInfo.h:452

llvm::SIInstrInfo::isFLATGlobal
static bool isFLATGlobal(const MachineInstr &MI)
Definition SIInstrInfo.h:684

llvm::SIInstrInfo::isFLAT
static bool isFLAT(const MachineInstr &MI)
Definition SIInstrInfo.h:668

llvm::SIInstrInfo::isVALU
static bool isVALU(const MachineInstr &MI)
Definition SIInstrInfo.h:466

llvm::SIRegisterInfo
Definition SIRegisterInfo.h:40

llvm::SetVector::empty
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151

llvm::SetVector::pop_back_val
value_type pop_back_val()
Definition SetVector.h:279

llvm::SmallSet::clear
void clear()
Definition SmallSet.h:210

llvm::SmallSet::insert
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183

llvm::cl::Option::getNumOccurrences
int getNumOccurrences() const
Definition CommandLine.h:400

llvm::cl::opt
Definition CommandLine.h:1454

llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition ilist_node.h:123

Changed
Changed
Definition ObjCARCOpts.cpp:2369

llvm::AMDGPU::DepCtr::decodeFieldVaVcc
unsigned decodeFieldVaVcc(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2065

llvm::AMDGPU::DepCtr::encodeFieldVaVcc
unsigned encodeFieldVaVcc(unsigned Encoded, unsigned VaVcc)
Definition AMDGPUBaseInfo.cpp:2113

llvm::AMDGPU::DepCtr::encodeFieldHoldCnt
unsigned encodeFieldHoldCnt(unsigned Encoded, unsigned HoldCnt)
Definition AMDGPUBaseInfo.cpp:2131

llvm::AMDGPU::DepCtr::encodeFieldVaSsrc
unsigned encodeFieldVaSsrc(unsigned Encoded, unsigned VaSsrc)
Definition AMDGPUBaseInfo.cpp:2122

llvm::AMDGPU::DepCtr::encodeFieldVaVdst
unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst)
Definition AMDGPUBaseInfo.cpp:2086

llvm::AMDGPU::DepCtr::decodeFieldSaSdst
unsigned decodeFieldSaSdst(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2057

llvm::AMDGPU::DepCtr::decodeFieldVaSdst
unsigned decodeFieldVaSdst(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2061

llvm::AMDGPU::DepCtr::encodeFieldVmVsrc
unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc)
Definition AMDGPUBaseInfo.cpp:2077

llvm::AMDGPU::DepCtr::decodeFieldVaSsrc
unsigned decodeFieldVaSsrc(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2069

llvm::AMDGPU::DepCtr::encodeFieldSaSdst
unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst)
Definition AMDGPUBaseInfo.cpp:2095

llvm::AMDGPU::DepCtr::decodeFieldVaVdst
unsigned decodeFieldVaVdst(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2053

llvm::AMDGPU::DepCtr::decodeFieldHoldCnt
unsigned decodeFieldHoldCnt(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2073

llvm::AMDGPU::DepCtr::getDefaultDepCtrEncoding
int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2024

llvm::AMDGPU::DepCtr::decodeFieldVmVsrc
unsigned decodeFieldVmVsrc(unsigned Encoded)
Definition AMDGPUBaseInfo.cpp:2049

llvm::AMDGPU::DepCtr::encodeFieldVaSdst
unsigned encodeFieldVaSdst(unsigned Encoded, unsigned VaSdst)
Definition AMDGPUBaseInfo.cpp:2104

llvm::AMDGPU::isEntryFunctionCC
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
Definition AMDGPUBaseInfo.h:1464

llvm::AMDGPU::getRegBitWidth
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
Definition SIRegisterInfo.cpp:3327

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:126

llvm::cl::Hidden
@ Hidden
Definition CommandLine.h:138

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition CommandLine.h:444

llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition PointerTypeAnalysis.cpp:205

llvm::ms_demangle::CallingConv
CallingConv
Definition MicrosoftDemangleNodes.h:58

llvm::orc::SymbolState::Emitted
@ Emitted
Assigned address, still materializing.
Definition Core.h:794

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26

llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition MachineInstrBuilder.h:391

llvm::Wait
@ Wait
Definition Threading.h:60

llvm::operator!=
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2114

llvm::AMDGPUWaitSGPRHazardsLegacyID
char & AMDGPUWaitSGPRHazardsLegacyID
Definition AMDGPUWaitSGPRHazards.cpp:550

llvm::MachineFunctionAnalysisManager
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
Definition MachineFunctionAnalysisManager.h:24

llvm::operator==
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
Definition AddressRanges.h:151

llvm::getMachineFunctionPassPreservedAnalyses
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
Definition MachinePassManager.cpp:162

llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744

llvm::reverse
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406

llvm::dbgs
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207

llvm::Count
FunctionAddr VTableAddr Count
Definition InstrProf.h:139

llvm::Op
DWARFExpression::Operation Op
Definition DWARFExpressionPrinter.cpp:22

llvm::operator|=
bool operator|=(SparseBitVector< ElementSize > &LHS, const SparseBitVector< ElementSize > *RHS)
Definition SparseBitVector.h:820

llvm::prev_nodbg
IterT prev_nodbg(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It, then continue decrementing it while it points to a debug instruction.
Definition MachineBasicBlock.h:1524

llvm::cl::desc
Definition CommandLine.h:410