docs/doxygen/AMDGPUAsmPrinter_8cpp_source.html

//===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

///

/// The AMDGPUAsmPrinter is used to print both assembly string and also binary

/// code.  When passed an MCAsmStreamer it prints assembly and when passed

/// an MCObjectStreamer it outputs binary code.

//

//===----------------------------------------------------------------------===//

//


#include "AMDGPUAsmPrinter.h"

#include "AMDGPU.h"

#include "AMDGPUHSAMetadataStreamer.h"

#include "AMDGPUMCResourceInfo.h"

#include "AMDGPUResourceUsageAnalysis.h"

#include "AMDGPUTargetMachine.h"

#include "GCNSubtarget.h"

#include "MCTargetDesc/AMDGPUInstPrinter.h"

#include "MCTargetDesc/AMDGPUMCExpr.h"

#include "MCTargetDesc/AMDGPUMCKernelDescriptor.h"

#include "MCTargetDesc/AMDGPUTargetStreamer.h"

#include "R600AsmPrinter.h"

#include "SIMachineFunctionInfo.h"

#include "TargetInfo/AMDGPUTargetInfo.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "Utils/AMDKernelCodeTUtils.h"

#include "Utils/SIDefinesUtils.h"

#include "llvm/ADT/StringSet.h"

#include "llvm/Analysis/OptimizationRemarkEmitter.h"

#include "llvm/BinaryFormat/ELF.h"

#include "llvm/CodeGen/AsmPrinterHandler.h"

#include "llvm/CodeGen/MachineFrameInfo.h"

#include "llvm/CodeGen/MachineModuleInfo.h"

#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"

#include "llvm/IR/DiagnosticInfo.h"

#include "llvm/MC/MCAssembler.h"

#include "llvm/MC/MCContext.h"

#include "llvm/MC/MCSectionELF.h"

#include "llvm/MC/MCStreamer.h"

#include "llvm/MC/MCValue.h"

#include "llvm/MC/TargetRegistry.h"

#include "llvm/Support/AMDHSAKernelDescriptor.h"

#include "llvm/Support/Compiler.h"

#include "llvm/Target/TargetLoweringObjectFile.h"

#include "llvm/Target/TargetMachine.h"

#include "llvm/TargetParser/AMDGPUTargetParser.h"


using namespace llvm;

using namespace llvm::AMDGPU;


// This should get the default rounding mode from the kernel. We just set the

// default here, but this could change if the OpenCL rounding mode pragmas are

// used.

//

// The denormal mode here should match what is reported by the OpenCL runtime

// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but

// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.

//

// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double

// precision, and leaves single precision to flush all and does not report

// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports

// CL_FP_DENORM for both.

//

// FIXME: It seems some instructions do not support single precision denormals

// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,

// and sin_f32, cos_f32 on most parts).


// We want to use these instructions, and using fp32 denormals also causes

// instructions to run at the double precision rate for the device so it's

// probably best to just report no single precision denormals.


/// Builds the FP mode value: round-to-nearest for both single and double
/// precision, combined with the denormal settings taken from \p Mode.
static uint32_t getFPMode(SIModeRegisterDefaults Mode) {
  const uint32_t RoundBits = FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
                             FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST);
  const uint32_t DenormBits = FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
                              FP_DENORM_MODE_DP(Mode.fpDenormModeDPValue());
  return RoundBits | DenormBits;
}


/// Allocates a new AMDGPUAsmPrinter for \p tm and hands \p Streamer over to
/// it.
static AsmPrinter *
createAMDGPUAsmPrinterPass(TargetMachine &tm,
                           std::unique_ptr<MCStreamer> &&Streamer) {
  std::unique_ptr<MCStreamer> OwnedStreamer = std::move(Streamer);
  return new AMDGPUAsmPrinter(tm, std::move(OwnedStreamer));
}


/// Registers the assembly printer factories for the R600 and GCN targets with
/// the TargetRegistry.
extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void
LLVMInitializeAMDGPUAsmPrinter() {
  Target &R600 = getTheR600Target();
  Target &GCN = getTheGCNTarget();
  TargetRegistry::RegisterAsmPrinter(R600, llvm::createR600AsmPrinterPass);
  TargetRegistry::RegisterAsmPrinter(GCN, createAMDGPUAsmPrinterPass);
}


namespace {
/// AsmPrinterHandler that forwards the end-of-function notification to an
/// AMDGPUAsmPrinter. The other hooks it overrides are no-ops.
class AMDGPUAsmPrinterHandler : public AsmPrinterHandler {
protected:
  /// Printer whose endFunction() is invoked; stored as a raw pointer.
  AMDGPUAsmPrinter *Asm;

public:
  /// \param A printer to forward to. The constructor is explicit so that a
  /// printer pointer is never implicitly converted into a handler.
  explicit AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *A) : Asm(A) {}

  void beginFunction(const MachineFunction *MF) override {}

  /// Delegates to AMDGPUAsmPrinter::endFunction for \p MF.
  void endFunction(const MachineFunction *MF) override { Asm->endFunction(MF); }

  void endModule() override {}
};
} // End anonymous namespace


/// Constructs the printer on top of the generic AsmPrinter, moving
/// \p Streamer into the base class.
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
                                   std::unique_ptr<MCStreamer> Streamer)
    : AsmPrinter(TM, std::move(Streamer)) {
  // A printer without an output streamer cannot emit anything.
  assert(OutStreamer != nullptr && "AsmPrinter constructed without streamer");
}


/// Returns the human-readable name of this pass.
StringRef AMDGPUAsmPrinter::getPassName() const {
  return StringRef("AMDGPU Assembly Printer");
}


/// Returns the address of the subtarget info owned by the target machine.
const MCSubtargetInfo *AMDGPUAsmPrinter::getGlobalSTI() const {
  const MCSubtargetInfo &GlobalSTI = TM.getMCSubtargetInfo();
  return &GlobalSTI;
}


/// Returns the output streamer's target streamer cast to
/// AMDGPUTargetStreamer, or nullptr when there is no output streamer.
AMDGPUTargetStreamer *AMDGPUAsmPrinter::getTargetStreamer() const {
  return OutStreamer ? static_cast<AMDGPUTargetStreamer *>(
                           OutStreamer->getTargetStreamer())
                     : nullptr;
}


/// Clears the "target streamer initialized" flag; initTargetStreamer() sets
/// it again when it runs.
void AMDGPUAsmPrinter::emitStartOfAsmFile(Module &M) {
  // Only the flag is reset here; \p M is not inspected.
  IsTargetStreamerInitialized = false;
}


/// Sets up the target streamer for \p M: initializes the target ID if the
/// streamer has none yet, and on AMDHSA/AMDPAL emits the amdgcn target
/// directive. AMDHSA additionally emits the code object version and begins the
/// HSA metadata stream; AMDPAL reads its PAL metadata from the IR.
void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
  IsTargetStreamerInitialized = true;

  AMDGPUTargetStreamer *TS = getTargetStreamer();

  // TODO: Which one is called first, emitStartOfAsmFile or
  // emitFunctionBodyStart?
  if (TS && !TS->getTargetID())
    initializeTargetID(M);

  const Triple::OSType OS = TM.getTargetTriple().getOS();
  const bool IsHSA = OS == Triple::AMDHSA;
  const bool IsPAL = OS == Triple::AMDPAL;
  if (!IsHSA && !IsPAL)
    return;

  // NOTE(review): TS is null-checked above but dereferenced unconditionally
  // from here on, exactly as before - confirm a streamer always exists on
  // AMDHSA/AMDPAL.
  TS->EmitDirectiveAMDGCNTarget();

  if (IsHSA) {
    TS->EmitDirectiveAMDHSACodeObjectVersion(CodeObjectVersion);
    HSAMetadataStream->begin(M, *TS->getTargetID());
  }

  if (IsPAL)
    TS->getPALMetadata()->readFromIR(M);
}


/// Finishes the module: initializes the target streamer if that has not
/// happened yet, then either emits the ISA version (non-AMDHSA) or closes and
/// emits the HSA metadata stream (AMDHSA).
void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
  // Init target streamer if it has not yet happened.
  if (!IsTargetStreamerInitialized)
    initTargetStreamer(M);

  const bool IsHSA = TM.getTargetTriple().getOS() == Triple::AMDHSA;
  if (!IsHSA) {
    getTargetStreamer()->EmitISAVersion();
    return;
  }

  // Finish the HSA metadata stream and emit it through the target streamer.
  HSAMetadataStream->end();
  bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
  (void)Success;
  assert(Success && "Malformed HSA Metadata");
}


void AMDGPUAsmPrinter::emitFunctionBodyStart() {

  const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();

  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();

  const Function &F = MF->getFunction();


  // TODO: We're checking this late, would be nice to check it earlier.

  if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) {

    reportFatalUsageError(

        STM.getCPU() + " is only available on code object version 6 or better");

  }


  // TODO: Which one is called first, emitStartOfAsmFile or

  // emitFunctionBodyStart?

  if (!getTargetStreamer()->getTargetID())

    initializeTargetID(*F.getParent());


  const auto &FunctionTargetID = STM.getTargetID();

  // Make sure function's xnack settings are compatible with module's

  // xnack settings.

  if (FunctionTargetID.isXnackSupported() &&

      FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&

      FunctionTargetID.getXnackSetting() !=

          getTargetStreamer()->getTargetID()->getXnackSetting()) {

    OutContext.reportError(

        {}, "xnack setting of '" + Twine(MF->getName()) +

                "' function does not match module xnack setting");

    return;

  }

  // Make sure function's sramecc settings are compatible with module's

  // sramecc settings.

  if (FunctionTargetID.isSramEccSupported() &&

      FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&

      FunctionTargetID.getSramEccSetting() !=

          getTargetStreamer()->getTargetID()->getSramEccSetting()) {

    OutContext.reportError(

        {}, "sramecc setting of '" + Twine(MF->getName()) +

                "' function does not match module sramecc setting");

    return;

  }


  if (!MFI.isEntryFunction())

    return;


  if (STM.isMesaKernel(F) &&

      (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||

       F.getCallingConv() == CallingConv::SPIR_KERNEL)) {

    AMDGPUMCKernelCodeT KernelCode;

    getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);

    KernelCode.validate(&STM, MF->getContext());

    getTargetStreamer()->EmitAMDKernelCodeT(KernelCode);

  }


  if (STM.isAmdHsaOS())

    HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);

}


/// Replace a bit field inside a kernel descriptor MCExpr:
///   result = (Dst & ~Mask) | (Value << Shift)
/// \p Value is shifted as-is; it is not masked by this helper.
static const MCExpr *setBits(const MCExpr *Dst, const MCExpr *Value,
                             uint32_t Mask, uint32_t Shift, MCContext &Ctx) {
  const MCExpr *ShiftAmount = MCConstantExpr::create(Shift, Ctx);
  const MCExpr *FieldMask = MCConstantExpr::create(Mask, Ctx);
  const MCExpr *Cleared = MCBinaryExpr::createAnd(
      Dst, MCUnaryExpr::createNot(FieldMask, Ctx), Ctx);
  const MCExpr *Shifted = MCBinaryExpr::createShl(Value, ShiftAmount, Ctx);
  return MCBinaryExpr::createOr(Cleared, Shifted, Ctx);
}


void AMDGPUAsmPrinter::endFunction(const MachineFunction *MF) {

  const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();

  if (!MFI.isEntryFunction())

    return;


  assert(TM.getTargetTriple().getOS() == Triple::AMDHSA);


  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();

  MCContext &Ctx = MF->getContext();


  AMDGPU::MCKernelDescriptor KD =

      getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo);


  // Compute inst_pref_size using MCExpr label subtraction for exact code

  // size. At this point .Lfunc_end has been emitted (by the base AsmPrinter)

  // right after the function code, so (Lfunc_end - func_sym) gives the

  // exact function code size in bytes.

  if (STM.hasInstPrefSize()) {

    const MCExpr *CodeSizeExpr = MCBinaryExpr::createSub(

        MCSymbolRefExpr::create(getFunctionEnd(), OutContext),

        MCSymbolRefExpr::create(CurrentFnSym, OutContext), OutContext);


    uint32_t Mask, Shift, Width, CacheLineSize;

    STM.getInstPrefSizeArgs(Mask, Shift, Width, CacheLineSize);

    const MCExpr *InstPrefSize =

        AMDGPUMCExpr::createInstPrefSize(CodeSizeExpr, Ctx);

    KD.compute_pgm_rsrc3 =

        setBits(KD.compute_pgm_rsrc3, InstPrefSize, Mask, Shift, Ctx);

  }


  auto &Streamer = getTargetStreamer()->getStreamer();

  auto &Context = Streamer.getContext();

  auto &ObjectFileInfo = *Context.getObjectFileInfo();

  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();


  Streamer.pushSection();

  Streamer.switchSection(&ReadOnlySection);


  // CP microcode requires the kernel descriptor to be allocated on 64 byte

  // alignment.

  Streamer.emitValueToAlignment(Align(64), 0, 1, 0);

  ReadOnlySection.ensureMinAlignment(Align(64));


  SmallString<128> KernelName;

  getNameWithPrefix(KernelName, &MF->getFunction());

  getTargetStreamer()->EmitAmdhsaKernelDescriptor(

      STM, KernelName, KD, CurrentProgramInfo.NumVGPRsForWavesPerEU,

      MCBinaryExpr::createSub(

          CurrentProgramInfo.NumSGPRsForWavesPerEU,

          AMDGPUMCExpr::createExtraSGPRs(

              CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,

              getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Context),

          Context),

      CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);


  Streamer.popSection();

}


void AMDGPUAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {

  Register RegNo = MI->getOperand(0).getReg();


  SmallString<128> Str;

  raw_svector_ostream OS(Str);

  OS << "implicit-def: "

     << printReg(RegNo, MF->getSubtarget().getRegisterInfo());


  if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)

    OS << " : SGPR spill to VGPR lane";


  OutStreamer->AddComment(OS.str());

  OutStreamer->addBlankLine();

}


/// Emits the entry label of the current function.
///
/// On AMDHSA this is just the generic AsmPrinter label. On other OSes an
/// entry function for which isAmdHsaOrMesa() holds is first tagged with the
/// STT_AMDGPU_HSA_KERNEL symbol type, and, when code dumping is active, a
/// "<name>:" line is recorded in the disassembly listing before the generic
/// label is emitted.
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
    AsmPrinter::emitFunctionEntryLabel();
    return;
  }

  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
    SmallString<128> SymbolName;
    // Two separate statements: previously these calls were joined by an
    // accidental comma operator.
    getNameWithPrefix(SymbolName, &MF->getFunction());
    getTargetStreamer()->EmitAMDGPUSymbolType(SymbolName,
                                              ELF::STT_AMDGPU_HSA_KERNEL);
  }
  if (DumpCodeInstEmitter) {
    // Disassemble function name label to text.
    DisasmLines.push_back(MF->getName().str() + ":");
    DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
    // Keep HexLines in step with DisasmLines: a label has no encoding.
    HexLines.emplace_back("");
  }

  AsmPrinter::emitFunctionEntryLabel();
}


/// Emits the start of \p MBB. When code dumping is active and the block is not
/// reachable only by fallthrough, a "BB<fn>_<n>:" line is first recorded in
/// the disassembly listing.
void AMDGPUAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
  const bool NeedsLabelLine =
      DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB);
  if (NeedsLabelLine) {
    std::string Label = (Twine("BB") + Twine(getFunctionNumber()) + "_" +
                         Twine(MBB.getNumber()) + ":")
                            .str();
    DisasmLineMaxLen = std::max(DisasmLineMaxLen, Label.size());
    DisasmLines.push_back(std::move(Label));
    // A label line has no encoding bytes, so its hex column is empty.
    HexLines.emplace_back("");
  }
  AsmPrinter::emitBasicBlockStart(MBB);
}


void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {

  if (GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {

    if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {

      OutContext.reportError({},

                             Twine(GV->getName()) +

                                 ": unsupported initializer for address space");

      return;

    }


    const Triple::OSType OS = TM.getTargetTriple().getOS();

    if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) {

      if (!AMDGPUTargetMachine::EnableObjectLinking)

        return;

      // With object linking, LDS definitions should have been externalized

      // by earlier passes (e.g. LDS lowering, named barrier lowering).

      // Only declarations reach here, emitted as SHN_AMDGPU_LDS symbols

      // so the linker can assign their offsets.

      assert(GV->isDeclaration() &&

             "LDS definitions should have been externalized when object "

             "linking is enabled");

    }


    MCSymbol *GVSym = getSymbol(GV);


    GVSym->redefineIfPossible();

    if (GVSym->isDefined() || GVSym->isVariable())

      report_fatal_error("symbol '" + Twine(GVSym->getName()) +

                         "' is already defined");


    const DataLayout &DL = GV->getDataLayout();

    uint64_t Size = GV->getGlobalSize(DL);

    Align Alignment = GV->getAlign().value_or(Align(4));


    emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());

    emitLinkage(GV, GVSym);

    auto *TS = getTargetStreamer();

    TS->emitAMDGPULDS(GVSym, Size, Alignment);

    return;

  }


  AsmPrinter::emitGlobalVariable(GV);

}


/// Reads the code object version from \p M and, on AMDHSA, creates the
/// matching HSA metadata streamer and registers the end-of-function handler
/// before running the generic AsmPrinter initialization.
bool AMDGPUAsmPrinter::doInitialization(Module &M) {
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(M);

  const bool IsHSA = TM.getTargetTriple().getOS() == Triple::AMDHSA;
  if (!IsHSA)
    return AsmPrinter::doInitialization(M);

  // Pick the metadata streamer that matches the code object version.
  switch (CodeObjectVersion) {
  case AMDGPU::AMDHSA_COV4:
    HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV4>();
    break;
  case AMDGPU::AMDHSA_COV5:
    HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV5>();
    break;
  case AMDGPU::AMDHSA_COV6:
    HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV6>();
    break;
  default:
    reportFatalUsageError("unsupported code object version");
  }

  addAsmPrinterHandler(std::make_unique<AMDGPUAsmPrinterHandler>(this));

  return AsmPrinter::doInitialization(M);
}


/// Mimics GCNSubtarget::computeOccupancy for MCExpr.
///
/// Builds an AGVK_Occupancy expression whose operands are, in order: the
/// maximum waves per EU, the VGPR allocation granule, the total number of
/// VGPRs, the subtarget generation, \p InitOcc, \p NumSGPRs and \p NumVGPRs.
/// Only these subtarget-derived constants are captured from \p STM, so the
/// expression itself does not depend on GCNSubtarget. Should match the
/// computeOccupancy implementation.
const AMDGPUMCExpr *createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
                                    const MCExpr *NumVGPRs,
                                    unsigned DynamicVGPRBlockSize,
                                    const GCNSubtarget &STM, MCContext &Ctx) {
  const unsigned WavesPerEULimit = IsaInfo::getMaxWavesPerEU(STM);
  const unsigned VGPRGranule =
      IsaInfo::getVGPRAllocGranule(STM, DynamicVGPRBlockSize);
  const unsigned TotalVGPRs = IsaInfo::getTotalNumVGPRs(STM);
  const unsigned Gen = STM.getGeneration();

  auto Const = [&Ctx](unsigned V) { return MCConstantExpr::create(V, Ctx); };

  const MCExpr *Operands[] = {Const(WavesPerEULimit),
                              Const(VGPRGranule),
                              Const(TotalVGPRs),
                              Const(Gen),
                              Const(InitOcc),
                              NumSGPRs,
                              NumVGPRs};
  return AMDGPUMCExpr::create(AMDGPUMCExpr::AGVK_Occupancy, Operands, Ctx);
}


/// Checks the resource-usage symbols recorded for \p F against the subtarget
/// limits and reports a diagnostic through F's LLVMContext on a violation.
///
/// Only defined functions with a module-entry calling convention are checked.
/// A check is skipped when the symbol it needs is not a variable or its value
/// cannot be evaluated to an absolute constant. The checks run in this order:
/// private segment (scratch) size, addressable SGPR count, total SGPR count
/// including the extra SGPRs, and the occupancy target given by the
/// "amdgpu-waves-per-eu" attribute.
void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {

  if (F.isDeclaration() || !AMDGPU::isModuleEntryFunctionCC(F.getCallingConv()))

    return;


  using RIK = MCResourceInfo::ResourceInfoKind;

  const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);

  MCSymbol *FnSym = TM.getSymbol(&F);


  // Stores the value of \p Value in \p Res and returns true if the expression
  // evaluates to an absolute constant; returns false otherwise.
  auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {

    int64_t Val;

    if (Value->evaluateAsAbsolute(Val)) {

      Res = Val;

      return true;

    }

    return false;

  };


  // The scratch limit is compared per work-item: the maximum wave scratch size
  // divided by the wavefront size.
  const uint64_t MaxScratchPerWorkitem =

      STM.getMaxWaveScratchSize() / STM.getWavefrontSize();

  MCSymbol *ScratchSizeSymbol =

      RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);

  uint64_t ScratchSize;

  // Unlike the register checks below, a scratch overflow is diagnosed without
  // returning, so the remaining checks still run.
  if (ScratchSizeSymbol->isVariable() &&

      TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&

      ScratchSize > MaxScratchPerWorkitem) {

    DiagnosticInfoStackSize DiagStackSize(F, ScratchSize, MaxScratchPerWorkitem,

                                          DS_Error);

    F.getContext().diagnose(DiagStackSize);

  }


  // Validate addressable scalar registers (i.e., prior to added implicit

  // SGPRs).

  MCSymbol *NumSGPRSymbol =

      RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);

  if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&

      !STM.hasSGPRInitBug()) {

    unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();

    uint64_t NumSgpr;

    if (NumSGPRSymbol->isVariable() &&

        TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&

        NumSgpr > MaxAddressableNumSGPRs) {

      F.getContext().diagnose(DiagnosticInfoResourceLimit(

          F, "addressable scalar registers", NumSgpr, MaxAddressableNumSGPRs,

          DS_Error, DK_ResourceLimit));

      return;

    }

  }


  MCSymbol *VCCUsedSymbol =

      RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);

  MCSymbol *FlatUsedSymbol =

      RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);

  uint64_t VCCUsed, FlatUsed, NumSgpr;


  // The remaining checks need the SGPR count, VCC use and flat-scratch use to
  // all be resolvable.
  if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&

      FlatUsedSymbol->isVariable() &&

      TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&

      TryGetMCExprValue(VCCUsedSymbol->getVariableValue(), VCCUsed) &&

      TryGetMCExprValue(FlatUsedSymbol->getVariableValue(), FlatUsed)) {


    // Recomputes NumSgprs + implicit SGPRs but all symbols should now be

    // resolvable.

    NumSgpr += IsaInfo::getNumExtraSGPRs(

        STM, VCCUsed, FlatUsed,

        getTargetStreamer()->getTargetID()->isXnackOnOrAny());

    // On SEA_ISLANDS and older, or with the SGPR init bug, the limit is
    // checked against the count that includes the extra SGPRs.
    if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||

        STM.hasSGPRInitBug()) {

      unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();

      if (NumSgpr > MaxAddressableNumSGPRs) {

        F.getContext().diagnose(DiagnosticInfoResourceLimit(

            F, "scalar registers", NumSgpr, MaxAddressableNumSGPRs, DS_Error,

            DK_ResourceLimit));

        return;

      }

    }


    MCSymbol *NumVgprSymbol =

        RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);

    MCSymbol *NumAgprSymbol =

        RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);

    uint64_t NumVgpr, NumAgpr;


    MachineModuleInfo &MMI =

        getAnalysis<MachineModuleInfoWrapperPass>().getMMI();

    MachineFunction *MF = MMI.getMachineFunction(F);

    // The occupancy check additionally needs the MachineFunction for F and
    // resolvable VGPR/AGPR counts.
    if (MF && NumVgprSymbol->isVariable() && NumAgprSymbol->isVariable() &&

        TryGetMCExprValue(NumVgprSymbol->getVariableValue(), NumVgpr) &&

        TryGetMCExprValue(NumAgprSymbol->getVariableValue(), NumAgpr)) {

      const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();

      unsigned MaxWaves = MFI.getMaxWavesPerEU();

      uint64_t TotalNumVgpr =

          getTotalNumVGPRs(STM.hasGFX90AInsts(), NumAgpr, NumVgpr);

      // Register counts used for occupancy are at least 1 and at least the
      // subtarget minimum for MaxWaves.
      uint64_t NumVGPRsForWavesPerEU =

          std::max({TotalNumVgpr, (uint64_t)1,

                    (uint64_t)STM.getMinNumVGPRs(

                        MaxWaves, MFI.getDynamicVGPRBlockSize())});

      uint64_t NumSGPRsForWavesPerEU = std::max(

          {NumSgpr, (uint64_t)1, (uint64_t)STM.getMinNumSGPRs(MaxWaves)});

      const MCExpr *OccupancyExpr = createOccupancy(

          STM.getOccupancyWithWorkGroupSizes(*MF).second,

          MCConstantExpr::create(NumSGPRsForWavesPerEU, OutContext),

          MCConstantExpr::create(NumVGPRsForWavesPerEU, OutContext),

          MFI.getDynamicVGPRBlockSize(), STM, OutContext);

      uint64_t Occupancy;


      const auto [MinWEU, MaxWEU] = AMDGPU::getIntegerPairAttribute(

          F, "amdgpu-waves-per-eu", {0, 0}, true);


      // Only the lower bound of the requested waves-per-EU range is enforced
      // here; MaxWEU is not used.
      if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {

        DiagnosticInfoOptimizationFailure Diag(

            F, F.getSubprogram(),

            "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "

            "'" +

                F.getName() + "': desired occupancy was " + Twine(MinWEU) +

                ", final occupancy is " + Twine(Occupancy));

        F.getContext().diagnose(Diag);

        return;

      }

    }

  }

}


/// Appends the encoding of \p Ty to \p Enc, based on its bit size in \p DL:
///   - void: 'v'
///   - zero-sized non-void (e.g. `{}` or `[0 x i8]`): 'v' for a return type,
///     nothing for a parameter
///   - up to 32 bits: 'i'
///   - up to 64 bits: 'l'
///   - larger: one 'i' per started 32-bit chunk
static void appendTypeEncoding(std::string &Enc, Type *Ty, const DataLayout &DL,
                               bool IsReturnType) {
  // Void is handled before the size query.
  if (Ty->isVoidTy()) {
    Enc.push_back('v');
    return;
  }

  const unsigned Bits = DL.getTypeSizeInBits(Ty);

  // Zero-sized types consume no ABI registers; a return type still gets the
  // same no-result marker as void so the encoding keeps its return prefix.
  if (Bits == 0) {
    if (IsReturnType)
      Enc.push_back('v');
    return;
  }

  if (Bits > 64) {
    Enc.append(divideCeil(Bits, 32), 'i');
    return;
  }

  Enc.push_back(Bits > 32 ? 'l' : 'i');
}


static std::string computeTypeId(const FunctionType *FTy,

                                 const DataLayout &DL) {

  std::string Enc;

  appendTypeEncoding(Enc, FTy->getReturnType(), DL, /*IsReturnType=*/true);

  for (Type *ParamTy : FTy->params())

    appendTypeEncoding(Enc, ParamTy, DL, /*IsReturnType=*/false);

  return Enc;

}


void AMDGPUAsmPrinter::collectCallEdge(const MachineInstr &MI) {

  if (!AMDGPUTargetMachine::EnableObjectLinking)

    return;

  const SIInstrInfo *TII = MF->getSubtarget<GCNSubtarget>().getInstrInfo();

  const MachineOperand *Callee =

      TII->getNamedOperand(MI, AMDGPU::OpName::callee);

  if (!Callee || !Callee->isGlobal())

    return;

  DirectCallEdges.insert(

      {getSymbol(&MF->getFunction()), getSymbol(Callee->getGlobal())});

}


/// Collects the module-level information for the AMDGPU info section and
/// emits it through the target streamer. Does nothing unless object linking
/// is enabled, and emits nothing when there is no data at all.
///
/// The emitted data consists of: the per-function infos accumulated in
/// FunctionInfos, type IDs of address-taken non-kernel functions, the direct
/// call edges accumulated in DirectCallEdges, LDS and named-barrier use edges
/// read from the "amdgpu.lds.uses" / "amdgpu.named_barrier.uses" named
/// metadata, and one entry per distinct indirect-call type ID per function.
void AMDGPUAsmPrinter::emitAMDGPUInfo(Module &M) {
  if (!AMDGPUTargetMachine::EnableObjectLinking)
    return;

  const NamedMDNode *LDSMD = M.getNamedMetadata("amdgpu.lds.uses");
  bool HasLDSUses = LDSMD && LDSMD->getNumOperands() > 0;

  const NamedMDNode *BarMD = M.getNamedMetadata("amdgpu.named_barrier.uses");
  bool HasNamedBarriers = BarMD && BarMD->getNumOperands() > 0;

  const DataLayout &DL = M.getDataLayout();

  // Collect address-taken functions (with type IDs) and indirect call sites.
  //
  // Both lists are kept in module order. Every function is visited exactly
  // once below, so a vector cannot receive duplicates, and unlike a map keyed
  // by Function pointers its iteration order does not depend on pointer
  // values, which keeps the emitted data deterministic.
  using FunctionTypeId = std::pair<const Function *, std::string>;
  SmallVector<FunctionTypeId, 8> AddrTakenTypeIds;
  SmallVector<FunctionTypeId, 8> IndirectCalls;

  for (const Function &F : M) {
    bool IsKernel = AMDGPU::isKernel(F.getCallingConv());

    if (!IsKernel && F.hasAddressTaken(/*PutOffender=*/nullptr,
                                       /*IgnoreCallbackUses=*/false,
                                       /*IgnoreAssumeLikeCalls=*/true,
                                       /*IgnoreLLVMUsed=*/true)) {
      AddrTakenTypeIds.emplace_back(&F,
                                    computeTypeId(F.getFunctionType(), DL));
    }

    // Declarations have no body to scan for indirect calls.
    if (F.isDeclaration())
      continue;

    // Each distinct indirect-call type ID is recorded once per function.
    StringSet<> SeenTypeIds;
    for (const BasicBlock &BB : F) {
      for (const Instruction &I : BB) {
        const auto *CB = dyn_cast<CallBase>(&I);
        if (!CB || !CB->isIndirectCall())
          continue;
        std::string TId = computeTypeId(CB->getFunctionType(), DL);
        if (SeenTypeIds.insert(TId).second)
          IndirectCalls.push_back({&F, std::move(TId)});
      }
    }
  }

  // Nothing to report: do not emit an empty section.
  if (FunctionInfos.empty() && DirectCallEdges.empty() && !HasLDSUses &&
      !HasNamedBarriers && AddrTakenTypeIds.empty() && IndirectCalls.empty())
    return;

  AMDGPU::InfoSectionData Data;
  Data.Funcs = std::move(FunctionInfos);

  for (const auto &[F, TypeId] : AddrTakenTypeIds) {
    MCSymbol *Sym = getSymbol(F);
    Data.TypeIds.push_back({Sym, TypeId});
  }

  for (auto &[CallerSym, CalleeSym] : DirectCallEdges)
    Data.Calls.push_back({CallerSym, CalleeSym});
  DirectCallEdges.clear();

  if (HasLDSUses) {
    // Each node is read as (function, LDS variable).
    for (const MDNode *N : LDSMD->operands()) {
      auto *Func = mdconst::extract<Function>(N->getOperand(0));
      auto *LdsVar = mdconst::extract<GlobalVariable>(N->getOperand(1));
      Data.Uses.push_back({getSymbol(Func), getSymbol(LdsVar)});
    }
  }

  if (HasNamedBarriers) {
    // Each node is read as (barrier variable, function, function, ...).
    for (const MDNode *N : BarMD->operands()) {
      auto *BarVar = mdconst::extract<GlobalVariable>(N->getOperand(0));
      MCSymbol *BarSym = getSymbol(BarVar);
      for (unsigned I = 1, E = N->getNumOperands(); I < E; ++I) {
        auto *Func = mdconst::extract<Function>(N->getOperand(I));
        Data.Uses.push_back({getSymbol(Func), BarSym});
      }
    }
  }

  for (const auto &[Caller, Enc] : IndirectCalls) {
    MCSymbol *CallerSym = getSymbol(Caller);
    Data.IndirectCalls.push_back({CallerSym, Enc});
  }

  getTargetStreamer()->emitAMDGPUInfo(Data);
}


/// Module-level finalization: optionally pads the text section with
/// s_code_end, emits the AMDGPU info section, finalizes the resource info,
/// emits the GPR maximums section, validates per-function resource usage
/// (unless object linking is enabled), and then runs the generic AsmPrinter
/// finalization.
bool AMDGPUAsmPrinter::doFinalization(Module &M) {
  // Pad with s_code_end to help tools and guard against instruction prefetch
  // causing stale data in caches. Arguably this should be done by the linker,
  // which is why this isn't done for Mesa.
  // Don't do it if there is no code.
  const MCSubtargetInfo &STI = *getGlobalSTI();
  const bool ArchWantsCodeEnd =
      AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI);
  const Triple::OSType OS = STI.getTargetTriple().getOS();
  const bool IsHSAOrPAL = OS == Triple::AMDHSA || OS == Triple::AMDPAL;
  if (ArchWantsCodeEnd && IsHSAOrPAL) {
    MCSection *TextSect = getObjFileLowering().getTextSection();
    if (TextSect->hasInstructions()) {
      OutStreamer->switchSection(TextSect);
      getTargetStreamer()->EmitCodeEnd(STI);
    }
  }

  // Emit the unified .amdgpu.info section (per-function resources, call graph,
  // LDS/named-barrier use edges, indirect calls, and address-taken type IDs).
  emitAMDGPUInfo(M);

  // Assign expressions which can only be resolved when all other functions are
  // known.
  RI.finalize(OutContext);

  // Switch section and emit all GPR maximums within the processed module.
  MCSectionELF *MaxGPRSection =
      OutContext.getELFSection(".AMDGPU.gpr_maximums", ELF::SHT_PROGBITS, 0);
  OutStreamer->pushSection();
  OutStreamer->switchSection(MaxGPRSection);
  getTargetStreamer()->EmitMCResourceMaximums(
      RI.getMaxVGPRSymbol(OutContext), RI.getMaxAGPRSymbol(OutContext),
      RI.getMaxSGPRSymbol(OutContext), RI.getMaxNamedBarrierSymbol(OutContext));
  OutStreamer->popSection();

  // In the object-linking pipeline per-function resource MCExprs reference
  // external callee symbols that cannot be evaluated here, so cross-TU limit
  // checks would silently no-op for every non-leaf function. Defer resource
  // sanity checking to the linker, which re-validates against the aggregated
  // call graph in the combined .amdgpu.info metadata.
  const bool ValidateHere = !AMDGPUTargetMachine::EnableObjectLinking;
  if (ValidateHere) {
    for (Function &Fn : M)
      validateMCResourceInfo(Fn);
  }

  RI.reset();

  return AsmPrinter::doFinalization(M);
}


/// Returns the textual form of \p Value after folding it with
/// foldAMDGPUMCExpr in the target streamer's MCContext.
SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) {
  MCContext &Context = getTargetStreamer()->getStreamer().getContext();
  const MCExpr *Folded = foldAMDGPUMCExpr(Value, Context);

  SmallString<128> Text;
  raw_svector_ostream OS(Text);
  printAMDGPUMCExpr(Folded, OS, &MAI);
  return Text;
}


// Print comments that apply to both callable functions and entry points.

void AMDGPUAsmPrinter::emitCommonFunctionComments(

    const MCExpr *NumVGPR, const MCExpr *NumAGPR, const MCExpr *TotalNumVGPR,

    const MCExpr *NumSGPR, const MCExpr *ScratchSize, uint64_t CodeSize,

    const AMDGPUMachineFunctionInfo *MFI) {

  OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);

  OutStreamer->emitRawComment(" TotalNumSgprs: " + getMCExprStr(NumSGPR),

                              false);

  OutStreamer->emitRawComment(" NumVgprs: " + getMCExprStr(NumVGPR), false);

  if (NumAGPR && TotalNumVGPR) {

    OutStreamer->emitRawComment(" NumAgprs: " + getMCExprStr(NumAGPR), false);

    OutStreamer->emitRawComment(" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR),

                                false);

  }

  OutStreamer->emitRawComment(" ScratchSize: " + getMCExprStr(ScratchSize),

                              false);

  OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),

                              false);

}


/// Build the kernel_code_properties value for \p MF as an MCExpr.
///
/// The bits that are known here (enabled user SGPRs and the wave32 flag) are
/// collected into a constant. The dynamic-stack bit comes from
/// CurrentProgramInfo.DynamicCallStack, which is an MCExpr, so it is shifted
/// into position and OR'ed in symbolically.
const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
    const MachineFunction &MF) const {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
  const GCNUserSGPRUsageInfo &UserSGPRs = MFI.getUserSGPRInfo();
  MCContext &Ctx = MF.getContext();

  uint16_t StaticProps = 0;
  auto EnableIf = [&StaticProps](bool Cond, int Bit) {
    if (Cond)
      StaticProps |= Bit;
  };

  EnableIf(UserSGPRs.hasPrivateSegmentBuffer(),
           amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  EnableIf(UserSGPRs.hasDispatchPtr(),
           amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  EnableIf(UserSGPRs.hasQueuePtr(),
           amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  EnableIf(UserSGPRs.hasKernargSegmentPtr(),
           amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  EnableIf(UserSGPRs.hasDispatchID(),
           amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  EnableIf(UserSGPRs.hasFlatScratchInit(),
           amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  EnableIf(UserSGPRs.hasPrivateSegmentSize(),
           amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
  EnableIf(MF.getSubtarget<GCNSubtarget>().isWave32(),
           amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);

  // DynamicCallStack is an MCExpr that may not be evaluatable yet, so it
  // cannot be tested with a plain `if`. Shift the (possibly unknown)
  // expression into the USES_DYNAMIC_STACK position and OR it with the
  // constant part instead.
  const MCExpr *StaticPart = MCConstantExpr::create(StaticProps, Ctx);
  const MCExpr *ShiftAmount = MCConstantExpr::create(
      amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);
  const MCExpr *DynStackBit = MCBinaryExpr::createShl(
      CurrentProgramInfo.DynamicCallStack, ShiftAmount, Ctx);

  return MCBinaryExpr::createOr(StaticPart, DynStackBit, Ctx);
}


/// Assemble the MCKernelDescriptor for \p MF.
///
/// Segment sizes and compute_pgm_rsrc1/2 are taken from \p PI, while
/// compute_pgm_rsrc3 and the kernel code properties are taken from
/// CurrentProgramInfo.
MCKernelDescriptor
AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
                                            const SIProgramInfo &PI) const {
  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const Function &F = MF.getFunction();
  MCContext &Ctx = MF.getContext();

  MCKernelDescriptor KD;

  // Fixed-size segments.
  KD.group_segment_fixed_size = MCConstantExpr::create(PI.LDSSize, Ctx);
  KD.private_segment_fixed_size = PI.ScratchSize;

  // The maximum alignment is an out-parameter that is not needed here.
  Align IgnoredMaxAlign;
  KD.kernarg_size = MCConstantExpr::create(
      STM.getKernArgSegmentSize(F, IgnoredMaxAlign), Ctx);

  KD.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM, Ctx);
  KD.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(STM, Ctx);
  KD.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);

  // Sanity check: unless the subtarget is GFX10+, GFX90A or GFX1250, an
  // evaluatable compute_pgm_rsrc3 must be zero. The evaluation is performed
  // unconditionally; only the check itself is assert-only.
  [[maybe_unused]] int64_t Rsrc3Value = 1;
  [[maybe_unused]] const bool Rsrc3Known =
      CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(Rsrc3Value);
  assert(STM.getGeneration() >= AMDGPUSubtarget::GFX10 ||
         STM.hasGFX90AInsts() || STM.hasGFX1250Insts() || !Rsrc3Known ||
         static_cast<uint64_t>(Rsrc3Value) == 0);
  KD.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3;

  // Preloaded kernarg SGPRs are only reported when the subtarget supports
  // kernarg preloading; otherwise the field is zero.
  const auto NumPreloadedSGPRs =
      AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0;
  KD.kernarg_preload = MCConstantExpr::create(NumPreloadedSGPRs, Ctx);

  return KD;
}


/// Emit a single machine function.
///
/// In order, this: lazily initializes the target streamer, resets
/// CurrentProgramInfo, sets the function alignment, gathers the function's
/// resource info, computes the SIProgramInfo for module entry functions,
/// emits OS-specific program info / metadata, emits the function body and the
/// resource-info symbols, and finally (when verbose or when -dumpcode is
/// active) emits informational comment / disassembly sections.
///
/// \returns false on every path.
bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {

  // Init target streamer lazily on the first function so that previous passes

  // can set metadata.

  if (!IsTargetStreamerInitialized)

    initTargetStreamer(*MF.getFunction().getParent());


  ResourceUsage =

      &getAnalysis<AMDGPUResourceUsageAnalysisWrapperPass>().getResourceInfo();

  CurrentProgramInfo.reset(MF);


  const AMDGPUMachineFunctionInfo *MFI =

      MF.getInfo<AMDGPUMachineFunctionInfo>();

  MCContext &Ctx = MF.getContext();


  // The starting address of all shader programs must be 256 bytes aligned.

  // Regular functions just need the basic required instruction alignment.

  MF.ensureAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));


  SetupMachineFunction(MF);


  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();

  MCContext &Context = getObjFileLowering().getContext();

  // FIXME: This should be an explicit check for Mesa.

  // Neither HSA nor PAL: switch to the .AMDGPU.config section before any
  // program info is emitted.
  if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {

    MCSectionELF *ConfigSection =

        Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);

    OutStreamer->switchSection(ConfigSection);

  }


  RI.gatherResourceInfo(MF, *ResourceUsage, OutContext);


  // With object linking enabled, additionally record this function's resource
  // usage numbers together with its symbol in FunctionInfos.
  if (AMDGPUTargetMachine::EnableObjectLinking) {

    const AMDGPUResourceUsageAnalysisWrapperPass::FunctionResourceInfo &RU =

        *ResourceUsage;

    FunctionInfos.push_back(

        {/*NumSGPR=*/static_cast<uint32_t>(RU.NumExplicitSGPR),

         /*NumArchVGPR=*/static_cast<uint32_t>(RU.NumVGPR),

         /*NumAccVGPR=*/static_cast<uint32_t>(RU.NumAGPR),

         /*PrivateSegmentSize=*/static_cast<uint32_t>(RU.PrivateSegmentSize),

         /*UsesVCC=*/RU.UsesVCC,

         /*UsesFlatScratch=*/RU.UsesFlatScratch,

         /*HasDynStack=*/RU.HasDynamicallySizedStack,

         /*Sym=*/getSymbol(&MF.getFunction())});

  }


  // The full program info is only computed for module entry functions; for
  // everything else CurrentProgramInfo stays as left by reset(MF) above.
  if (MFI->isModuleEntryFunction()) {

    getSIProgramInfo(CurrentProgramInfo, MF);

  }


  // OS-specific metadata: PAL gets PAL metadata, HSA gets nothing here, and
  // every other OS gets the SI program info.
  if (STM.isAmdPalOS()) {

    if (MFI->isEntryFunction())

      EmitPALMetadata(MF, CurrentProgramInfo);

    else if (MFI->isModuleEntryFunction())

      emitPALFunctionMetadata(MF);

  } else if (!STM.isAmdHsaOS()) {

    EmitProgramInfoSI(MF, CurrentProgramInfo);

  }


  DumpCodeInstEmitter = nullptr;

  if (STM.dumpCode()) {

    // For -dumpcode, get the assembler out of the streamer. This only works

    // with -filetype=obj.

    MCAssembler *Assembler = OutStreamer->getAssemblerPtr();

    if (Assembler)

      DumpCodeInstEmitter = Assembler->getEmitterPtr();

  }


  // Start from empty per-function disassembly buffers before the body is
  // emitted.
  DisasmLines.clear();

  HexLines.clear();

  DisasmLineMaxLen = 0;


  emitFunctionBody();


  emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),

                           STM.hasMAIInsts());


  // Hand the per-function resource-info symbols (one per ResourceInfoKind
  // listed below) to the target streamer.
  {

    using RIK = MCResourceInfo::ResourceInfoKind;

    getTargetStreamer()->EmitMCResourceInfo(

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumNamedBarrier,

                     OutContext),

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,

                     OutContext),

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,

                     OutContext),

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,

                     OutContext),

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,

                     OutContext),

        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,

                     OutContext));

  }


  // Emit _dvgpr$ symbol when appropriate.

  emitDVgprSymbol(MF);


  // Verbose output: informational comments are emitted into the
  // .AMDGPU.csdata section.
  if (isVerbose()) {

    MCSectionELF *CommentSection =

        Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);

    OutStreamer->switchSection(CommentSection);


    // Non-entry functions only get the common comments, built from the
    // values of their resource-info symbols.
    if (!MFI->isEntryFunction()) {

      using RIK = MCResourceInfo::ResourceInfoKind;

      OutStreamer->emitRawComment(" Function info:", false);


      emitCommonFunctionComments(

          RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)

              ->getVariableValue(),

          STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),

                                           RIK::RIK_NumAGPR, OutContext)

                                  ->getVariableValue()

                            : nullptr,

          RI.createTotalNumVGPRs(MF, Ctx),

          RI.createTotalNumSGPRs(

              MF,

              MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),

              Ctx),

          RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,

                       OutContext)

              ->getVariableValue(),

          CurrentProgramInfo.getFunctionCodeSize(MF), MFI);

      // NOTE(review): this early return also skips the -dumpcode
      // .AMDGPU.disasm emission at the end of this function for non-entry
      // functions in verbose mode -- confirm that is intended.
      return false;

    }


    // Entry functions: common comments followed by the kernel-specific
    // program info fields.
    OutStreamer->emitRawComment(" Kernel info:", false);

    emitCommonFunctionComments(

        CurrentProgramInfo.NumArchVGPR,

        STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR : nullptr,

        CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,

        CurrentProgramInfo.ScratchSize,

        CurrentProgramInfo.getFunctionCodeSize(MF), MFI);


    OutStreamer->emitRawComment(

        " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);

    OutStreamer->emitRawComment(

        " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);

    OutStreamer->emitRawComment(

        " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +

            " bytes/workgroup (compile time only)",

        false);


    OutStreamer->emitRawComment(

        " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks), false);


    OutStreamer->emitRawComment(

        " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks), false);


    OutStreamer->emitRawComment(

        " NumSGPRsForWavesPerEU: " +

            getMCExprStr(CurrentProgramInfo.NumSGPRsForWavesPerEU),

        false);

    OutStreamer->emitRawComment(

        " NumVGPRsForWavesPerEU: " +

            getMCExprStr(CurrentProgramInfo.NumVGPRsForWavesPerEU),

        false);


    if (STM.hasGFX90AInsts()) {

      // Printed as (AccumOffset + 1) * 4, i.e. the inverse of the
      // "/ 4 - 1" encoding applied when AccumOffset is computed.
      const MCExpr *AdjustedAccum = MCBinaryExpr::createAdd(

          CurrentProgramInfo.AccumOffset, MCConstantExpr::create(1, Ctx), Ctx);

      AdjustedAccum = MCBinaryExpr::createMul(

          AdjustedAccum, MCConstantExpr::create(4, Ctx), Ctx);

      OutStreamer->emitRawComment(

          " AccumOffset: " + getMCExprStr(AdjustedAccum), false);

    }


    if (STM.hasGFX1250Insts())

      OutStreamer->emitRawComment(

          " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),

          false);


    OutStreamer->emitRawComment(

        " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false);


    OutStreamer->emitRawComment(

        " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);


    OutStreamer->emitRawComment(

        " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +

            getMCExprStr(CurrentProgramInfo.ScratchEnable),

        false);

    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +

                                    Twine(CurrentProgramInfo.UserSGPR),

                                false);

    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +

                                    Twine(CurrentProgramInfo.TrapHandlerEnable),

                                false);

    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +

                                    Twine(CurrentProgramInfo.TGIdXEnable),

                                false);

    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +

                                    Twine(CurrentProgramInfo.TGIdYEnable),

                                false);

    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +

                                    Twine(CurrentProgramInfo.TGIdZEnable),

                                false);

    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +

                                    Twine(CurrentProgramInfo.TIdIGCompCount),

                                false);


    // Assert-only check: unless the subtarget is GFX10+, GFX90A or GFX1250,
    // ComputePGMRSrc3 must evaluate to an absolute value of zero.
    [[maybe_unused]] int64_t PGMRSrc3;

    assert(STM.getGeneration() >= AMDGPUSubtarget::GFX10 ||

           STM.hasGFX90AInsts() || STM.hasGFX1250Insts() ||

           (CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGMRSrc3) &&

            static_cast<uint64_t>(PGMRSrc3) == 0));

    if (STM.hasGFX90AInsts()) {

      OutStreamer->emitRawComment(

          " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +

              getMCExprStr(MCKernelDescriptor::bits_get(

                  CurrentProgramInfo.ComputePGMRSrc3,

                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,

                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),

          false);

      OutStreamer->emitRawComment(

          " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +

              getMCExprStr(MCKernelDescriptor::bits_get(

                  CurrentProgramInfo.ComputePGMRSrc3,

                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,

                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),

          false);

    }

  }


  // -dumpcode: write the collected disassembly lines, each followed by its
  // hex encoding (when present) padded to a common column, into the
  // .AMDGPU.disasm section.
  if (DumpCodeInstEmitter) {


    OutStreamer->switchSection(

        Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));


    for (size_t i = 0; i < DisasmLines.size(); ++i) {

      std::string Comment = "\n";

      if (!HexLines[i].empty()) {

        Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');

        Comment += " ; " + HexLines[i] + "\n";

      }


      OutStreamer->emitBytes(StringRef(DisasmLines[i]));

      OutStreamer->emitBytes(StringRef(Comment));

    }

  }


  return false;

}


/// When dynamic VGPRs are enabled and \p MF uses the AMDGPU_CS_Chain calling
/// convention, define a `_dvgpr$<function>` symbol whose value is the function
/// symbol plus an offset that encodes (number of VGPR blocks - 1) in bits 5..3.
///
/// A "VGPR block" can be either 16 VGPRs (for a max of 128), or 32 VGPRs (for
/// a max of 256). This is used by a front-end to have functions that are
/// chained rather than called, and a dispatcher that dynamically resizes the
/// VGPR count before dispatching to a function.
///
/// An error is reported through OutContext (and no symbol is emitted) when
/// more than 8 blocks would be required.
void AMDGPUAsmPrinter::emitDVgprSymbol(MachineFunction &MF) {
  const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

  // Nothing to do unless this is a dynamic-VGPR chain function.
  if (!MFI.isDynamicVGPREnabled())
    return;
  if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_CS_Chain)
    return;

  MCContext &Ctx = MF.getContext();
  unsigned BlockSize = MFI.getDynamicVGPRBlockSize();

  // The VGPR count has to be a resolved absolute value at this point.
  MCValue ResolvedVGPRs;
  const bool Evaluated =
      CurrentProgramInfo.NumVGPRsForWavesPerEU->evaluateAsRelocatable(
          ResolvedVGPRs, nullptr);
  if (!Evaluated || !ResolvedVGPRs.isAbsolute())
    llvm_unreachable("unable to resolve NumVGPRs for _dvgpr$ symbol");

  // Calculate number of VGPR blocks, treating 0 VGPRs as 1 VGPR so that the
  // "- 1" below cannot underflow.
  const unsigned VGPRCount =
      std::max(static_cast<unsigned>(ResolvedVGPRs.getConstant()), 1U);
  const unsigned BlockCount = divideCeil(VGPRCount, BlockSize);

  if (BlockCount > 8) {
    OutContext.reportError({},
                           "too many DVGPR blocks for _dvgpr$ symbol for '" +
                               Twine(CurrentFnSym->getName()) + "'");
    return;
  }

  // Place (BlockCount - 1) into bits 5..3 and add it to the function symbol.
  const unsigned BlockBits = (BlockCount - 1) << 3;
  const MCExpr *FnAddr = MCSymbolRefExpr::create(CurrentFnSym, Ctx);
  const MCExpr *SymValue = MCBinaryExpr::createAdd(
      FnAddr, MCConstantExpr::create(BlockBits, Ctx), Ctx);

  MCSymbol *DVgprSym =
      Ctx.getOrCreateSymbol(Twine("_dvgpr$") + CurrentFnSym->getName());
  OutStreamer->emitAssignment(DVgprSym, SymValue);

  // The new symbol takes its visibility and linkage from the function.
  emitVisibility(DVgprSym, MF.getFunction().getVisibility());
  emitLinkage(&MF.getFunction(), DVgprSym);
}


// TODO: Fold this into emitFunctionBodyStart.

void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {

  // In the beginning all features are either 'Any' or 'NotSupported',

  // depending on global target features. This will cover empty modules.

  getTargetStreamer()->initializeTargetID(*getGlobalSTI(),

                                          getGlobalSTI()->getFeatureString());


  // If module is empty, we are done.

  if (M.empty())

    return;


  // If module is not empty, need to find first 'Off' or 'On' feature

  // setting per feature from functions in module.

  for (auto &F : M) {

    auto &TSTargetID = getTargetStreamer()->getTargetID();

    if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&

        (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))

      break;


    const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);

    const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();

    if (TSTargetID->isXnackSupported())

      if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)

        TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());

    if (TSTargetID->isSramEccSupported())

      if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)

        TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());

  }

}


/// Build the MCExpr equivalent of
///   alignTo(std::max(1, NumVGPR), 4) / 4 - 1
/// which is the value stored in SIProgramInfo::AccumOffset.
static const MCExpr *computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx) {
  const MCExpr *Four = MCConstantExpr::create(4, Ctx);
  const MCExpr *One = MCConstantExpr::create(1, Ctx);

  // Clamp to at least 1 so the alignTo below never sees 0.
  const MCExpr *AtLeastOne = AMDGPUMCExpr::createMax({One, NumVGPR}, Ctx);

  // alignTo(x, 4) / 4 is divideCeil(x, 4).
  const MCExpr *Aligned = AMDGPUMCExpr::createAlignTo(AtLeastOne, Four, Ctx);
  const MCExpr *Quads = MCBinaryExpr::createDiv(Aligned, Four, Ctx);

  return MCBinaryExpr::createSub(Quads, One, Ctx);
}


/// Populate \p ProgInfo for \p MF.
///
/// Register counts, scratch size and the VCC / flat-scratch / dynamic-stack
/// flags are expressed as MCExprs that reference the current function's
/// resource-info symbols (see GetSymRefExpr below), so they are not
/// necessarily evaluatable here. Limit checks on such values are therefore
/// only performed when the expression evaluates to an absolute value (see
/// TryGetMCExprValue). Diagnostics are reported through the function's
/// LLVMContext.
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,

                                        const MachineFunction &MF) {

  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();

  MCContext &Ctx = MF.getContext();


  // Shorthand for creating a constant MCExpr.
  auto CreateExpr = [&Ctx](int64_t Value) {

    return MCConstantExpr::create(Value, Ctx);

  };


  // Stores the absolute value of \p Value in \p Res and returns true when the
  // expression can be evaluated; returns false and leaves \p Res untouched
  // otherwise.
  auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {

    int64_t Val;

    if (Value->evaluateAsAbsolute(Val)) {

      Res = Val;

      return true;

    }

    return false;

  };


  // Returns a reference to the current function's resource-info symbol of the
  // given kind.
  auto GetSymRefExpr =

      [&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {

    MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);

    return MCSymbolRefExpr::create(Sym, Ctx);

  };


  using RIK = MCResourceInfo::ResourceInfoKind;

  ProgInfo.NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);

  ProgInfo.NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);

  ProgInfo.NumVGPR = AMDGPUMCExpr::createTotalNumVGPR(

      ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);


  ProgInfo.AccumOffset = computeAccumOffset(ProgInfo.NumArchVGPR, Ctx);

  ProgInfo.TgSplit = STM.isTgSplitEnabled();

  ProgInfo.NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);

  ProgInfo.ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);

  ProgInfo.VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);

  ProgInfo.FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);

  // The dynamic call stack flag is the OR of "has dynamically sized stack"
  // and "has recursion".
  ProgInfo.DynamicCallStack =

      MCBinaryExpr::createOr(GetSymRefExpr(RIK::RIK_HasDynSizedStack),

                             GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);


  // NamedBarCnt = alignTo(NumNamedBarrier, 4) / 4.
  const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx);

  const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo(

      GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx);

  ProgInfo.NamedBarCnt = MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx);


  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();


  // The calculations related to SGPR/VGPR blocks are

  // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be

  // unified.

  const MCExpr *ExtraSGPRs = AMDGPUMCExpr::createExtraSGPRs(

      ProgInfo.VCCUsed, ProgInfo.FlatUsed,

      getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Ctx);


  // Check the addressable register limit before we add ExtraSGPRs.

  if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&

      !STM.hasSGPRInitBug()) {

    unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();

    uint64_t NumSgpr;

    if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&

        NumSgpr > MaxAddressableNumSGPRs) {

      // This can happen due to a compiler bug or when using inline asm.

      // Note: this LLVMContext `Ctx` shadows the MCContext `Ctx` declared at
      // function scope for the remainder of this block.
      LLVMContext &Ctx = MF.getFunction().getContext();

      Ctx.diagnose(DiagnosticInfoResourceLimit(

          MF.getFunction(), "addressable scalar registers", NumSgpr,

          MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit));

      ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);

    }

  }


  // Account for the extra SGPRs computed above from VCC usage, flat scratch
  // usage and the XNACK setting.

  ProgInfo.NumSGPR = MCBinaryExpr::createAdd(ProgInfo.NumSGPR, ExtraSGPRs, Ctx);


  const Function &F = MF.getFunction();


  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave

  // dispatch registers as function args.

  unsigned WaveDispatchNumSGPR = MFI->getNumWaveDispatchSGPRs(),

           WaveDispatchNumVGPR = MFI->getNumWaveDispatchVGPRs();


  if (WaveDispatchNumSGPR) {

    ProgInfo.NumSGPR = AMDGPUMCExpr::createMax(

        {ProgInfo.NumSGPR,

         MCBinaryExpr::createAdd(CreateExpr(WaveDispatchNumSGPR), ExtraSGPRs,

                                 Ctx)},

        Ctx);

  }


  if (WaveDispatchNumVGPR) {

    // NOTE(review): NumArchVGPR is rebuilt from ProgInfo.NumVGPR (the total
    // arch+acc count), not from the previous NumArchVGPR -- confirm this is
    // intended.
    ProgInfo.NumArchVGPR = AMDGPUMCExpr::createMax(

        {ProgInfo.NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);


    ProgInfo.NumVGPR = AMDGPUMCExpr::createTotalNumVGPR(

        ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);

  }


  // Adjust number of registers used to meet default/requested minimum/maximum

  // number of waves per execution unit request.

  unsigned MaxWaves = MFI->getMaxWavesPerEU();

  ProgInfo.NumSGPRsForWavesPerEU =

      AMDGPUMCExpr::createMax({ProgInfo.NumSGPR, CreateExpr(1ul),

                               CreateExpr(STM.getMinNumSGPRs(MaxWaves))},

                              Ctx);

  ProgInfo.NumVGPRsForWavesPerEU =

      AMDGPUMCExpr::createMax({ProgInfo.NumVGPR, CreateExpr(1ul),

                               CreateExpr(STM.getMinNumVGPRs(

                                   MaxWaves, MFI->getDynamicVGPRBlockSize()))},

                              Ctx);


  // On SEA_ISLANDS and older, or with the SGPR init bug, the addressable
  // limit is checked after ExtraSGPRs have been added (the complementary
  // case is checked before the addition above).
  if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||

      STM.hasSGPRInitBug()) {

    unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();

    uint64_t NumSgpr;

    if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&

        NumSgpr > MaxAddressableNumSGPRs) {

      // This can happen due to a compiler bug or when using inline asm to use

      // the registers which are usually reserved for vcc etc.

      LLVMContext &Ctx = MF.getFunction().getContext();

      Ctx.diagnose(DiagnosticInfoResourceLimit(

          MF.getFunction(), "scalar registers", NumSgpr, MaxAddressableNumSGPRs,

          DS_Error, DK_ResourceLimit));

      ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs);

      ProgInfo.NumSGPRsForWavesPerEU = CreateExpr(MaxAddressableNumSGPRs);

    }

  }


  // With the SGPR init bug, both SGPR counts are forced to a fixed value,
  // overriding everything computed so far.
  if (STM.hasSGPRInitBug()) {

    ProgInfo.NumSGPR =

        CreateExpr(AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG);

    ProgInfo.NumSGPRsForWavesPerEU =

        CreateExpr(AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG);

  }


  // Diagnose exceeding the user SGPR limit.
  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {

    LLVMContext &Ctx = MF.getFunction().getContext();

    Ctx.diagnose(DiagnosticInfoResourceLimit(

        MF.getFunction(), "user SGPRs", MFI->getNumUserSGPRs(),

        STM.getMaxNumUserSGPRs(), DS_Error));

  }


  // Diagnose exceeding the addressable local memory size.
  if (MFI->getLDSSize() > STM.getAddressableLocalMemorySize()) {

    LLVMContext &Ctx = MF.getFunction().getContext();

    Ctx.diagnose(DiagnosticInfoResourceLimit(

        MF.getFunction(), "local memory", MFI->getLDSSize(),

        STM.getAddressableLocalMemorySize(), DS_Error));

  }

  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:

  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1

  auto GetNumGPRBlocks = [&CreateExpr, &Ctx](const MCExpr *NumGPR,

                                             unsigned Granule) {

    const MCExpr *OneConst = CreateExpr(1ul);

    const MCExpr *GranuleConst = CreateExpr(Granule);

    const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);

    const MCExpr *AlignToGPR =

        AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);

    const MCExpr *DivGPR =

        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);

    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);

    return SubGPR;

  };

  // GFX10+ will always allocate 128 SGPRs and this field must be 0

  if (STM.getGeneration() >= AMDGPUSubtarget::GFX10) {

    ProgInfo.SGPRBlocks = CreateExpr(0ul);

  } else {

    ProgInfo.SGPRBlocks = GetNumGPRBlocks(ProgInfo.NumSGPRsForWavesPerEU,

                                          IsaInfo::getSGPREncodingGranule(STM));

  }

  ProgInfo.VGPRBlocks = GetNumGPRBlocks(ProgInfo.NumVGPRsForWavesPerEU,

                                        IsaInfo::getVGPREncodingGranule(STM));


  const SIModeRegisterDefaults Mode = MFI->getMode();


  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode

  // register.

  ProgInfo.FloatMode = getFPMode(Mode);


  ProgInfo.IEEEMode = Mode.IEEE;


  // Make the clamp modifier return 0 on NaN input.

  ProgInfo.DX10Clamp = Mode.DX10Clamp;


  // Map the LDS granularity reported for the subtarget to the shift used
  // below to turn the LDS byte size into a block count.
  unsigned LDSAlignShift = 8;

  switch (getLdsDwGranularity(STM)) {

  case 512:

  case 320:

    LDSAlignShift = 11;

    break;

  case 128:

    LDSAlignShift = 9;

    break;

  case 64:

    LDSAlignShift = 8;

    break;

  default:

    // NOTE(review): the message below misspells "invalid"; left untouched
    // because it is a runtime string.
    llvm_unreachable("invald LDS block size");

  }


  ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();

  ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();


  ProgInfo.LDSSize = MFI->getLDSSize();

  // LDSBlocks = ceil(LDSSize / 2^LDSAlignShift).
  ProgInfo.LDSBlocks =

      alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;


  // The MCExpr equivalent of divideCeil.

  auto DivideCeil = [&Ctx](const MCExpr *Numerator, const MCExpr *Denominator) {

    const MCExpr *Ceil =

        AMDGPUMCExpr::createAlignTo(Numerator, Denominator, Ctx);

    return MCBinaryExpr::createDiv(Ceil, Denominator, Ctx);

  };


  // Scratch is allocated in 64-dword or 256-dword blocks.

  unsigned ScratchAlignShift =

      STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;

  // We need to program the hardware with the amount of scratch memory that

  // is used by the entire wave.  ProgInfo.ScratchSize is the amount of

  // scratch memory used per thread.

  ProgInfo.ScratchBlocks = DivideCeil(

      MCBinaryExpr::createMul(ProgInfo.ScratchSize,

                              CreateExpr(STM.getWavefrontSize()), Ctx),

      CreateExpr(1ULL << ScratchAlignShift));


  // WgpMode is the inverse of CU mode on subtargets that support WGP.
  if (STM.supportsWGP()) {

    ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;

  }


  // These fields are only set when the ISA major version is 10 or higher.
  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {

    ProgInfo.MemOrdered = 1;

    ProgInfo.FwdProgress = !F.hasFnAttribute("amdgpu-no-fwd-progress");

  }


  // 0 = X, 1 = XY, 2 = XYZ

  unsigned TIDIGCompCnt = 0;

  if (MFI->hasWorkItemIDZ())

    TIDIGCompCnt = 2;

  else if (MFI->hasWorkItemIDY())

    TIDIGCompCnt = 1;


  // The private segment wave byte offset is the last of the system SGPRs. We

  // initially assumed it was allocated, and may have used it. It shouldn't harm

  // anything to disable it if we know the stack isn't used here. We may still

  // have emitted code reading it to initialize scratch, but if that's unused

  // reading garbage should be OK.

  // ScratchEnable = (ScratchBlocks > 0) || DynamicCallStack.
  ProgInfo.ScratchEnable = MCBinaryExpr::createLOr(

      MCBinaryExpr::createGT(ProgInfo.ScratchBlocks,

                             MCConstantExpr::create(0, Ctx), Ctx),

      ProgInfo.DynamicCallStack, Ctx);


  ProgInfo.UserSGPR = MFI->getNumUserSGPRs();

  // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.

  ProgInfo.TrapHandlerEnable = STM.isAmdHsaOS() ? 0 : STM.hasTrapHandler();

  ProgInfo.TGIdXEnable = MFI->hasWorkGroupIDX();

  ProgInfo.TGIdYEnable = MFI->hasWorkGroupIDY();

  ProgInfo.TGIdZEnable = MFI->hasWorkGroupIDZ();

  ProgInfo.TGSizeEnable = MFI->hasWorkGroupInfo();

  ProgInfo.TIdIGCompCount = TIDIGCompCnt;

  ProgInfo.EXCPEnMSB = 0;

  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.

  ProgInfo.LdsSize = STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks;

  ProgInfo.EXCPEnable = 0;


  // GFX90A: pack AccumOffset and TgSplit into COMPUTE_PGM_RSRC3.
  if (STM.hasGFX90AInsts()) {

    ProgInfo.ComputePGMRSrc3 =

        setBits(ProgInfo.ComputePGMRSrc3, ProgInfo.AccumOffset,

                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,

                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT, Ctx);

    ProgInfo.ComputePGMRSrc3 =

        setBits(ProgInfo.ComputePGMRSrc3, CreateExpr(ProgInfo.TgSplit),

                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,

                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, Ctx);

  }


  // GFX1250: pack the named barrier count into COMPUTE_PGM_RSRC3.
  if (STM.hasGFX1250Insts())

    ProgInfo.ComputePGMRSrc3 =

        setBits(ProgInfo.ComputePGMRSrc3, ProgInfo.NamedBarCnt,

                amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,

                amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT, Ctx);


  ProgInfo.Occupancy = createOccupancy(

      STM.computeOccupancy(F, ProgInfo.LDSSize).second,

      ProgInfo.NumSGPRsForWavesPerEU, ProgInfo.NumVGPRsForWavesPerEU,

      MFI->getDynamicVGPRBlockSize(), STM, Ctx);


  // Diagnose when the final occupancy (if it can be evaluated here) is below
  // the minimum requested through the 'amdgpu-waves-per-eu' attribute.
  const auto [MinWEU, MaxWEU] =

      AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);

  uint64_t Occupancy;

  if (TryGetMCExprValue(ProgInfo.Occupancy, Occupancy) && Occupancy < MinWEU) {

    DiagnosticInfoOptimizationFailure Diag(

        F, F.getSubprogram(),

        "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "

        "'" +

            F.getName() + "': desired occupancy was " + Twine(MinWEU) +

            ", final occupancy is " + Twine(Occupancy));

    F.getContext().diagnose(Diag);

  }

}


/// Return the PGM_RSRC1 register constant that corresponds to \p CallConv.
/// Any calling convention without a dedicated entry is treated like
/// AMDGPU_CS and yields the compute register.
static unsigned getRsrcReg(CallingConv::ID CallConv) {
  switch (CallConv) {
  case CallingConv::AMDGPU_PS:
    return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
  case CallingConv::AMDGPU_VS:
    return R_00B128_SPI_SHADER_PGM_RSRC1_VS;
  case CallingConv::AMDGPU_GS:
    return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
  case CallingConv::AMDGPU_ES:
    return R_00B328_SPI_SHADER_PGM_RSRC1_ES;
  case CallingConv::AMDGPU_HS:
    return R_00B428_SPI_SHADER_PGM_RSRC1_HS;
  case CallingConv::AMDGPU_LS:
    return R_00B528_SPI_SHADER_PGM_RSRC1_LS;
  case CallingConv::AMDGPU_CS:
  default:
    return R_00B848_COMPUTE_PGM_RSRC1;
  }
}


void AMDGPUAsmPrinter::EmitProgramInfoSI(

    const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) {

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();

  unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());

  MCContext &Ctx = MF.getContext();


  // (((Value) & Mask) << Shift)

  auto SetBits = [&Ctx](const MCExpr *Value, uint32_t Mask, uint32_t Shift) {

    const MCExpr *msk = MCConstantExpr::create(Mask, Ctx);

    const MCExpr *shft = MCConstantExpr::create(Shift, Ctx);

    return MCBinaryExpr::createShl(MCBinaryExpr::createAnd(Value, msk, Ctx),

                                   shft, Ctx);

  };


  auto EmitResolvedOrExpr = [this](const MCExpr *Value, unsigned Size) {

    int64_t Val;

    if (Value->evaluateAsAbsolute(Val))

      OutStreamer->emitIntValue(static_cast<uint64_t>(Val), Size);

    else

      OutStreamer->emitValue(Value, Size);

  };


  if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {

    OutStreamer->emitInt32(R_00B848_COMPUTE_PGM_RSRC1);


    EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx),

                       /*Size=*/4);


    OutStreamer->emitInt32(R_00B84C_COMPUTE_PGM_RSRC2);

    EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx),

                       /*Size=*/4);


    OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);


    // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the

    // appropriate generation.

    if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {

      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

                                 /*Mask=*/0x3FFFF, /*Shift=*/12),

                         /*Size=*/4);

    } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {

      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

                                 /*Mask=*/0x7FFF, /*Shift=*/12),

                         /*Size=*/4);

    } else {

      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

                                 /*Mask=*/0x1FFF, /*Shift=*/12),

                         /*Size=*/4);

    }


    // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =

    // 0" comment but I don't see a corresponding field in the register spec.

  } else {

    OutStreamer->emitInt32(RsrcReg);


    const MCExpr *GPRBlocks = MCBinaryExpr::createOr(

        SetBits(CurrentProgramInfo.VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0),

        SetBits(CurrentProgramInfo.SGPRBlocks, /*Mask=*/0x0F, /*Shift=*/6),

        MF.getContext());

    EmitResolvedOrExpr(GPRBlocks, /*Size=*/4);

    OutStreamer->emitInt32(R_0286E8_SPI_TMPRING_SIZE);


    // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the

    // appropriate generation.

    if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {

      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

                                 /*Mask=*/0x3FFFF, /*Shift=*/12),

                         /*Size=*/4);

    } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {

      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

                                 /*Mask=*/0x7FFF, /*Shift=*/12),

                         /*Size=*/4);

    } else {

      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,

                                 /*Mask=*/0x1FFF, /*Shift=*/12),

                         /*Size=*/4);

    }

  }


  if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {

    OutStreamer->emitInt32(R_00B02C_SPI_SHADER_PGM_RSRC2_PS);

    unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11

                                ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)

                                : CurrentProgramInfo.LDSBlocks;

    OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));

    OutStreamer->emitInt32(R_0286CC_SPI_PS_INPUT_ENA);

    OutStreamer->emitInt32(MFI->getPSInputEnable());

    OutStreamer->emitInt32(R_0286D0_SPI_PS_INPUT_ADDR);

    OutStreamer->emitInt32(MFI->getPSInputAddr());

  }


  OutStreamer->emitInt32(R_SPILLED_SGPRS);

  OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());

  OutStreamer->emitInt32(R_SPILLED_VGPRS);

  OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());

}


/// Helper that records the hardware-stage PAL metadata common to
/// EmitPALMetadata and emitPALFunctionMetadata. Both callers invoke it only
/// when the PAL metadata major version is 3 or newer.
///
/// \param MD                   PAL metadata object being populated.
/// \param CurrentProgramInfo   Program info the values are read from.
/// \param CC                   Calling convention selecting the hardware stage.
/// \param ST                   Subtarget, consulted for feature availability
///                             and the LDS dword granularity.
/// \param DynamicVGPRBlockSize Non-zero enables ".dynamic_vgpr_en" for
///                             compute calling conventions.
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
                                  const SIProgramInfo &CurrentProgramInfo,
                                  CallingConv::ID CC, const GCNSubtarget &ST,
                                  unsigned DynamicVGPRBlockSize) {
  const SIProgramInfo &Info = CurrentProgramInfo;

  // ".ieee_mode" is only reported on subtargets that have the feature.
  if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
    MD->setHwStage(CC, ".ieee_mode", static_cast<bool>(Info.IEEEMode));

  MD->setHwStage(CC, ".wgp_mode", static_cast<bool>(Info.WgpMode));
  MD->setHwStage(CC, ".mem_ordered", static_cast<bool>(Info.MemOrdered));
  MD->setHwStage(CC, ".forward_progress", static_cast<bool>(Info.FwdProgress));

  if (AMDGPU::isCompute(CC)) {
    MD->setHwStage(CC, ".trap_present",
                   static_cast<bool>(Info.TrapHandlerEnable));
    MD->setHwStage(CC, ".excp_en", Info.EXCPEnable);

    if (DynamicVGPRBlockSize != 0)
      MD->setComputeRegisters(".dynamic_vgpr_en", true);
  }

  // LdsSize is scaled by the LDS dword granularity and the dword size before
  // being recorded.
  const unsigned LdsBytes = static_cast<unsigned>(
      Info.LdsSize * getLdsDwGranularity(ST) * sizeof(uint32_t));
  MD->updateHwStageMaximum(CC, ".lds_size", LdsBytes);
}


// This is the equivalent of EmitProgramInfoSI above, but for when the OS type

// is AMDPAL.  It stores each compute/SPI register setting and other PAL

// metadata items into the PALMD::Metadata, combining with any provided by the

// frontend as LLVM metadata. Once all functions are written, the PAL metadata

// is then written as a single block in the .note section.

void AMDGPUAsmPrinter::EmitPALMetadata(

    const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) {

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  auto CC = MF.getFunction().getCallingConv();

  auto *MD = getTargetStreamer()->getPALMetadata();

  auto &Ctx = MF.getContext();


  MD->setEntryPoint(CC, MF.getFunction().getName());

  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);


  // For targets that support dynamic VGPRs, set the number of saved dynamic

  // VGPRs (if any) in the PAL metadata.

  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();

  if (MFI->isDynamicVGPREnabled() &&

      MFI->getScratchReservedForDynamicVGPRs() > 0)

    MD->setHwStage(CC, ".dynamic_vgpr_saved_count",

                   MFI->getScratchReservedForDynamicVGPRs() / 4);


  // Only set AGPRs for supported devices

  if (STM.hasMAIInsts()) {

    MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);

  }


  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);

  if (MD->getPALMajorVersion() < 3) {

    MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);

    if (AMDGPU::isCompute(CC)) {

      MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(STM, Ctx), Ctx);

    } else {

      const MCExpr *HasScratchBlocks =

          MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks,

                                 MCConstantExpr::create(0, Ctx), Ctx);

      auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN);

      MD->setRsrc2(CC, maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);

    }

  } else {

    MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);

    MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean,

                   CurrentProgramInfo.ScratchEnable);

    EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM,

                          MFI->getDynamicVGPRBlockSize());

  }


  // ScratchSize is in bytes, 16 aligned.

  MD->setScratchSize(

      CC,

      AMDGPUMCExpr::createAlignTo(CurrentProgramInfo.ScratchSize,

                                  MCConstantExpr::create(16, Ctx), Ctx),

      Ctx);


  if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {

    unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11

                                ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)

                                : CurrentProgramInfo.LDSBlocks;

    if (MD->getPALMajorVersion() < 3) {

      MD->setRsrc2(

          CC,

          MCConstantExpr::create(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize), Ctx),

          Ctx);

      MD->setSpiPsInputEna(MFI->getPSInputEnable());

      MD->setSpiPsInputAddr(MFI->getPSInputAddr());

    } else {

      // Graphics registers

      const unsigned ExtraLdsDwGranularity =

          STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 256 : 128;

      MD->setGraphicsRegisters(

          ".ps_extra_lds_size",

          (unsigned)(ExtraLDSSize * ExtraLdsDwGranularity * sizeof(uint32_t)));


      // Set PsInputEna and PsInputAddr .spi_ps_input_ena and .spi_ps_input_addr

      static StringLiteral const PsInputFields[] = {

          ".persp_sample_ena",    ".persp_center_ena",

          ".persp_centroid_ena",  ".persp_pull_model_ena",

          ".linear_sample_ena",   ".linear_center_ena",

          ".linear_centroid_ena", ".line_stipple_tex_ena",

          ".pos_x_float_ena",     ".pos_y_float_ena",

          ".pos_z_float_ena",     ".pos_w_float_ena",

          ".front_face_ena",      ".ancillary_ena",

          ".sample_coverage_ena", ".pos_fixed_pt_ena"};

      unsigned PSInputEna = MFI->getPSInputEnable();

      unsigned PSInputAddr = MFI->getPSInputAddr();

      for (auto [Idx, Field] : enumerate(PsInputFields)) {

        MD->setGraphicsRegisters(".spi_ps_input_ena", Field,

                                 (bool)((PSInputEna >> Idx) & 1));

        MD->setGraphicsRegisters(".spi_ps_input_addr", Field,

                                 (bool)((PSInputAddr >> Idx) & 1));

      }

    }

  }


  // For version 3 and above the wave front size is already set in the metadata

  if (MD->getPALMajorVersion() < 3 && STM.isWave32())

    MD->setWave32(MF.getFunction().getCallingConv());

}


void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {

  auto *MD = getTargetStreamer()->getPALMetadata();

  const MachineFrameInfo &MFI = MF.getFrameInfo();

  StringRef FnName = MF.getFunction().getName();

  MD->setFunctionScratchSize(FnName, MFI.getStackSize());

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  MCContext &Ctx = MF.getContext();


  if (MD->getPALMajorVersion() < 3) {

    // Set compute registers

    MD->setRsrc1(

        CallingConv::AMDGPU_CS,

        CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST, Ctx), Ctx);

    MD->setRsrc2(CallingConv::AMDGPU_CS,

                 CurrentProgramInfo.getComputePGMRSrc2(ST, Ctx), Ctx);

  } else {

    EmitPALMetadataCommon(

        MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST,

        MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize());

  }


  // Set optional info

  MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);

  MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);

  MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);

}


/// Translate a private element size in bytes into its
/// amd_element_byte_size_t enumerator. Only 4, 8 and 16 are accepted; any
/// other size is treated as unreachable.
// This is supposed to be log2(Size)
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
  if (Size == 4)
    return AMD_ELEMENT_4_BYTES;
  if (Size == 8)
    return AMD_ELEMENT_8_BYTES;
  if (Size == 16)
    return AMD_ELEMENT_16_BYTES;

  llvm_unreachable("invalid private_element_size");
}


/// Populate \p Out for the kernel in \p MF from \p CurrentProgramInfo, the
/// function's user-SGPR usage and the subtarget.
///
/// \p Out is first reset to its defaults; the function must use the
/// AMDGPU_KERNEL or SPIR_KERNEL calling convention.
void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
                                        const SIProgramInfo &CurrentProgramInfo,
                                        const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
         F.getCallingConv() == CallingConv::SPIR_KERNEL);

  const auto *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  const auto &ST = MF.getSubtarget<GCNSubtarget>();
  MCContext &Ctx = MF.getContext();

  Out.initDefault(ST, Ctx, /*InitMCExpr=*/false);

  Out.compute_pgm_resource1_registers =
      CurrentProgramInfo.getComputePGMRSrc1(ST, Ctx);
  Out.compute_pgm_resource2_registers =
      CurrentProgramInfo.getComputePGMRSrc2(ST, Ctx);

  // ORs Flag into the code properties when Enabled holds.
  auto SetPropertyIf = [&Out](bool Enabled, uint32_t Flag) {
    if (Enabled)
      Out.code_properties |= Flag;
  };

  SetPropertyIf(true, AMD_CODE_PROPERTY_IS_PTR64);

  Out.is_dynamic_callstack = CurrentProgramInfo.DynamicCallStack;

  AMD_HSA_BITS_SET(Out.code_properties, AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
                   getElementByteSizeValue(ST.getMaxPrivateElementSize(true)));

  // One enable bit per user SGPR the function actually uses.
  const GCNUserSGPRUsageInfo &UserSGPRs = FuncInfo->getUserSGPRInfo();
  SetPropertyIf(UserSGPRs.hasPrivateSegmentBuffer(),
                AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
  SetPropertyIf(UserSGPRs.hasDispatchPtr(),
                AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
  SetPropertyIf(UserSGPRs.hasQueuePtr(),
                AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
  SetPropertyIf(UserSGPRs.hasKernargSegmentPtr(),
                AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
  SetPropertyIf(UserSGPRs.hasDispatchID(),
                AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
  SetPropertyIf(UserSGPRs.hasFlatScratchInit(),
                AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
  SetPropertyIf(UserSGPRs.hasPrivateSegmentSize(),
                AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);

  SetPropertyIf(ST.isXNACKEnabled(), AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);

  // getKernArgSegmentSize also reports the largest argument alignment through
  // MaxKernArgAlign, which is consumed further down.
  Align MaxKernArgAlign;
  Out.kernarg_segment_byte_size = ST.getKernArgSegmentSize(F, MaxKernArgAlign);
  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;

  // kernarg_segment_alignment is specified as log of the alignment.
  // The minimum alignment is 16.
  // FIXME: The metadata treats the minimum as 4?
  const Align KernArgAlign = std::max(Align(16), MaxKernArgAlign);
  Out.kernarg_segment_alignment = Log2(KernArgAlign);
}


bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,

                                       const char *ExtraCode, raw_ostream &O) {

  // First try the generic code, which knows about modifiers like 'c' and 'n'.

  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))

    return false;


  if (ExtraCode && ExtraCode[0]) {

    if (ExtraCode[1] != 0)

      return true; // Unknown modifier.


    switch (ExtraCode[0]) {

    case 'r':

      break;

    default:

      return true;

    }

  }


  // TODO: Should be able to support other operand types like globals.

  const MachineOperand &MO = MI->getOperand(OpNo);

  if (MO.isReg()) {

    AMDGPUInstPrinter::printRegOperand(MO.getReg(), O,

                                       *MF->getSubtarget().getRegisterInfo());

    return false;

  }

  if (MO.isImm()) {

    int64_t Val = MO.getImm();

    if (AMDGPU::isInlinableIntLiteral(Val)) {

      O << Val;

    } else if (isUInt<16>(Val)) {

      O << format("0x%" PRIx16, static_cast<uint16_t>(Val));

    } else if (isUInt<32>(Val)) {

      O << format("0x%" PRIx32, static_cast<uint32_t>(Val));

    } else {

      O << format("0x%" PRIx64, static_cast<uint64_t>(Val));

    }

    return false;

  }

  return true;

}


/// Declare the analyses this pass depends on: the AMDGPU resource usage
/// analysis and the machine module info are both required and preserved, in
/// addition to whatever the base AsmPrinter declares.
void AMDGPUAsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
  // Required analyses.
  AU.addRequired<AMDGPUResourceUsageAnalysisWrapperPass>();
  AU.addRequired<MachineModuleInfoWrapperPass>();

  // The same analyses are preserved.
  AU.addPreserved<AMDGPUResourceUsageAnalysisWrapperPass>();
  AU.addPreserved<MachineModuleInfoWrapperPass>();

  AsmPrinter::getAnalysisUsage(AU);
}


void AMDGPUAsmPrinter::emitResourceUsageRemarks(

    const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,

    bool isModuleEntryFunction, bool hasMAIInsts) {

  if (!ORE)

    return;


  const char *Name = "kernel-resource-usage";

  const char *Indent = "    ";


  // If the remark is not specifically enabled, do not output to yaml

  LLVMContext &Ctx = MF.getFunction().getContext();

  if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))

    return;


  // Currently non-kernel functions have no resources to emit.

  if (!isEntryFunctionCC(MF.getFunction().getCallingConv()))

    return;


  auto EmitResourceUsageRemark = [&](StringRef RemarkName,

                                     StringRef RemarkLabel, auto Argument) {

    // Add an indent for every line besides the line with the kernel name. This

    // makes it easier to tell which resource usage go with which kernel since

    // the kernel name will always be displayed first.

    std::string LabelStr = RemarkLabel.str() + ": ";

    if (RemarkName != "FunctionName")

      LabelStr = Indent + LabelStr;


    ORE->emit([&]() {

      return MachineOptimizationRemarkAnalysis(Name, RemarkName,

                                               MF.getFunction().getSubprogram(),

                                               &MF.front())

             << LabelStr << ore::NV(RemarkName, Argument);

    });

  };


  // FIXME: Formatting here is pretty nasty because clang does not accept

  // newlines from diagnostics. This forces us to emit multiple diagnostic

  // remarks to simulate newlines. If and when clang does accept newlines, this

  // formatting should be aggregated into one remark with newlines to avoid

  // printing multiple diagnostic location and diag opts.

  EmitResourceUsageRemark("FunctionName", "Function Name",

                          MF.getFunction().getName());

  EmitResourceUsageRemark("NumSGPR", "TotalSGPRs",

                          getMCExprStr(CurrentProgramInfo.NumSGPR));

  EmitResourceUsageRemark("NumVGPR", "VGPRs",

                          getMCExprStr(CurrentProgramInfo.NumArchVGPR));

  if (hasMAIInsts) {

    EmitResourceUsageRemark("NumAGPR", "AGPRs",

                            getMCExprStr(CurrentProgramInfo.NumAccVGPR));

  }

  EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",

                          getMCExprStr(CurrentProgramInfo.ScratchSize));

  int64_t DynStack;

  bool DynStackEvaluatable =

      CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack);

  StringRef DynamicStackStr =

      DynStackEvaluatable && DynStack ? "True" : "False";

  EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);

  EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",

                          getMCExprStr(CurrentProgramInfo.Occupancy));

  EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",

                          CurrentProgramInfo.SGPRSpill);

  EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",

                          CurrentProgramInfo.VGPRSpill);

  if (isModuleEntryFunction)

    EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",

                            CurrentProgramInfo.LDSSize);

}


// Out-of-line definition of the pass's static ID member.
// NOTE(review): presumably the address rather than the value identifies the
// pass, as is usual for LLVM passes - confirm.
char AMDGPUAsmPrinter::ID = 0;


// Register the pass under the command-line argument "amdgpu-asm-printer" with
// the display name "AMDGPU Assembly Printer"; the trailing two flags are the
// macro's cfg and analysis parameters, both false here.
INITIALIZE_PASS(AMDGPUAsmPrinter, "amdgpu-asm-printer",

                "AMDGPU Assembly Printer", false, false)

assert
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")

EmitPALMetadataCommon
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize)
Definition AMDGPUAsmPrinter.cpp:1629

createOccupancy
const AMDGPUMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, unsigned DynamicVGPRBlockSize, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
Definition AMDGPUAsmPrinter.cpp:430

getRsrcReg
static unsigned getRsrcReg(CallingConv::ID CallConv)
Definition AMDGPUAsmPrinter.cpp:1509

LLVMInitializeAMDGPUAsmPrinter
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter()
Definition AMDGPUAsmPrinter.cpp:92

getElementByteSizeValue
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
Definition AMDGPUAsmPrinter.cpp:1783

setBits
static const MCExpr * setBits(const MCExpr *Dst, const MCExpr *Value, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Set bits in a kernel descriptor MCExpr field: return ((Dst & ~Mask) | (Value << Shift))
Definition AMDGPUAsmPrinter.cpp:239

getFPMode
static uint32_t getFPMode(SIModeRegisterDefaults Mode)
Definition AMDGPUAsmPrinter.cpp:78

computeTypeId
static std::string computeTypeId(const FunctionType *FTy, const DataLayout &DL)
Definition AMDGPUAsmPrinter.cpp:596

computeAccumOffset
static const MCExpr * computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx)
Definition AMDGPUAsmPrinter.cpp:1196

appendTypeEncoding
static void appendTypeEncoding(std::string &Enc, Type *Ty, const DataLayout &DL, bool IsReturnType)
Definition AMDGPUAsmPrinter.cpp:573

createAMDGPUAsmPrinterPass
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
Definition AMDGPUAsmPrinter.cpp:86

AMDGPUAsmPrinter.h
AMDGPU Assembly printer class.

AMDGPUBaseInfo.h

AMDGPUHSAMetadataStreamer.h
AMDGPU HSA Metadata Streamer.

AMDGPUInstPrinter.h

AMDGPUMCExpr.h

AMDGPUMCKernelDescriptor.h
AMDHSA kernel descriptor MCExpr struct for use in MC layer.

AMDGPUMCResourceInfo.h
MC infrastructure to propagate the function level resource usage info.

AMDGPUResourceUsageAnalysis.h
Analyzes how many registers and other resources are used by functions.

AMDGPUTargetInfo.h

AMDGPUTargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.

AMDGPUTargetParser.h

AMDGPUTargetStreamer.h

AMDGPU.h

AMDHSAKernelDescriptor.h
AMDHSA kernel descriptor definitions.

AMDKernelCodeTUtils.h
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.

amd_element_byte_size_t
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
Definition AMDKernelCodeT.h:53

AMD_ELEMENT_8_BYTES
@ AMD_ELEMENT_8_BYTES
Definition AMDKernelCodeT.h:56

AMD_ELEMENT_16_BYTES
@ AMD_ELEMENT_16_BYTES
Definition AMDKernelCodeT.h:57

AMD_ELEMENT_4_BYTES
@ AMD_ELEMENT_4_BYTES
Definition AMDKernelCodeT.h:55

AMD_HSA_BITS_SET
#define AMD_HSA_BITS_SET(dst, mask, val)
Definition AMDKernelCodeT.h:43

AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
Definition AMDKernelCodeT.h:103

AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
Definition AMDKernelCodeT.h:163

AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
Definition AMDKernelCodeT.h:99

AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
Definition AMDKernelCodeT.h:95

AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
Definition AMDKernelCodeT.h:111

AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
Definition AMDKernelCodeT.h:87

AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
Definition AMDKernelCodeT.h:91

AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
Definition AMDKernelCodeT.h:193

AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
Definition AMDKernelCodeT.h:107

AMD_CODE_PROPERTY_IS_PTR64
@ AMD_CODE_PROPERTY_IS_PTR64
Definition AMDKernelCodeT.h:172

MBB
MachineBasicBlock & MBB
Definition ARMSLSHardening.cpp:71

DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition ARMSLSHardening.cpp:73

AsmPrinterHandler.h

ELF.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")

Compiler.h

LLVM_ABI
#define LLVM_ABI
Definition Compiler.h:213

LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132

OutputCostKind::CodeSize
@ CodeSize
Definition CostModel.cpp:33

DiagnosticInfo.h

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

TII
const HexagonInstrInfo * TII
Definition HexagonCopyToCombine.cpp:118

MI
IRTranslator LLVM IR MI
Definition IRTranslator.cpp:110

InlinePriorityMode::Size
@ Size
Definition InlineOrder.cpp:25

MCAssembler.h

MCContext.h

MCSectionELF.h

MCStreamer.h

MCValue.h

F
#define F(x, y, z)
Definition MD5.cpp:54

I
#define I(x, y, z)
Definition MD5.cpp:57

MachineFrameInfo.h

MachineModuleInfo.h

MachineOptimizationRemarkEmitter.h
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//

OptimizationRemarkEmitter.h

Field
OptimizedStructLayoutField Field
Definition OptimizedStructLayout.cpp:18

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56

R600AsmPrinter.h
R600 Assembly printer class.

Mode
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))

SIDefinesUtils.h

R_00B028_SPI_SHADER_PGM_RSRC1_PS
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition SIDefines.h:1144

R_0286E8_SPI_TMPRING_SIZE
#define R_0286E8_SPI_TMPRING_SIZE
Definition SIDefines.h:1286

FP_ROUND_MODE_DP
#define FP_ROUND_MODE_DP(x)
Definition SIDefines.h:1268

C_00B84C_SCRATCH_EN
#define C_00B84C_SCRATCH_EN
Definition SIDefines.h:1180

FP_ROUND_ROUND_TO_NEAREST
#define FP_ROUND_ROUND_TO_NEAREST
Definition SIDefines.h:1260

R_0286D0_SPI_PS_INPUT_ADDR
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition SIDefines.h:1219

R_00B860_COMPUTE_TMPRING_SIZE
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition SIDefines.h:1281

R_00B428_SPI_SHADER_PGM_RSRC1_HS
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition SIDefines.h:1167

R_00B328_SPI_SHADER_PGM_RSRC1_ES
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition SIDefines.h:1166

R_00B528_SPI_SHADER_PGM_RSRC1_LS
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition SIDefines.h:1175

R_0286CC_SPI_PS_INPUT_ENA
#define R_0286CC_SPI_PS_INPUT_ENA
Definition SIDefines.h:1218

R_00B128_SPI_SHADER_PGM_RSRC1_VS
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition SIDefines.h:1153

FP_DENORM_MODE_DP
#define FP_DENORM_MODE_DP(x)
Definition SIDefines.h:1279

R_00B848_COMPUTE_PGM_RSRC1
#define R_00B848_COMPUTE_PGM_RSRC1
Definition SIDefines.h:1221

R_SPILLED_SGPRS
#define R_SPILLED_SGPRS
Definition SIDefines.h:1300

FP_ROUND_MODE_SP
#define FP_ROUND_MODE_SP(x)
Definition SIDefines.h:1267

FP_DENORM_MODE_SP
#define FP_DENORM_MODE_SP(x)
Definition SIDefines.h:1278

R_00B228_SPI_SHADER_PGM_RSRC1_GS
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition SIDefines.h:1158

R_SPILLED_VGPRS
#define R_SPILLED_VGPRS
Definition SIDefines.h:1301

S_00B02C_EXTRA_LDS_SIZE
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition SIDefines.h:1152

R_00B84C_COMPUTE_PGM_RSRC2
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition SIDefines.h:1177

R_00B02C_SPI_SHADER_PGM_RSRC2_PS
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition SIDefines.h:1151

SIMachineFunctionInfo.h

StringSet.h
StringSet - A set-like wrapper for the StringMap.

BlockSize
static const int BlockSize
Definition TarWriter.cpp:33

TargetLoweringObjectFile.h

TargetRegistry.h

CacheLineSize
static cl::opt< unsigned > CacheLineSize("cache-line-size", cl::init(0), cl::Hidden, cl::desc("Use this to override the target cache line size when " "specified by the user."))

FunctionType
Definition ItaniumDemangle.h:835

llvm::AMDGPUAsmPrinter
Definition AMDGPUAsmPrinter.h:41

llvm::AMDGPUAsmPrinter::ID
static char ID
Definition AMDGPUAsmPrinter.h:43

llvm::AMDGPUAsmPrinter::emitFunctionEntryLabel
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition AMDGPUAsmPrinter.cpp:322

llvm::AMDGPUAsmPrinter::getGlobalSTI
const MCSubtargetInfo * getGlobalSTI() const
Definition AMDGPUAsmPrinter.cpp:125

llvm::AMDGPUAsmPrinter::emitImplicitDef
void emitImplicitDef(const MachineInstr *MI) const override
Targets can override this to customize the output of IMPLICIT_DEF instructions in verbose mode.
Definition AMDGPUAsmPrinter.cpp:307

llvm::AMDGPUAsmPrinter::DisasmLines
std::vector< std::string > DisasmLines
Definition AMDGPUAsmPrinter.h:159

llvm::AMDGPUAsmPrinter::emitStartOfAsmFile
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
Definition AMDGPUAsmPrinter.cpp:135

llvm::AMDGPUAsmPrinter::endFunction
void endFunction(const MachineFunction *MF)
Definition AMDGPUAsmPrinter.cpp:249

llvm::AMDGPUAsmPrinter::getPassName
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
Definition AMDGPUAsmPrinter.cpp:121

llvm::AMDGPUAsmPrinter::HexLines
std::vector< std::string > HexLines
Definition AMDGPUAsmPrinter.h:159

llvm::AMDGPUAsmPrinter::IsTargetStreamerInitialized
bool IsTargetStreamerInitialized
Definition AMDGPUAsmPrinter.h:161

llvm::AMDGPUAsmPrinter::emitGlobalVariable
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
Definition AMDGPUAsmPrinter.cpp:358

llvm::AMDGPUAsmPrinter::DisasmLineMaxLen
size_t DisasmLineMaxLen
Definition AMDGPUAsmPrinter.h:160

llvm::AMDGPUAsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition AMDGPUAsmPrinter.cpp:1900

llvm::AMDGPUAsmPrinter::PrintAsmOperand
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition AMDGPUAsmPrinter.cpp:1859

llvm::AMDGPUAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
Definition AMDGPUAsmPrinter.cpp:875

llvm::AMDGPUAsmPrinter::doFinalization
bool doFinalization(Module &M) override
doFinalization - Virtual method overridden by subclasses to do any necessary clean up after all passes...
Definition AMDGPUAsmPrinter.cpp:703

llvm::AMDGPUAsmPrinter::emitEndOfAsmFile
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
Definition AMDGPUAsmPrinter.cpp:163

llvm::AMDGPUAsmPrinter::AMDGPUAsmPrinter
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
Definition AMDGPUAsmPrinter.cpp:115

llvm::AMDGPUAsmPrinter::doInitialization
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition AMDGPUAsmPrinter.cpp:401

llvm::AMDGPUAsmPrinter::emitFunctionBodyStart
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
Definition AMDGPUAsmPrinter.cpp:181

llvm::AMDGPUAsmPrinter::emitBasicBlockStart
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
Definition AMDGPUAsmPrinter.cpp:346

llvm::AMDGPUAsmPrinter::getTargetStreamer
AMDGPUTargetStreamer * getTargetStreamer() const
Definition AMDGPUAsmPrinter.cpp:129

llvm::AMDGPUInstPrinter::printRegOperand
static void printRegOperand(MCRegister Reg, raw_ostream &O, const MCRegisterInfo &MRI)
Definition AMDGPUInstPrinter.cpp:394

llvm::AMDGPUMCExpr
AMDGPU target specific MCExpr operations.
Definition AMDGPUMCExpr.h:31

llvm::AMDGPUMCExpr::createInstPrefSize
static const AMDGPUMCExpr * createInstPrefSize(const MCExpr *CodeSizeBytes, MCContext &Ctx)
Create an expression for instruction prefetch size computation: min(divideCeil(CodeSizeBytes,...
Definition AMDGPUMCExpr.cpp:312

llvm::AMDGPUMCExpr::createMax
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
Definition AMDGPUMCExpr.h:84

llvm::AMDGPUMCExpr::createTotalNumVGPR
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
Definition AMDGPUMCExpr.cpp:305

llvm::AMDGPUMCExpr::create
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
Definition AMDGPUMCExpr.cpp:47

llvm::AMDGPUMCExpr::createExtraSGPRs
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
Definition AMDGPUMCExpr.cpp:295

llvm::AMDGPUMCExpr::createAlignTo
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
Definition AMDGPUMCExpr.h:98

llvm::AMDGPUMCExpr::AGVK_Occupancy
@ AGVK_Occupancy
Definition AMDGPUMCExpr.h:40

llvm::AMDGPUMachineFunctionInfo
Definition AMDGPUMachineFunctionInfo.h:24

llvm::AMDGPUMachineFunctionInfo::isMemoryBound
bool isMemoryBound() const
Definition AMDGPUMachineFunctionInfo.h:98

llvm::AMDGPUMachineFunctionInfo::isModuleEntryFunction
bool isModuleEntryFunction() const
Definition AMDGPUMachineFunctionInfo.h:91

llvm::AMDGPUMachineFunctionInfo::needsWaveLimiter
bool needsWaveLimiter() const
Definition AMDGPUMachineFunctionInfo.h:100

llvm::AMDGPUMachineFunctionInfo::getLDSSize
uint32_t getLDSSize() const
Definition AMDGPUMachineFunctionInfo.h:79

llvm::AMDGPUMachineFunctionInfo::isEntryFunction
bool isEntryFunction() const
Definition AMDGPUMachineFunctionInfo.h:89

llvm::AMDGPUPALMetadata
Definition AMDGPUPALMetadata.h:25

llvm::AMDGPUPALMetadata::readFromIR
void readFromIR(Module &M)
Definition AMDGPUPALMetadata.cpp:55

llvm::AMDGPUPALMetadata::setHwStage
void setHwStage(unsigned CC, StringRef field, unsigned Val)
Definition AMDGPUPALMetadata.cpp:1092

llvm::AMDGPUPALMetadata::updateHwStageMaximum
void updateHwStageMaximum(unsigned CC, StringRef field, unsigned Val)
Definition AMDGPUPALMetadata.cpp:1081

llvm::AMDGPUPALMetadata::setComputeRegisters
void setComputeRegisters(StringRef field, unsigned Val)
Definition AMDGPUPALMetadata.cpp:1105

llvm::AMDGPUSubtarget::isAmdPalOS
bool isAmdPalOS() const
Definition AMDGPUSubtarget.h:169

llvm::AMDGPUSubtarget::getOccupancyWithWorkGroupSizes
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
Definition AMDGPUSubtarget.h:146

llvm::AMDGPUSubtarget::GFX10
@ GFX10
Definition AMDGPUSubtarget.h:42

llvm::AMDGPUSubtarget::GFX12
@ GFX12
Definition AMDGPUSubtarget.h:44

llvm::AMDGPUSubtarget::SEA_ISLANDS
@ SEA_ISLANDS
Definition AMDGPUSubtarget.h:39

llvm::AMDGPUSubtarget::VOLCANIC_ISLANDS
@ VOLCANIC_ISLANDS
Definition AMDGPUSubtarget.h:40

llvm::AMDGPUSubtarget::GFX11
@ GFX11
Definition AMDGPUSubtarget.h:43

llvm::AMDGPUSubtarget::getAddressableLocalMemorySize
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
Definition AMDGPUSubtarget.h:239

llvm::AMDGPUSubtarget::getKernArgSegmentSize
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
Definition AMDGPUSubtarget.cpp:394

llvm::AMDGPUSubtarget::isAmdHsaOS
bool isAmdHsaOS() const
Definition AMDGPUSubtarget.h:165

llvm::AMDGPUSubtarget::getWavefrontSize
unsigned getWavefrontSize() const
Definition AMDGPUSubtarget.h:219

llvm::AMDGPUTargetMachine::EnableObjectLinking
static bool EnableObjectLinking
Definition AMDGPUTargetMachine.h:43

llvm::AMDGPUTargetStreamer
Definition AMDGPUTargetStreamer.h:55

llvm::AMDGPUTargetStreamer::EmitAmdhsaKernelDescriptor
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr)
Definition AMDGPUTargetStreamer.h:125

llvm::AMDGPUTargetStreamer::emitAMDGPUInfo
virtual void emitAMDGPUInfo(const AMDGPU::InfoSectionData &Data)
Definition AMDGPUTargetStreamer.h:131

llvm::AMDGPUTargetStreamer::getPALMetadata
AMDGPUPALMetadata * getPALMetadata()
Definition AMDGPUTargetStreamer.h:72

llvm::AMDGPUTargetStreamer::EmitDirectiveAMDHSACodeObjectVersion
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
Definition AMDGPUTargetStreamer.h:76

llvm::AMDGPUTargetStreamer::EmitISAVersion
virtual bool EmitISAVersion()
Definition AMDGPUTargetStreamer.h:100

llvm::AMDGPUTargetStreamer::initializeTargetID
void initializeTargetID(const MCSubtargetInfo &STI)
Definition AMDGPUTargetStreamer.h:142

llvm::AMDGPUTargetStreamer::EmitMCResourceInfo
virtual void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall)
Definition AMDGPUTargetStreamer.h:87

llvm::AMDGPUTargetStreamer::EmitCodeEnd
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)
Definition AMDGPUTargetStreamer.h:122

llvm::AMDGPUTargetStreamer::EmitAMDGPUSymbolType
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)
Definition AMDGPUTargetStreamer.h:82

llvm::AMDGPUTargetStreamer::EmitDirectiveAMDGCNTarget
virtual void EmitDirectiveAMDGCNTarget()
Definition AMDGPUTargetStreamer.h:74

llvm::AMDGPUTargetStreamer::EmitAMDKernelCodeT
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header)
Definition AMDGPUTargetStreamer.h:80

llvm::AMDGPUTargetStreamer::getTargetID
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
Definition AMDGPUTargetStreamer.h:136

llvm::AMDGPUTargetStreamer::EmitMCResourceMaximums
virtual void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier)
Definition AMDGPUTargetStreamer.h:94

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::setXnackSetting
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Definition AMDGPUBaseInfo.h:189

llvm::AMDGPU::IsaInfo::AMDGPUTargetID::isXnackOnOrAny
bool isXnackOnOrAny() const
Definition AMDGPUBaseInfo.h:172

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition PassAnalysisSupport.h:48

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition PassAnalysisSupport.h:76

llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition PassAnalysisSupport.h:99

llvm::Argument
This class represents an incoming formal argument to a Function.
Definition Argument.h:32

llvm::AsmPrinterHandler
Collects and handles AsmPrinter objects required to build debug or EH information.
Definition AsmPrinterHandler.h:34

llvm::AsmPrinter
This class is intended to be used as a driving class for all asm writers.
Definition AsmPrinter.h:91

llvm::AsmPrinter::getObjFileLowering
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
Definition AsmPrinter.cpp:485

llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition AsmPrinter.cpp:763

llvm::AsmPrinter::emitGlobalVariable
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
Definition AsmPrinter.cpp:785

llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:94

llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:109

llvm::AsmPrinter::SetupMachineFunction
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
Definition AsmPrinter.cpp:3175

llvm::AsmPrinter::emitFunctionBody
void emitFunctionBody()
This method emits the body and trailer for a function.
Definition AsmPrinter.cpp:2046

llvm::AsmPrinter::isBlockOnlyReachableByFallthrough
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
Definition AsmPrinter.cpp:4896

llvm::AsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition AsmPrinter.cpp:529

llvm::AsmPrinter::emitLinkage
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
Definition AsmPrinter.cpp:718

llvm::AsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
Definition AsmPrinter.cpp:517

llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition AsmPrinter.cpp:481

llvm::AsmPrinter::ORE
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
Definition AsmPrinter.h:121

llvm::AsmPrinter::AsmPrinter
AsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer, char &ID=AsmPrinter::ID)
Definition AsmPrinter.cpp:410

llvm::AsmPrinter::CurrentFnSym
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:128

llvm::AsmPrinter::MMI
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition AsmPrinter.h:112

llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:101

llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition AsmPrinter.cpp:2878

llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition AsmPrinter.cpp:4752

llvm::AsmPrinter::emitVisibility
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
Definition AsmPrinter.cpp:4855

llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:106

llvm::AsmPrinter::MAI
const MCAsmInfo & MAI
Target Asm Printer information.
Definition AsmPrinter.h:97

llvm::AsmPrinter::isVerbose
bool isVerbose() const
Return true if assembly output should contain comments.
Definition AsmPrinter.h:310

llvm::AsmPrinter::getFunctionEnd
MCSymbol * getFunctionEnd() const
Definition AsmPrinter.h:320

llvm::AsmPrinter::getNameWithPrefix
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
Definition AsmPrinter.cpp:758

llvm::AsmPrinter::emitFunctionEntryLabel
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
Definition AsmPrinter.cpp:1153

llvm::AsmPrinter::addAsmPrinterHandler
void addAsmPrinterHandler(std::unique_ptr< AsmPrinterHandler > Handler)
Definition AsmPrinter.cpp:4957

llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition AsmPrinterInlineAsm.cpp:469

llvm::DataLayout
A parsed version of the target data layout string and methods for querying it.
Definition DataLayout.h:64

llvm::DenseMapBase::empty
bool empty() const
Definition DenseMap.h:109

llvm::Function
Definition Function.h:65

llvm::Function::getSubprogram
DISubprogram * getSubprogram() const
Get the attached subprogram.
Definition Metadata.cpp:1964

llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272

llvm::Function::getContext
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358

llvm::GCNSubtarget
Definition GCNSubtarget.h:35

llvm::GCNSubtarget::getMinNumSGPRs
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
Definition GCNSubtarget.h:764

llvm::GCNSubtarget::getMinNumVGPRs
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
Definition GCNSubtarget.h:843

llvm::GCNSubtarget::isTgSplitEnabled
bool isTgSplitEnabled() const
Definition GCNSubtarget.h:337

llvm::GCNSubtarget::hasInstPrefSize
bool hasInstPrefSize() const
Definition GCNSubtarget.h:429

llvm::GCNSubtarget::isCuModeEnabled
bool isCuModeEnabled() const
Definition GCNSubtarget.h:339

llvm::GCNSubtarget::getTargetID
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
Definition GCNSubtarget.h:147

llvm::GCNSubtarget::computeOccupancy
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
Definition GCNSubtarget.cpp:465

llvm::GCNSubtarget::isWave32
bool isWave32() const
Definition GCNSubtarget.h:893

llvm::GCNSubtarget::supportsWGP
bool supportsWGP() const
Definition GCNSubtarget.h:201

llvm::GCNSubtarget::getInstPrefSizeArgs
void getInstPrefSizeArgs(uint32_t &Mask, uint32_t &Shift, uint32_t &Width, uint32_t &CacheLineSize) const
Definition GCNSubtarget.h:431

llvm::GCNSubtarget::getMaxNumUserSGPRs
unsigned getMaxNumUserSGPRs() const
Definition GCNSubtarget.h:482

llvm::GCNSubtarget::getGeneration
Generation getGeneration() const
Definition GCNSubtarget.h:157

llvm::GCNSubtarget::getAddressableNumSGPRs
unsigned getAddressableNumSGPRs() const
Definition GCNSubtarget.h:758

llvm::GCNSubtarget::getMaxWaveScratchSize
unsigned getMaxWaveScratchSize() const
Definition GCNSubtarget.h:165

llvm::GCNUserSGPRUsageInfo::hasQueuePtr
bool hasQueuePtr() const
Definition GCNSubtarget.h:1037

llvm::GCNUserSGPRUsageInfo::hasKernargSegmentPtr
bool hasKernargSegmentPtr() const
Definition GCNSubtarget.h:1039

llvm::GCNUserSGPRUsageInfo::hasDispatchID
bool hasDispatchID() const
Definition GCNSubtarget.h:1041

llvm::GCNUserSGPRUsageInfo::hasPrivateSegmentBuffer
bool hasPrivateSegmentBuffer() const
Definition GCNSubtarget.h:1033

llvm::GCNUserSGPRUsageInfo::hasPrivateSegmentSize
bool hasPrivateSegmentSize() const
Definition GCNSubtarget.h:1045

llvm::GCNUserSGPRUsageInfo::hasDispatchPtr
bool hasDispatchPtr() const
Definition GCNSubtarget.h:1035

llvm::GCNUserSGPRUsageInfo::hasFlatScratchInit
bool hasFlatScratchInit() const
Definition GCNSubtarget.h:1043

llvm::GlobalValue::getVisibility
VisibilityTypes getVisibility() const
Definition GlobalValue.h:250

llvm::GlobalValue::isDeclaration
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337

llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition GlobalValue.h:207

llvm::GlobalValue::getDataLayout
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141

llvm::GlobalVariable
Definition GlobalVariable.h:41

llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition GlobalVariable.h:155

llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition GlobalVariable.h:111

llvm::GlobalVariable::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable.
Definition GlobalVariable.h:319

llvm::GlobalVariable::getGlobalSize
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:569

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68

llvm::LLVMContext::diagnose
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Definition LLVMContext.cpp:249

llvm::MCAssembler
Definition MCAssembler.h:46

llvm::MCAssembler::getEmitterPtr
MCCodeEmitter * getEmitterPtr() const
Definition MCAssembler.h:177

llvm::MCBinaryExpr::createAdd
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343

llvm::MCBinaryExpr::createAnd
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348

llvm::MCBinaryExpr::createOr
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408

llvm::MCBinaryExpr::createLOr
static const MCBinaryExpr * createLOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:378

llvm::MCBinaryExpr::createMul
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:398

llvm::MCBinaryExpr::createGT
static const MCBinaryExpr * createGT(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:363

llvm::MCBinaryExpr::createDiv
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353

llvm::MCBinaryExpr::createShl
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:413

llvm::MCBinaryExpr::createSub
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428

llvm::MCConstantExpr::create
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212

llvm::MCContext
Context object for machine code objects.
Definition MCContext.h:83

llvm::MCContext::getObjectFileInfo
const MCObjectFileInfo * getObjectFileInfo() const
Definition MCContext.h:413

llvm::MCContext::reportError
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
Definition MCContext.cpp:1127

llvm::MCContext::getOrCreateSymbol
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition MCContext.cpp:208

llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34

llvm::MCExpr::evaluateAsRelocatable
LLVM_ABI bool evaluateAsRelocatable(MCValue &Res, const MCAssembler *Asm) const
Try to evaluate the expression to a relocatable value, i.e.
Definition MCExpr.cpp:450

llvm::MCObjectFileInfo::getReadOnlySection
MCSection * getReadOnlySection() const
Definition MCObjectFileInfo.h:277

llvm::MCObjectFileInfo::getTextSection
MCSection * getTextSection() const
Definition MCObjectFileInfo.h:274

llvm::MCObjectFileInfo::getContext
MCContext & getContext() const
Definition MCObjectFileInfo.h:257

llvm::MCResourceInfo::ResourceInfoKind
ResourceInfoKind
Definition AMDGPUMCResourceInfo.h:30

llvm::MCSectionELF
This represents a section on Linux, lots of Unix variants and some bare metal systems.
Definition MCSectionELF.h:27

llvm::MCSection
Instances of this class represent a uniqued identifier for a section in the current translation unit.
Definition MCSection.h:573

llvm::MCSection::ensureMinAlignment
void ensureMinAlignment(Align MinAlignment)
Makes sure that Alignment is at least MinAlignment.
Definition MCSection.h:661

llvm::MCSection::hasInstructions
bool hasInstructions() const
Definition MCSection.h:669

llvm::MCStreamer::getContext
MCContext & getContext() const
Definition MCStreamer.h:323

llvm::MCSubtargetInfo
Generic base class for all target subtargets.
Definition MCSubtargetInfo.h:77

llvm::MCSubtargetInfo::getTargetTriple
const Triple & getTargetTriple() const
Definition MCSubtargetInfo.h:111

llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214

llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42

llvm::MCSymbol::isDefined
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition MCSymbol.h:233

llvm::MCSymbol::getName
StringRef getName() const
getName - Get the symbol name.
Definition MCSymbol.h:188

llvm::MCSymbol::isVariable
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267

llvm::MCSymbol::redefineIfPossible
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition MCSymbol.h:212

llvm::MCSymbol::getVariableValue
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270

llvm::MCTargetStreamer::getStreamer
MCStreamer & getStreamer()
Definition MCStreamer.h:103

llvm::MCUnaryExpr::createNot
static const MCUnaryExpr * createNot(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:273

llvm::MCValue
Definition MCValue.h:30

llvm::MachineBasicBlock
Definition MachineBasicBlock.h:122

llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition MachineFrameInfo.h:612

llvm::MachineFunction
Definition MachineFunction.h:294

llvm::MachineFunction::getContext
MCContext & getContext() const
Definition MachineFunction.h:735

llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition MachineFunction.h:749

llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition MachineFunction.h:884

llvm::MachineFunction::front
const MachineBasicBlock & front() const
Definition MachineFunction.h:1024

llvm::MachineInstr
Representation of each machine instruction.
Definition MachineInstr.h:73

llvm::MachineModuleInfoWrapperPass
Definition MachineModuleInfo.h:173

llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition MachineOperand.h:49

llvm::MachineOperand::getImm
int64_t getImm() const
Definition MachineOperand.h:560

llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition MachineOperand.h:331

llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition MachineOperand.h:333

llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition MachineOperand.h:372

llvm::MachineOptimizationRemarkAnalysis
Diagnostic information for optimization analysis remarks.
Definition MachineOptimizationRemarkEmitter.h:112

llvm::MachineOptimizationRemarkEmitter::emit
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
Definition MachineOptimizationRemarkEmitter.cpp:57

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67

llvm::NamedMDNode::getNumOperands
LLVM_ABI unsigned getNumOperands() const
Definition Metadata.cpp:1523

llvm::NamedMDNode::operands
iterator_range< op_iterator > operands()
Definition Metadata.h:1856

llvm::Pass::getAnalysis
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Definition PassAnalysisSupport.h:224

llvm::Register
Wrapper class representing virtual and physical registers.
Definition Register.h:20

llvm::SIMachineFunctionInfo
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Definition SIMachineFunctionInfo.h:415

llvm::SIMachineFunctionInfo::getNumWaveDispatchVGPRs
unsigned getNumWaveDispatchVGPRs() const
Definition SIMachineFunctionInfo.h:1015

llvm::SIMachineFunctionInfo::getNumSpilledVGPRs
unsigned getNumSpilledVGPRs() const
Definition SIMachineFunctionInfo.h:1106

llvm::SIMachineFunctionInfo::getNumWaveDispatchSGPRs
unsigned getNumWaveDispatchSGPRs() const
Definition SIMachineFunctionInfo.h:1011

llvm::SIMachineFunctionInfo::getNumSpilledSGPRs
unsigned getNumSpilledSGPRs() const
Definition SIMachineFunctionInfo.h:1102

llvm::SIMachineFunctionInfo::getUserSGPRInfo
GCNUserSGPRUsageInfo & getUserSGPRInfo()
Definition SIMachineFunctionInfo.h:701

llvm::SIMachineFunctionInfo::getDynamicVGPRBlockSize
unsigned getDynamicVGPRBlockSize() const
Definition SIMachineFunctionInfo.h:851

llvm::SIMachineFunctionInfo::getMaxWavesPerEU
unsigned getMaxWavesPerEU() const
Definition SIMachineFunctionInfo.h:1174

llvm::SIMachineFunctionInfo::hasWorkGroupIDZ
bool hasWorkGroupIDZ() const
Definition SIMachineFunctionInfo.h:943

llvm::SIMachineFunctionInfo::hasWorkGroupIDY
bool hasWorkGroupIDY() const
Definition SIMachineFunctionInfo.h:939

llvm::SIMachineFunctionInfo::getMode
SIModeRegisterDefaults getMode() const
Definition SIMachineFunctionInfo.h:674

llvm::SIMachineFunctionInfo::hasWorkGroupInfo
bool hasWorkGroupInfo() const
Definition SIMachineFunctionInfo.h:947

llvm::SIMachineFunctionInfo::hasWorkItemIDY
bool hasWorkItemIDY() const
Definition SIMachineFunctionInfo.h:959

llvm::SIMachineFunctionInfo::hasWorkGroupIDX
bool hasWorkGroupIDX() const
Definition SIMachineFunctionInfo.h:935

llvm::SIMachineFunctionInfo::getNumUserSGPRs
unsigned getNumUserSGPRs() const
Definition SIMachineFunctionInfo.h:999

llvm::SIMachineFunctionInfo::getScratchReservedForDynamicVGPRs
unsigned getScratchReservedForDynamicVGPRs() const
Definition SIMachineFunctionInfo.h:854

llvm::SIMachineFunctionInfo::isDynamicVGPREnabled
bool isDynamicVGPREnabled() const
Definition SIMachineFunctionInfo.h:850

llvm::SIMachineFunctionInfo::getPSInputAddr
unsigned getPSInputAddr() const
Definition SIMachineFunctionInfo.h:1118

llvm::SIMachineFunctionInfo::hasWorkItemIDZ
bool hasWorkItemIDZ() const
Definition SIMachineFunctionInfo.h:963

llvm::SIMachineFunctionInfo::getPSInputEnable
unsigned getPSInputEnable() const
Definition SIMachineFunctionInfo.h:1122

llvm::SmallString
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition SmallVector.h:423

llvm::SmallVectorTemplateCommon::empty
bool empty() const
Definition SmallVector.h:86

llvm::StringRef
Represent a constant reference to a string, i.e.
Definition StringRef.h:56

llvm::StringSet::insert
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition StringSet.h:39

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition TargetMachine.h:83

llvm::TargetMachine::getTargetTriple
const Triple & getTargetTriple() const
Definition TargetMachine.h:132

llvm::Triple::OSType
OSType
Definition Triple.h:212

llvm::Triple::AMDHSA
@ AMDHSA
Definition Triple.h:236

llvm::Triple::AMDPAL
@ AMDPAL
Definition Triple.h:246

llvm::Triple::getOS
OSType getOS() const
Get the parsed operating system type of this triple.
Definition Triple.h:445

llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82

llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46

llvm::Value
LLVM Value Representation.
Definition Value.h:75

llvm::Value::getName
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318

llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53

llvm::raw_svector_ostream
A raw_ostream that writes to a SmallVector or SmallString.
Definition raw_ostream.h:692

llvm::raw_svector_ostream::str
StringRef str() const
Return a StringRef for the vector contents.
Definition raw_ostream.h:721

uint16_t

uint32_t

uint64_t

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition ErrorHandling.h:164

TargetMachine.h

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition AMDGPUAddrSpace.h:36

llvm::AMDGPU::HSAMD::Kernel::Arg::Key::Align
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Definition AMDGPUMetadata.h:183

llvm::AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG
@ FIXED_NUM_SGPRS_FOR_INIT_BUG
Definition AMDGPUBaseInfo.h:150

llvm::AMDGPU::IsaInfo::getVGPREncodingGranule
unsigned getVGPREncodingGranule(const MCSubtargetInfo &STI, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1421

llvm::AMDGPU::IsaInfo::getSGPREncodingGranule
unsigned getSGPREncodingGranule(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1302

llvm::AMDGPU::IsaInfo::getTotalNumVGPRs
unsigned getTotalNumVGPRs(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1438

llvm::AMDGPU::IsaInfo::getMaxWavesPerEU
unsigned getMaxWavesPerEU(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:1271

llvm::AMDGPU::IsaInfo::getNumExtraSGPRs
unsigned getNumExtraSGPRs(const MCSubtargetInfo &STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
Definition AMDGPUBaseInfo.cpp:1357

llvm::AMDGPU::IsaInfo::TargetIDSetting::Any
@ Any
Definition AMDGPUBaseInfo.h:154

llvm::AMDGPU::IsaInfo::getVGPRAllocGranule
unsigned getVGPRAllocGranule(const MCSubtargetInfo &STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
Definition AMDGPUBaseInfo.cpp:1399

llvm::AMDGPU
Definition AMDGPUMetadataVerifier.h:34

llvm::AMDGPU::getTotalNumVGPRs
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
Definition AMDGPUBaseInfo.cpp:2715

llvm::AMDGPU::printAMDGPUMCExpr
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
Definition AMDGPUMCExpr.cpp:704

llvm::AMDGPU::AMDHSA_COV4
@ AMDHSA_COV4
Definition AMDGPUBaseInfo.h:64

llvm::AMDGPU::AMDHSA_COV5
@ AMDHSA_COV5
Definition AMDGPUBaseInfo.h:64

llvm::AMDGPU::AMDHSA_COV6
@ AMDHSA_COV6
Definition AMDGPUBaseInfo.h:64

llvm::AMDGPU::isModuleEntryFunctionCC
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
Definition AMDGPUBaseInfo.h:1521

llvm::AMDGPU::getLdsDwGranularity
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
Definition AMDGPUBaseInfo.cpp:3769

llvm::AMDGPU::getIsaVersion
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
Definition AMDGPUTargetParser.cpp:111

llvm::AMDGPU::maskShiftSet
const MCExpr * maskShiftSet(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Provided with the MCExpr * Val, uint32 Mask and Shift, will return the masked and left-shifted,...
Definition SIDefinesUtils.h:44

llvm::AMDGPU::getAMDHSACodeObjectVersion
unsigned getAMDHSACodeObjectVersion(const Module &M)
Definition AMDGPUBaseInfo.cpp:213

llvm::AMDGPU::isGFX90A
bool isGFX90A(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2687

llvm::AMDGPU::isKernel
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
Definition AMDGPUBaseInfo.h:1532

llvm::AMDGPU::isEntryFunctionCC
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
Definition AMDGPUBaseInfo.h:1482

llvm::AMDGPU::isCompute
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
Definition AMDGPUBaseInfo.h:1477

llvm::AMDGPU::hasMAIInsts
bool hasMAIInsts(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2699

llvm::AMDGPU::isInlinableIntLiteral
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
Definition AMDGPUBaseInfo.h:1713

llvm::AMDGPU::foldAMDGPUMCExpr
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
Definition AMDGPUMCExpr.cpp:695

llvm::AMDGPU::isGFX10Plus
bool isGFX10Plus(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2615

llvm::AMDGPU::SGPR_SPILL
@ SGPR_SPILL
Definition SIInstrInfo.h:1898

llvm::AMDGPU::getShiftMask
constexpr std::pair< unsigned, unsigned > getShiftMask(unsigned Value)
Deduce the least significant bit aligned shift and mask values for a binary Complement Value (as they...
Definition SIDefinesUtils.h:27

llvm::AMDGPU::hasKernargPreload
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
Definition AMDGPUBaseInfo.cpp:2711

llvm::AMDGPU::getIntegerPairAttribute
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
Definition AMDGPUBaseInfo.cpp:1672

llvm::ARM_MB::ST
@ ST
Definition ARMBaseInfo.h:73

llvm::ARM::ProfileKind::M
@ M
Definition ARMTargetParser.h:171

llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition BitmaskEnum.h:126

llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24

llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition CallingConv.h:197

llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition CallingConv.h:188

llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition CallingConv.h:200

llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition CallingConv.h:206

llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition CallingConv.h:191

llvm::CallingConv::AMDGPU_CS_Chain
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition CallingConv.h:245

llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition CallingConv.h:194

llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition CallingConv.h:144

llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition CallingConv.h:218

llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition CallingConv.h:213

llvm::ELF::SHT_PROGBITS
@ SHT_PROGBITS
Definition ELF.h:1150

llvm::ELF::STT_AMDGPU_HSA_KERNEL
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1433

llvm::ISD::MCSymbol
@ MCSymbol
Definition ISDOpcodes.h:193

llvm::codeview::PublicSymFlags::Function
@ Function
Definition CodeView.h:408

llvm::lsp::MessageType::Info
@ Info
Definition Protocol.h:1295

llvm::mdconst::extract
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668

llvm::msgpack::Type::Boolean
@ Boolean
Definition MsgPackReader.h:58

llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition OptimizationRemarkEmitter.h:139

llvm::pdb::PDB_SymType::Callee
@ Callee
Definition PDBTypes.h:282

llvm::rdf::Func
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition FunctionInfo.h:25

llvm::Value
FunctionAddr VTableAddr Value
Definition InstrProf.h:137

llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668

llvm::enumerate
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553

llvm::dyn_cast
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643

llvm::getCPU
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Definition AVRTargetMachine.cpp:32

llvm::getTheR600Target
Target & getTheR600Target()
The target for R600 GPUs.
Definition AMDGPUTargetInfo.cpp:20

llvm::DK_ResourceLimit
@ DK_ResourceLimit
Definition DiagnosticInfo.h:66

llvm::createR600AsmPrinterPass
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
Definition R600AsmPrinter.cpp:31

llvm::report_fatal_error
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163

llvm::alignTo
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144

llvm::isUInt
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189

llvm::SmallVector
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Definition SmallVector.h:1151

llvm::isa
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547

llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129

llvm::WaitForUnlockResult::Success
@ Success
The lock was released successfully.
Definition AdvisoryLock.h:20

llvm::divideCeil
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394

llvm::Data
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221

llvm::getTheGCNTarget
Target & getTheGCNTarget()
The target for GCN GPUs.
Definition AMDGPUTargetInfo.cpp:26

llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1916

llvm::DS_Error
@ DS_Error
Definition DiagnosticInfo.h:51

llvm::Log2
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197

llvm::printReg
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Definition TargetRegisterInfo.cpp:110

llvm::reportFatalUsageError
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177

std
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:874

N
#define N

llvm::AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo::NumVGPR
int32_t NumVGPR
Definition AMDGPUResourceUsageAnalysis.h:35

llvm::AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo::HasDynamicallySizedStack
bool HasDynamicallySizedStack
Definition AMDGPUResourceUsageAnalysis.h:43

llvm::AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo::UsesVCC
bool UsesVCC
Definition AMDGPUResourceUsageAnalysis.h:41

llvm::AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo::PrivateSegmentSize
uint64_t PrivateSegmentSize
Definition AMDGPUResourceUsageAnalysis.h:40

llvm::AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo::NumExplicitSGPR
int32_t NumExplicitSGPR
Definition AMDGPUResourceUsageAnalysis.h:37

llvm::AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo::NumAGPR
int32_t NumAGPR
Definition AMDGPUResourceUsageAnalysis.h:36

llvm::AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo::UsesFlatScratch
bool UsesFlatScratch
Definition AMDGPUResourceUsageAnalysis.h:42

llvm::AMDGPUResourceUsageAnalysisWrapperPass
Definition AMDGPUResourceUsageAnalysis.h:55

llvm::AMDGPUResourceUsageAnalysisWrapperPass::FunctionResourceInfo
AMDGPUResourceUsageAnalysisImpl::SIFunctionResourceInfo FunctionResourceInfo
Definition AMDGPUResourceUsageAnalysis.h:56

llvm::AMDGPU::AMDGPUMCKernelCodeT
Definition AMDKernelCodeTUtils.h:33

llvm::AMDGPU::AMDGPUMCKernelCodeT::kernarg_segment_byte_size
uint64_t kernarg_segment_byte_size
Definition AMDKernelCodeTUtils.h:52

llvm::AMDGPU::AMDGPUMCKernelCodeT::initDefault
void initDefault(const MCSubtargetInfo &STI, MCContext &Ctx, bool InitMCExpr=true)
Definition AMDKernelCodeTUtils.cpp:364

llvm::AMDGPU::AMDGPUMCKernelCodeT::workitem_private_segment_byte_size
const MCExpr * workitem_private_segment_byte_size
Definition AMDKernelCodeTUtils.h:75

llvm::AMDGPU::AMDGPUMCKernelCodeT::code_properties
uint32_t code_properties
Definition AMDKernelCodeTUtils.h:49

llvm::AMDGPU::AMDGPUMCKernelCodeT::compute_pgm_resource2_registers
const MCExpr * compute_pgm_resource2_registers
Definition AMDKernelCodeTUtils.h:70

llvm::AMDGPU::AMDGPUMCKernelCodeT::kernarg_segment_alignment
uint8_t kernarg_segment_alignment
Definition AMDKernelCodeTUtils.h:60

llvm::AMDGPU::AMDGPUMCKernelCodeT::validate
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
Definition AMDKernelCodeTUtils.cpp:383

llvm::AMDGPU::AMDGPUMCKernelCodeT::wavefront_sgpr_count
const MCExpr * wavefront_sgpr_count
Definition AMDKernelCodeTUtils.h:73

llvm::AMDGPU::AMDGPUMCKernelCodeT::workitem_vgpr_count
const MCExpr * workitem_vgpr_count
Definition AMDKernelCodeTUtils.h:74

llvm::AMDGPU::AMDGPUMCKernelCodeT::is_dynamic_callstack
const MCExpr * is_dynamic_callstack
Definition AMDKernelCodeTUtils.h:72

llvm::AMDGPU::AMDGPUMCKernelCodeT::workgroup_group_segment_byte_size
uint32_t workgroup_group_segment_byte_size
Definition AMDKernelCodeTUtils.h:50

llvm::AMDGPU::AMDGPUMCKernelCodeT::compute_pgm_resource1_registers
const MCExpr * compute_pgm_resource1_registers
Definition AMDKernelCodeTUtils.h:69

llvm::AMDGPU::MCKernelDescriptor
Definition AMDGPUMCKernelDescriptor.h:27

llvm::AMDGPU::MCKernelDescriptor::compute_pgm_rsrc2
const MCExpr * compute_pgm_rsrc2
Definition AMDGPUMCKernelDescriptor.h:33

llvm::AMDGPU::MCKernelDescriptor::kernarg_size
const MCExpr * kernarg_size
Definition AMDGPUMCKernelDescriptor.h:30

llvm::AMDGPU::MCKernelDescriptor::kernarg_preload
const MCExpr * kernarg_preload
Definition AMDGPUMCKernelDescriptor.h:35

llvm::AMDGPU::MCKernelDescriptor::compute_pgm_rsrc3
const MCExpr * compute_pgm_rsrc3
Definition AMDGPUMCKernelDescriptor.h:31

llvm::AMDGPU::MCKernelDescriptor::private_segment_fixed_size
const MCExpr * private_segment_fixed_size
Definition AMDGPUMCKernelDescriptor.h:29

llvm::AMDGPU::MCKernelDescriptor::bits_get
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
Definition AMDGPUMCKernelDescriptor.cpp:97

llvm::AMDGPU::MCKernelDescriptor::compute_pgm_rsrc1
const MCExpr * compute_pgm_rsrc1
Definition AMDGPUMCKernelDescriptor.h:32

llvm::AMDGPU::MCKernelDescriptor::group_segment_fixed_size
const MCExpr * group_segment_fixed_size
Definition AMDGPUMCKernelDescriptor.h:28

llvm::AMDGPU::MCKernelDescriptor::kernel_code_properties
const MCExpr * kernel_code_properties
Definition AMDGPUMCKernelDescriptor.h:34

llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39

llvm::SIModeRegisterDefaults
Definition SIModeRegisterDefaults.h:20

llvm::SIProgramInfo
Track resource usage for kernels / entry functions.
Definition SIProgramInfo.h:32

llvm::SIProgramInfo::SGPRSpill
unsigned SGPRSpill
Definition SIProgramInfo.h:75

llvm::SIProgramInfo::NumSGPR
const MCExpr * NumSGPR
Definition SIProgramInfo.h:74

llvm::SIProgramInfo::TIdIGCompCount
uint32_t TIdIGCompCount
Definition SIProgramInfo.h:62

llvm::SIProgramInfo::NumArchVGPR
const MCExpr * NumArchVGPR
Definition SIProgramInfo.h:70

llvm::SIProgramInfo::TGIdZEnable
uint32_t TGIdZEnable
Definition SIProgramInfo.h:60

llvm::SIProgramInfo::LDSBlocks
uint32_t LDSBlocks
Definition SIProgramInfo.h:51

llvm::SIProgramInfo::VGPRBlocks
const MCExpr * VGPRBlocks
Definition SIProgramInfo.h:36

llvm::SIProgramInfo::ScratchBlocks
const MCExpr * ScratchBlocks
Definition SIProgramInfo.h:52

llvm::SIProgramInfo::EXCPEnable
uint32_t EXCPEnable
Definition SIProgramInfo.h:65

llvm::SIProgramInfo::ComputePGMRSrc3
const MCExpr * ComputePGMRSrc3
Definition SIProgramInfo.h:67

llvm::SIProgramInfo::getComputePGMRSrc1
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
Definition SIProgramInfo.cpp:177

llvm::SIProgramInfo::VCCUsed
const MCExpr * VCCUsed
Definition SIProgramInfo.h:97

llvm::SIProgramInfo::UserSGPR
uint32_t UserSGPR
Definition SIProgramInfo.h:56

llvm::SIProgramInfo::FloatMode
uint32_t FloatMode
Definition SIProgramInfo.h:39

llvm::SIProgramInfo::FlatUsed
const MCExpr * FlatUsed
Definition SIProgramInfo.h:78

llvm::SIProgramInfo::TrapHandlerEnable
uint32_t TrapHandlerEnable
Definition SIProgramInfo.h:57

llvm::SIProgramInfo::NamedBarCnt
const MCExpr * NamedBarCnt
Definition SIProgramInfo.h:87

llvm::SIProgramInfo::ScratchEnable
const MCExpr * ScratchEnable
Definition SIProgramInfo.h:55

llvm::SIProgramInfo::AccumOffset
const MCExpr * AccumOffset
Definition SIProgramInfo.h:72

llvm::SIProgramInfo::NumAccVGPR
const MCExpr * NumAccVGPR
Definition SIProgramInfo.h:71

llvm::SIProgramInfo::DynamicCallStack
const MCExpr * DynamicCallStack
Definition SIProgramInfo.h:94

llvm::SIProgramInfo::LdsSize
uint32_t LdsSize
Definition SIProgramInfo.h:64

llvm::SIProgramInfo::SGPRBlocks
const MCExpr * SGPRBlocks
Definition SIProgramInfo.h:37

llvm::SIProgramInfo::NumVGPRsForWavesPerEU
const MCExpr * NumVGPRsForWavesPerEU
Definition SIProgramInfo.h:84

llvm::SIProgramInfo::NumVGPR
const MCExpr * NumVGPR
Definition SIProgramInfo.h:69

llvm::SIProgramInfo::DX10Clamp
uint32_t DX10Clamp
Definition SIProgramInfo.h:41

llvm::SIProgramInfo::TGIdXEnable
uint32_t TGIdXEnable
Definition SIProgramInfo.h:58

llvm::SIProgramInfo::IEEEMode
uint32_t IEEEMode
Definition SIProgramInfo.h:43

llvm::SIProgramInfo::LDSSize
uint32_t LDSSize
Definition SIProgramInfo.h:77

llvm::SIProgramInfo::Occupancy
const MCExpr * Occupancy
Definition SIProgramInfo.h:90

llvm::SIProgramInfo::ScratchSize
const MCExpr * ScratchSize
Definition SIProgramInfo.h:48

llvm::SIProgramInfo::WgpMode
uint32_t WgpMode
Definition SIProgramInfo.h:44

llvm::SIProgramInfo::FwdProgress
uint32_t FwdProgress
Definition SIProgramInfo.h:46

llvm::SIProgramInfo::NumSGPRsForWavesPerEU
const MCExpr * NumSGPRsForWavesPerEU
Definition SIProgramInfo.h:81

llvm::SIProgramInfo::getComputePGMRSrc2
const MCExpr * getComputePGMRSrc2(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
Definition SIProgramInfo.cpp:202

llvm::SIProgramInfo::TGSizeEnable
uint32_t TGSizeEnable
Definition SIProgramInfo.h:61

llvm::SIProgramInfo::TgSplit
uint32_t TgSplit
Definition SIProgramInfo.h:73

llvm::SIProgramInfo::EXCPEnMSB
uint32_t EXCPEnMSB
Definition SIProgramInfo.h:63

llvm::SIProgramInfo::TGIdYEnable
uint32_t TGIdYEnable
Definition SIProgramInfo.h:59

llvm::SIProgramInfo::VGPRSpill
unsigned VGPRSpill
Definition SIProgramInfo.h:76

llvm::SIProgramInfo::MemOrdered
uint32_t MemOrdered
Definition SIProgramInfo.h:45

llvm::TargetRegistry::RegisterAsmPrinter
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.
Definition TargetRegistry.h:897