LLVM 23.0.0git
AMDGPUAsmPrinter.cpp
Go to the documentation of this file.
1//===-- AMDGPUAsmPrinter.cpp - AMDGPU assembly printer --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10///
11/// The AMDGPUAsmPrinter is used to print both assembly string and also binary
12/// code. When passed an MCAsmStreamer it prints assembly and when passed
13/// an MCObjectStreamer it outputs binary code.
14//
15//===----------------------------------------------------------------------===//
16//
17
18#include "AMDGPUAsmPrinter.h"
19#include "AMDGPU.h"
23#include "AMDGPUTargetMachine.h"
24#include "GCNSubtarget.h"
29#include "R600AsmPrinter.h"
42#include "llvm/MC/MCAssembler.h"
43#include "llvm/MC/MCContext.h"
45#include "llvm/MC/MCStreamer.h"
46#include "llvm/MC/MCValue.h"
53
54using namespace llvm;
55using namespace llvm::AMDGPU;
56
57// This should get the default rounding mode from the kernel. We just set the
58// default here, but this could change if the OpenCL rounding mode pragmas are
59// used.
60//
61// The denormal mode here should match what is reported by the OpenCL runtime
62// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but
// can also be overridden to flush with the -cl-denorms-are-zero compiler flag.
64//
65// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double
66// precision, and leaves single precision to flush all and does not report
67// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports
68// CL_FP_DENORM for both.
69//
70// FIXME: It seems some instructions do not support single precision denormals
71// regardless of the mode (exp_*_f32, rcp_*_f32, rsq_*_f32, rsq_*f32, sqrt_f32,
72// and sin_f32, cos_f32 on most parts).
73
74// We want to use these instructions, and using fp32 denormals also causes
75// instructions to run at the double precision rate for the device so it's
76// probably best to just report no single precision denormals.
83
// Factory registered with the target registry to construct the AMDGPU
// assembly printer. NOTE(review): the declaration line carrying the function
// name and the TargetMachine parameter 'tm' is elided in this view.
static AsmPrinter *
                           std::unique_ptr<MCStreamer> &&Streamer) {
  // Ownership of the streamer transfers to the newly created printer.
  return new AMDGPUAsmPrinter(tm, std::move(Streamer));
}
89
97
98namespace {
99class AMDGPUAsmPrinterHandler : public AsmPrinterHandler {
100protected:
101 AMDGPUAsmPrinter *Asm;
102
103public:
104 AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *A) : Asm(A) {}
105
106 void beginFunction(const MachineFunction *MF) override {}
107
108 void endFunction(const MachineFunction *MF) override { Asm->endFunction(MF); }
109
110 void endModule() override {}
111};
112} // End anonymous namespace
113
// AMDGPUAsmPrinter constructor. NOTE(review): the first signature line is
// elided in this view; the streamer is handed to the AsmPrinter base, which
// takes ownership and populates OutStreamer.
                                   std::unique_ptr<MCStreamer> Streamer)
    : AsmPrinter(TM, std::move(Streamer)) {
  assert(OutStreamer && "AsmPrinter constructed without streamer");
}
119
121 return "AMDGPU Assembly Printer";
122}
123
  // Module-level MCSubtargetInfo taken from the TargetMachine (declaration
  // line elided in this view).
  return TM.getMCSubtargetInfo();
}
127
  // Returns the AMDGPU-specific target streamer, or null when no output
  // streamer exists yet (declaration line elided in this view).
  if (!OutStreamer)
    return nullptr;
  // The target streamer attached to OutStreamer is the AMDGPU one by
  // construction, so the static_cast is safe here.
  return static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
}
133
137
// One-time, lazy initialization of the target streamer for module \p M.
// NOTE(review): several lines are elided in this view — the condition guarding
// the early return and the HSA metadata streamer setup that surrounds the
// begin() call below.
void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {

  // TODO: Which one is called first, emitStartOfAsmFile or
  // emitFunctionBodyStart?
  if (getTargetStreamer() && !getTargetStreamer()->getTargetID())
    initializeTargetID(M);

    return;

        CodeObjectVersion);
    HSAMetadataStream->begin(M, *getTargetStreamer()->getTargetID());
  }

}
161
  // End-of-module emission (declaration line elided in this view): make sure
  // the target streamer was initialized, then finalize and emit the HSA
  // metadata note for AMDHSA targets.

  // Init target streamer if it has not yet happened
    initTargetStreamer(M);

  if (TM.getTargetTriple().getOS() != Triple::AMDHSA)

  // Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA / NT_AMD_HSA_METADATA).
  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
    HSAMetadataStream->end();
    bool Success = HSAMetadataStream->emitTo(*getTargetStreamer());
    (void)Success;
    assert(Success && "Malformed HSA Metadata");
  }
}
179
  // Start-of-function-body hook (declaration line elided in this view):
  // validates target-ID compatibility and emits per-kernel metadata.
  const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
  const Function &F = MF->getFunction();

  // TODO: We're checking this late, would be nice to check it earlier.
  // NOTE(review): the error-reporting call that this message feeds is elided
  // in this view.
  if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) {
        STM.getCPU() + " is only available on code object version 6 or better");
  }

  // TODO: Which one is called first, emitStartOfAsmFile or
  // emitFunctionBodyStart?
  if (!getTargetStreamer()->getTargetID())
    initializeTargetID(*F.getParent());

  const auto &FunctionTargetID = STM.getTargetID();
  // Make sure function's xnack settings are compatible with module's
  // xnack settings.
  if (FunctionTargetID.isXnackSupported() &&
      FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any &&
      FunctionTargetID.getXnackSetting() !=
          getTargetStreamer()->getTargetID()->getXnackSetting()) {
    OutContext.reportError(
        {}, "xnack setting of '" + Twine(MF->getName()) +
                "' function does not match module xnack setting");
    return;
  }
  // Make sure function's sramecc settings are compatible with module's
  // sramecc settings.
  if (FunctionTargetID.isSramEccSupported() &&
      FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any &&
      FunctionTargetID.getSramEccSetting() !=
          getTargetStreamer()->getTargetID()->getSramEccSetting()) {
    OutContext.reportError(
        {}, "sramecc setting of '" + Twine(MF->getName()) +
                "' function does not match module sramecc setting");
    return;
  }

  // Only kernels/entry points get the metadata emitted below.
  if (!MFI.isEntryFunction())
    return;

  // NOTE(review): the statement that emits the validated kernel code object
  // (after validate()) is elided in this view.
  if (STM.isMesaKernel(F) &&
      (F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
       F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
    AMDGPUMCKernelCodeT KernelCode;
    getAmdKernelCode(KernelCode, CurrentProgramInfo, *MF);
    KernelCode.validate(&STM, MF->getContext());
  }

  if (STM.isAmdHsaOS())
    HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
}
235
  // End-of-function-body hook (declaration line elided in this view): for HSA
  // entry functions, emits the kernel descriptor into the read-only section.
  const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
  if (!MFI.isEntryFunction())
    return;

  assert(TM.getTargetTriple().getOS() == Triple::AMDHSA);

  auto &Streamer = getTargetStreamer()->getStreamer();
  auto &Context = Streamer.getContext();
  auto &ObjectFileInfo = *Context.getObjectFileInfo();
  auto &ReadOnlySection = *ObjectFileInfo.getReadOnlySection();

  // Save/restore the current section around the descriptor emission.
  Streamer.pushSection();
  Streamer.switchSection(&ReadOnlySection);

  // CP microcode requires the kernel descriptor to be allocated on 64 byte
  // alignment.
  Streamer.emitValueToAlignment(Align(64), 0, 1, 0);
  ReadOnlySection.ensureMinAlignment(Align(64));

  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();

  // NOTE(review): the call that this argument list belongs to (presumably the
  // target streamer's kernel-descriptor emission) is elided in this view.
  SmallString<128> KernelName;
  getNameWithPrefix(KernelName, &MF->getFunction());
      STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
      CurrentProgramInfo.NumVGPRsForWavesPerEU,
      CurrentProgramInfo.NumSGPRsForWavesPerEU,
      CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
          getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Context),
          Context),
      CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);

  Streamer.popSection();
}
273
  // Prints an assembler comment describing an IMPLICIT_DEF instruction.
  // NOTE(review): the declaration line and the SmallString buffer declaration
  // for 'Str' are elided in this view.
  Register RegNo = MI->getOperand(0).getReg();

  raw_svector_ostream OS(Str);
  OS << "implicit-def: "
     << printReg(RegNo, MF->getSubtarget().getRegisterInfo());

  // SGPR spills lowered onto VGPR lanes are flagged so the comment explains
  // why the implicit def exists.
  if (MI->getAsmPrinterFlags() & AMDGPU::SGPR_SPILL)
    OS << " : SGPR spill to VGPR lane";

  OutStreamer->AddComment(OS.str());
  OutStreamer->addBlankLine();
}
288
  // Function entry-label hook (declaration line elided in this view). HSA
  // targets bail out early; the elided statement inside the first branch
  // presumably performs HSA-specific label emission before returning.
  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
    return;
  }

  const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
  // NOTE(review): the statements consuming SymbolName (lines elided in this
  // view) presumably emit an entry-point symbol for the kernel.
  if (MFI->isEntryFunction() && STM.isAmdHsaOrMesa(MF->getFunction())) {
    SmallString<128> SymbolName;
    getNameWithPrefix(SymbolName, &MF->getFunction()),
  }
  if (DumpCodeInstEmitter) {
    // Disassemble function name label to text.
    DisasmLines.push_back(MF->getName().str() + ":");
    DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
    HexLines.emplace_back("");
  }

}
312
  // Basic-block-start hook (declaration line elided in this view): when
  // -dumpcode is active, record a textual label for non-fallthrough blocks.
  if (DumpCodeInstEmitter && !isBlockOnlyReachableByFallthrough(&MBB)) {
    // Write a line for the basic block label if it is not only fallthrough.
    DisasmLines.push_back((Twine("BB") + Twine(getFunctionNumber()) + "_" +
                           Twine(MBB.getNumber()) + ":")
                              .str());
    DisasmLineMaxLen = std::max(DisasmLineMaxLen, DisasmLines.back().size());
    HexLines.emplace_back("");
  }
}
324
  // Global-variable emission (declaration line and the opening of the LDS
  // address-space branch are elided in this view). LDS globals are emitted as
  // target-streamer symbols rather than ordinary data.
  if (GV->hasInitializer() && !isa<UndefValue>(GV->getInitializer())) {
    OutContext.reportError({},
                           Twine(GV->getName()) +
                               ": unsupported initializer for address space");
    return;
  }

  // NOTE(review): the condition line completing this branch is elided; the
  // assert below is unreachable as printed because of the preceding return —
  // presumably the elided line makes the return conditional.
  const Triple::OSType OS = TM.getTargetTriple().getOS();
  if (OS == Triple::AMDHSA || OS == Triple::AMDPAL) {
      return;
    // With object linking, LDS definitions should have been externalized
    // by earlier passes (e.g. LDS lowering, named barrier lowering).
    // Only declarations reach here, emitted as SHN_AMDGPU_LDS symbols
    // so the linker can assign their offsets.
    assert(GV->isDeclaration() &&
           "LDS definitions should have been externalized when object "
           "linking is enabled");
  }

  MCSymbol *GVSym = getSymbol(GV);

  GVSym->redefineIfPossible();
  if (GVSym->isDefined() || GVSym->isVariable())
    report_fatal_error("symbol '" + Twine(GVSym->getName()) +
                       "' is already defined");

  // NOTE(review): the line computing 'Size' from DL is elided in this view.
  const DataLayout &DL = GV->getDataLayout();
  Align Alignment = GV->getAlign().value_or(Align(4));

  emitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration());
  emitLinkage(GV, GVSym);
  auto *TS = getTargetStreamer();
  TS->emitAMDGPULDS(GVSym, Size, Alignment);
  return;
  }

}
367
  // Module initialization (declaration line elided in this view): picks the
  // HSA metadata streamer matching the module's code object version.
  CodeObjectVersion = AMDGPU::getAMDHSACodeObjectVersion(M);

  if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
    // NOTE(review): the case labels (presumably AMDHSA_COV4/COV5/COV6) are
    // elided in this view; only the handler bodies are visible.
    switch (CodeObjectVersion) {
      HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV4>();
      break;
      HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV5>();
      break;
      HSAMetadataStream = std::make_unique<HSAMD::MetadataStreamerMsgPackV6>();
      break;
    default:
      reportFatalUsageError("unsupported code object version");
    }

    // Route function begin/end callbacks back into this printer.
    addAsmPrinterHandler(std::make_unique<AMDGPUAsmPrinterHandler>(this));
  }

}
391
392/// Mimics GCNSubtarget::computeOccupancy for MCExpr.
393///
/// Remove dependency on GCNSubtarget and depend only on the necessary values
395/// for said occupancy computation. Should match computeOccupancy implementation
396/// without passing \p STM on.
const AMDGPUMCExpr *createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs,
                                    const MCExpr *NumVGPRs,
                                    unsigned DynamicVGPRBlockSize,
                                    const GCNSubtarget &STM, MCContext &Ctx) {
  // Snapshot the subtarget-derived constants so the resulting expression no
  // longer references STM.
  unsigned MaxWaves = IsaInfo::getMaxWavesPerEU(&STM);
  unsigned Granule = IsaInfo::getVGPRAllocGranule(&STM, DynamicVGPRBlockSize);
  unsigned TargetTotalNumVGPRs = IsaInfo::getTotalNumVGPRs(&STM);
  unsigned Generation = STM.getGeneration();

  // Wrap plain integers as MCConstantExprs in this context.
  auto CreateExpr = [&Ctx](unsigned Value) {
    return MCConstantExpr::create(Value, Ctx);
  };

  // NOTE(review): the call that builds and returns the occupancy expression
  // (line elided in this view) receives the operand list below.
      {CreateExpr(MaxWaves), CreateExpr(Granule),
       CreateExpr(TargetTotalNumVGPRs),
       CreateExpr(Generation), CreateExpr(InitOcc),
       NumSGPRs, NumVGPRs},
      Ctx);
}
417
// Validates resolved resource-usage symbols for \p F against subtarget
// limits, diagnosing scratch-size, SGPR, and occupancy violations.
// NOTE(review): several lines are elided in this view (the RIK alias/limit
// initializers, condition continuations, and diagnostic argument tails).
void AMDGPUAsmPrinter::validateMCResourceInfo(Function &F) {
  // Only defined module-entry functions carry resource symbols to check.
  if (F.isDeclaration() || !AMDGPU::isModuleEntryFunctionCC(F.getCallingConv()))
    return;

  const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
  MCSymbol *FnSym = TM.getSymbol(&F);

  // Resolve a symbol's MCExpr to an absolute value if possible.
  auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
    int64_t Val;
    if (Value->evaluateAsAbsolute(Val)) {
      Res = Val;
      return true;
    }
    return false;
  };

  // Scratch (private segment) size per work-item must stay within the
  // target's limit; the initializer of the limit is elided in this view.
  const uint64_t MaxScratchPerWorkitem =
  MCSymbol *ScratchSizeSymbol =
      RI.getSymbol(FnSym->getName(), RIK::RIK_PrivateSegSize, OutContext);
  uint64_t ScratchSize;
  if (ScratchSizeSymbol->isVariable() &&
      TryGetMCExprValue(ScratchSizeSymbol->getVariableValue(), ScratchSize) &&
      ScratchSize > MaxScratchPerWorkitem) {
    DiagnosticInfoStackSize DiagStackSize(F, ScratchSize, MaxScratchPerWorkitem,
                                          DS_Error);
    F.getContext().diagnose(DiagStackSize);
  }

  // Validate addressable scalar registers (i.e., prior to added implicit
  // SGPRs).
  MCSymbol *NumSGPRSymbol =
      RI.getSymbol(FnSym->getName(), RIK::RIK_NumSGPR, OutContext);
      !STM.hasSGPRInitBug()) {
    unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
    uint64_t NumSgpr;
    if (NumSGPRSymbol->isVariable() &&
        TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&
        NumSgpr > MaxAddressableNumSGPRs) {
      F.getContext().diagnose(DiagnosticInfoResourceLimit(
          F, "addressable scalar registers", NumSgpr, MaxAddressableNumSGPRs,
      return;
    }
  }

  MCSymbol *VCCUsedSymbol =
      RI.getSymbol(FnSym->getName(), RIK::RIK_UsesVCC, OutContext);
  MCSymbol *FlatUsedSymbol =
      RI.getSymbol(FnSym->getName(), RIK::RIK_UsesFlatScratch, OutContext);
  uint64_t VCCUsed, FlatUsed, NumSgpr;

  if (NumSGPRSymbol->isVariable() && VCCUsedSymbol->isVariable() &&
      FlatUsedSymbol->isVariable() &&
      TryGetMCExprValue(NumSGPRSymbol->getVariableValue(), NumSgpr) &&
      TryGetMCExprValue(VCCUsedSymbol->getVariableValue(), VCCUsed) &&
      TryGetMCExprValue(FlatUsedSymbol->getVariableValue(), FlatUsed)) {

    // Recomputes NumSgprs + implicit SGPRs but all symbols should now be
    // resolvable.
    NumSgpr += IsaInfo::getNumExtraSGPRs(
        &STM, VCCUsed, FlatUsed,
        getTargetStreamer()->getTargetID()->isXnackOnOrAny());
        STM.hasSGPRInitBug()) {
      unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
      if (NumSgpr > MaxAddressableNumSGPRs) {
        F.getContext().diagnose(DiagnosticInfoResourceLimit(
            F, "scalar registers", NumSgpr, MaxAddressableNumSGPRs, DS_Error,
        return;
      }
    }

    MCSymbol *NumVgprSymbol =
        RI.getSymbol(FnSym->getName(), RIK::RIK_NumVGPR, OutContext);
    MCSymbol *NumAgprSymbol =
        RI.getSymbol(FnSym->getName(), RIK::RIK_NumAGPR, OutContext);
    uint64_t NumVgpr, NumAgpr;

    MachineModuleInfo &MMI =
    MachineFunction *MF = MMI.getMachineFunction(F);
    if (MF && NumVgprSymbol->isVariable() && NumAgprSymbol->isVariable() &&
        TryGetMCExprValue(NumVgprSymbol->getVariableValue(), NumVgpr) &&
        TryGetMCExprValue(NumAgprSymbol->getVariableValue(), NumAgpr)) {
      const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
      unsigned MaxWaves = MFI.getMaxWavesPerEU();
      uint64_t TotalNumVgpr =
          getTotalNumVGPRs(STM.hasGFX90AInsts(), NumAgpr, NumVgpr);
      // Clamp register counts to the minimum implied by the wave limit.
      uint64_t NumVGPRsForWavesPerEU =
          std::max({TotalNumVgpr, (uint64_t)1,
                    (uint64_t)STM.getMinNumVGPRs(
                        MaxWaves, MFI.getDynamicVGPRBlockSize())});
      uint64_t NumSGPRsForWavesPerEU = std::max(
          {NumSgpr, (uint64_t)1, (uint64_t)STM.getMinNumSGPRs(MaxWaves)});
      const MCExpr *OccupancyExpr = createOccupancy(
          STM.getOccupancyWithWorkGroupSizes(*MF).second,
          MCConstantExpr::create(NumSGPRsForWavesPerEU, OutContext),
          MCConstantExpr::create(NumVGPRsForWavesPerEU, OutContext),
      uint64_t Occupancy;

      const auto [MinWEU, MaxWEU] = AMDGPU::getIntegerPairAttribute(
          F, "amdgpu-waves-per-eu", {0, 0}, true);

      // Diagnose when the achieved occupancy falls below the user's request.
      if (TryGetMCExprValue(OccupancyExpr, Occupancy) && Occupancy < MinWEU) {
        DiagnosticInfoOptimizationFailure Diag(
            F, F.getSubprogram(),
            "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
            "'" +
                F.getName() + "': desired occupancy was " + Twine(MinWEU) +
                ", final occupancy is " + Twine(Occupancy));
        F.getContext().diagnose(Diag);
        return;
      }
    }
  }
}
539
  // Module finalization (declaration line elided in this view): pads the text
  // section, finalizes resource-info expressions, emits module-wide GPR
  // maximums, and validates per-function resource usage.

  // Pad with s_code_end to help tools and guard against instruction prefetch
  // causing stale data in caches. Arguably this should be done by the linker,
  // which is why this isn't done for Mesa.
  // Don't do it if there is no code.
  // NOTE(review): the condition continuation and the statement that performs
  // the actual padding inside the inner branch are elided in this view.
  const MCSubtargetInfo &STI = *getGlobalSTI();
  if ((AMDGPU::isGFX10Plus(STI) || AMDGPU::isGFX90A(STI)) &&
    if (TextSect->hasInstructions()) {
      OutStreamer->switchSection(TextSect);
    }
  }

  // Assign expressions which can only be resolved when all other functions are
  // known.
  RI.finalize(OutContext);

  // Switch section and emit all GPR maximums within the processed module.
  // NOTE(review): the call these max-symbol arguments belong to is elided.
  OutStreamer->pushSection();
  MCSectionELF *MaxGPRSection =
      OutContext.getELFSection(".AMDGPU.gpr_maximums", ELF::SHT_PROGBITS, 0);
  OutStreamer->switchSection(MaxGPRSection);
      RI.getMaxVGPRSymbol(OutContext), RI.getMaxAGPRSymbol(OutContext),
      RI.getMaxSGPRSymbol(OutContext), RI.getMaxNamedBarrierSymbol(OutContext));
  OutStreamer->popSection();

  for (Function &F : M.functions())
    validateMCResourceInfo(F);

  // Clear per-module resource state for any subsequent module.
  RI.reset();

}
577
// Folds \p Value and pretty-prints it into a small string for use in
// assembler comments. NOTE(review): the declaration of the local buffer
// 'Str' is elided in this view.
SmallString<128> AMDGPUAsmPrinter::getMCExprStr(const MCExpr *Value) {
  raw_svector_ostream OSS(Str);
  auto &Streamer = getTargetStreamer()->getStreamer();
  auto &Context = Streamer.getContext();
  // Fold first so the printed expression is as simple as possible.
  const MCExpr *New = foldAMDGPUMCExpr(Value, Context);
  printAMDGPUMCExpr(New, OSS, MAI);
  return Str;
}
587
588// Print comments that apply to both callable functions and entry points.
589void AMDGPUAsmPrinter::emitCommonFunctionComments(
590 const MCExpr *NumVGPR, const MCExpr *NumAGPR, const MCExpr *TotalNumVGPR,
591 const MCExpr *NumSGPR, const MCExpr *ScratchSize, uint64_t CodeSize,
592 const AMDGPUMachineFunctionInfo *MFI) {
593 OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false);
594 OutStreamer->emitRawComment(" TotalNumSgprs: " + getMCExprStr(NumSGPR),
595 false);
596 OutStreamer->emitRawComment(" NumVgprs: " + getMCExprStr(NumVGPR), false);
597 if (NumAGPR && TotalNumVGPR) {
598 OutStreamer->emitRawComment(" NumAgprs: " + getMCExprStr(NumAGPR), false);
599 OutStreamer->emitRawComment(" TotalNumVgprs: " + getMCExprStr(TotalNumVGPR),
600 false);
601 }
602 OutStreamer->emitRawComment(" ScratchSize: " + getMCExprStr(ScratchSize),
603 false);
604 OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()),
605 false);
606}
607
608const MCExpr *AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
609 const MachineFunction &MF) const {
610 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
611 MCContext &Ctx = MF.getContext();
612 uint16_t KernelCodeProperties = 0;
613 const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI.getUserSGPRInfo();
614
615 if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
616 KernelCodeProperties |=
617 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
618 }
619 if (UserSGPRInfo.hasDispatchPtr()) {
620 KernelCodeProperties |=
621 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
622 }
623 if (UserSGPRInfo.hasQueuePtr()) {
624 KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
625 }
626 if (UserSGPRInfo.hasKernargSegmentPtr()) {
627 KernelCodeProperties |=
628 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR;
629 }
630 if (UserSGPRInfo.hasDispatchID()) {
631 KernelCodeProperties |=
632 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID;
633 }
634 if (UserSGPRInfo.hasFlatScratchInit()) {
635 KernelCodeProperties |=
636 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
637 }
638 if (UserSGPRInfo.hasPrivateSegmentSize()) {
639 KernelCodeProperties |=
640 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE;
641 }
642 if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
643 KernelCodeProperties |=
644 amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
645 }
646
647 // CurrentProgramInfo.DynamicCallStack is a MCExpr and could be
648 // un-evaluatable at this point so it cannot be conditionally checked here.
649 // Instead, we'll directly shift the possibly unknown MCExpr into its place
650 // and bitwise-or it into KernelCodeProperties.
651 const MCExpr *KernelCodePropExpr =
652 MCConstantExpr::create(KernelCodeProperties, Ctx);
653 const MCExpr *OrValue = MCConstantExpr::create(
654 amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK_SHIFT, Ctx);
655 OrValue = MCBinaryExpr::createShl(CurrentProgramInfo.DynamicCallStack,
656 OrValue, Ctx);
657 KernelCodePropExpr = MCBinaryExpr::createOr(KernelCodePropExpr, OrValue, Ctx);
658
659 return KernelCodePropExpr;
660}
661
// Builds the HSA kernel descriptor for \p MF from the program info \p PI.
// NOTE(review): the initializer of group_segment_fixed_size and the assert
// opening before the rsrc3 sanity condition are elided in this view.
MCKernelDescriptor
AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(const MachineFunction &MF,
                                            const SIProgramInfo &PI) const {
  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  const Function &F = MF.getFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  MCContext &Ctx = MF.getContext();

  MCKernelDescriptor KernelDescriptor;

  KernelDescriptor.group_segment_fixed_size =
  KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;

  Align MaxKernArgAlign;
  KernelDescriptor.kernarg_size = MCConstantExpr::create(
      STM.getKernArgSegmentSize(F, MaxKernArgAlign), Ctx);

  KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1(STM, Ctx);
  KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2(Ctx);
  KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);

  // Sanity-check that rsrc3 is zero on targets that do not use it; the
  // values are only consumed by the elided assertion below.
  int64_t PGM_Rsrc3 = 1;
  bool EvaluatableRsrc3 =
      CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGM_Rsrc3);
  (void)PGM_Rsrc3;
  (void)EvaluatableRsrc3;
      STM.hasGFX90AInsts() || STM.hasGFX1250Insts() || !EvaluatableRsrc3 ||
      static_cast<uint64_t>(PGM_Rsrc3) == 0);
  KernelDescriptor.compute_pgm_rsrc3 = CurrentProgramInfo.ComputePGMRSrc3;

  // Preload count is only meaningful on targets supporting kernarg preload.
  KernelDescriptor.kernarg_preload = MCConstantExpr::create(
      AMDGPU::hasKernargPreload(STM) ? Info->getNumKernargPreloadedSGPRs() : 0,
      Ctx);

  return KernelDescriptor;
}
700
  // Per-machine-function driver (declaration line elided in this view):
  // gathers resource info, emits OS-specific program metadata, and, when
  // verbose, prints the resource-usage comment block.

  // Init target streamer lazily on the first function so that previous passes
  // can set metadata.
    initTargetStreamer(*MF.getFunction().getParent());

  // NOTE(review): the initializer of ResourceUsage is elided in this view.
  ResourceUsage =
  CurrentProgramInfo.reset(MF);

  const AMDGPUMachineFunctionInfo *MFI =
      MF.getInfo<AMDGPUMachineFunctionInfo>();
  MCContext &Ctx = MF.getContext();

  // The starting address of all shader programs must be 256 bytes aligned.
  // Regular functions just need the basic required instruction alignment.
  MF.ensureAlignment(MFI->isEntryFunction() ? Align(256) : Align(4));


  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  // NOTE(review): the declaration of 'Context' used below is elided here.
  // FIXME: This should be an explicit check for Mesa.
  if (!STM.isAmdHsaOS() && !STM.isAmdPalOS()) {
    MCSectionELF *ConfigSection =
        Context.getELFSection(".AMDGPU.config", ELF::SHT_PROGBITS, 0);
    OutStreamer->switchSection(ConfigSection);
  }

  RI.gatherResourceInfo(MF, *ResourceUsage, OutContext);

  if (MFI->isModuleEntryFunction()) {
    getSIProgramInfo(CurrentProgramInfo, MF);
  }

  // Program-info emission is OS-specific: PAL metadata, SI config words, or
  // (for HSA) nothing here — HSA emission happens elsewhere.
  if (STM.isAmdPalOS()) {
    if (MFI->isEntryFunction())
      EmitPALMetadata(MF, CurrentProgramInfo);
    else if (MFI->isModuleEntryFunction())
      emitPALFunctionMetadata(MF);
  } else if (!STM.isAmdHsaOS()) {
    EmitProgramInfoSI(MF, CurrentProgramInfo);
  }

  DumpCodeInstEmitter = nullptr;
  if (STM.dumpCode()) {
    // For -dumpcode, get the assembler out of the streamer. This only works
    // with -filetype=obj.
    MCAssembler *Assembler = OutStreamer->getAssemblerPtr();
    if (Assembler)
      DumpCodeInstEmitter = Assembler->getEmitterPtr();
  }

  DisasmLines.clear();
  HexLines.clear();


  emitResourceUsageRemarks(MF, CurrentProgramInfo, MFI->isModuleEntryFunction(),
                           STM.hasMAIInsts());

  // NOTE(review): the call these resource-symbol arguments belong to is
  // elided in this view.
  {
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext),
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumAGPR, OutContext),
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumSGPR, OutContext),
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumNamedBarrier,
                     OutContext),
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
                     OutContext),
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesVCC, OutContext),
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_UsesFlatScratch,
                     OutContext),
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasDynSizedStack,
                     OutContext),
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasRecursion,
                     OutContext),
        RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_HasIndirectCall,
                     OutContext));
  }

  // Emit _dvgpr$ symbol when appropriate.
  emitDVgprSymbol(MF);

  if (isVerbose()) {
    MCSectionELF *CommentSection =
        Context.getELFSection(".AMDGPU.csdata", ELF::SHT_PROGBITS, 0);
    OutStreamer->switchSection(CommentSection);

    if (!MFI->isEntryFunction()) {
      OutStreamer->emitRawComment(" Function info:", false);

      emitCommonFunctionComments(
          RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_NumVGPR, OutContext)
              ->getVariableValue(),
          STM.hasMAIInsts() ? RI.getSymbol(CurrentFnSym->getName(),
                                           RIK::RIK_NumAGPR, OutContext)
                                  ->getVariableValue()
                            : nullptr,
          RI.createTotalNumVGPRs(MF, Ctx),
          RI.createTotalNumSGPRs(
              MF,
              MF.getSubtarget<GCNSubtarget>().getTargetID().isXnackOnOrAny(),
              Ctx),
          RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
              ->getVariableValue(),
          CurrentProgramInfo.getFunctionCodeSize(MF), MFI);
      return false;
    }

    OutStreamer->emitRawComment(" Kernel info:", false);
    emitCommonFunctionComments(
        CurrentProgramInfo.NumArchVGPR,
        STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR : nullptr,
        CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,
        CurrentProgramInfo.ScratchSize,
        CurrentProgramInfo.getFunctionCodeSize(MF), MFI);

    OutStreamer->emitRawComment(
        " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
    OutStreamer->emitRawComment(
        " IeeeMode: " + Twine(CurrentProgramInfo.IEEEMode), false);
    OutStreamer->emitRawComment(
        " LDSByteSize: " + Twine(CurrentProgramInfo.LDSSize) +
            " bytes/workgroup (compile time only)",
        false);

    OutStreamer->emitRawComment(
        " SGPRBlocks: " + getMCExprStr(CurrentProgramInfo.SGPRBlocks), false);

    OutStreamer->emitRawComment(
        " VGPRBlocks: " + getMCExprStr(CurrentProgramInfo.VGPRBlocks), false);

    OutStreamer->emitRawComment(
        " NumSGPRsForWavesPerEU: " +
            getMCExprStr(CurrentProgramInfo.NumSGPRsForWavesPerEU),
        false);
    OutStreamer->emitRawComment(
        " NumVGPRsForWavesPerEU: " +
            getMCExprStr(CurrentProgramInfo.NumVGPRsForWavesPerEU),
        false);

    if (STM.hasGFX90AInsts()) {
      // Printed AccumOffset is (encoded + 1) * 4, i.e. the register count.
      const MCExpr *AdjustedAccum = MCBinaryExpr::createAdd(
          CurrentProgramInfo.AccumOffset, MCConstantExpr::create(1, Ctx), Ctx);
      AdjustedAccum = MCBinaryExpr::createMul(
          AdjustedAccum, MCConstantExpr::create(4, Ctx), Ctx);
      OutStreamer->emitRawComment(
          " AccumOffset: " + getMCExprStr(AdjustedAccum), false);
    }

    if (STM.hasGFX1250Insts())
      OutStreamer->emitRawComment(
          " NamedBarCnt: " + getMCExprStr(CurrentProgramInfo.NamedBarCnt),
          false);

    OutStreamer->emitRawComment(
        " Occupancy: " + getMCExprStr(CurrentProgramInfo.Occupancy), false);

    OutStreamer->emitRawComment(
        " WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);

    OutStreamer->emitRawComment(
        " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
            getMCExprStr(CurrentProgramInfo.ScratchEnable),
        false);
    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
                                    Twine(CurrentProgramInfo.UserSGPR),
                                false);
    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
                                    Twine(CurrentProgramInfo.TrapHandlerEnable),
                                false);
    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
                                    Twine(CurrentProgramInfo.TGIdXEnable),
                                false);
    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
                                    Twine(CurrentProgramInfo.TGIdYEnable),
                                false);
    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
                                    Twine(CurrentProgramInfo.TGIdZEnable),
                                false);
    OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
                                    Twine(CurrentProgramInfo.TIdIGCompCount),
                                false);

    // NOTE(review): the opening of the assertion these conditions belong to
    // is elided in this view.
    [[maybe_unused]] int64_t PGMRSrc3;
        STM.hasGFX90AInsts() || STM.hasGFX1250Insts() ||
        (CurrentProgramInfo.ComputePGMRSrc3->evaluateAsAbsolute(PGMRSrc3) &&
         static_cast<uint64_t>(PGMRSrc3) == 0));
    if (STM.hasGFX90AInsts()) {
      OutStreamer->emitRawComment(
          " COMPUTE_PGM_RSRC3_GFX90A:ACCUM_OFFSET: " +
              getMCExprStr(MCKernelDescriptor::bits_get(
                  CurrentProgramInfo.ComputePGMRSrc3,
                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT,
                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, Ctx)),
          false);
      OutStreamer->emitRawComment(
          " COMPUTE_PGM_RSRC3_GFX90A:TG_SPLIT: " +
              getMCExprStr(MCKernelDescriptor::bits_get(
                  CurrentProgramInfo.ComputePGMRSrc3,
                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT,
                  amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx)),
          false);
    }
  }

  if (DumpCodeInstEmitter) {

    OutStreamer->switchSection(
        Context.getELFSection(".AMDGPU.disasm", ELF::SHT_PROGBITS, 0));

    // Pad each disassembly line so the hex bytes column up.
    for (size_t i = 0; i < DisasmLines.size(); ++i) {
      std::string Comment = "\n";
      if (!HexLines[i].empty()) {
        Comment = std::string(DisasmLineMaxLen - DisasmLines[i].size(), ' ');
        Comment += " ; " + HexLines[i] + "\n";
      }

      OutStreamer->emitBytes(StringRef(DisasmLines[i]));
      OutStreamer->emitBytes(StringRef(Comment));
    }
  }

  return false;
}
932
// When appropriate, add a _dvgpr$ symbol, with the value of the function
// symbol, plus an offset encoding one less than the number of VGPR blocks used
// by the function in bits 5..3 of the symbol value. A "VGPR block" can be
// either 16 VGPRs (for a max of 128), or 32 VGPRs (for a max of 256). This is
// used by a front-end to have functions that are chained rather than called,
// and a dispatcher that dynamically resizes the VGPR count before dispatching
// to a function.
// NOTE(review): the declaration of 'MFI', the second half of the enabling
// condition, the error-reporting call, and the function-symbol operand of the
// add-expression are elided in this view.
void AMDGPUAsmPrinter::emitDVgprSymbol(MachineFunction &MF) {
  if (MFI.isDynamicVGPREnabled() &&
    MCContext &Ctx = MF.getContext();
    unsigned BlockSize = MFI.getDynamicVGPRBlockSize();
    MCValue NumVGPRs;
    if (!CurrentProgramInfo.NumVGPRsForWavesPerEU->evaluateAsRelocatable(
            NumVGPRs, nullptr) ||
        !NumVGPRs.isAbsolute()) {
      llvm_unreachable("unable to resolve NumVGPRs for _dvgpr$ symbol");
    }
    // Calculate number of VGPR blocks.
    // Treat 0 VGPRs as 1 VGPR to avoid underflowing.
    unsigned NumBlocks =
        divideCeil(std::max(unsigned(NumVGPRs.getConstant()), 1U), BlockSize);

    // Only 3 bits (values 1..8 blocks) are available in the encoding.
    if (NumBlocks > 8) {
          "too many DVGPR blocks for _dvgpr$ symbol for '" +
          Twine(CurrentFnSym->getName()) + "'");
      return;
    }
    // Bits 5..3 hold (blocks - 1).
    unsigned EncodedNumBlocks = (NumBlocks - 1) << 3;
    // Add to function symbol to create _dvgpr$ symbol.
    const MCExpr *DVgprFuncVal = MCBinaryExpr::createAdd(
        MCConstantExpr::create(EncodedNumBlocks, Ctx), Ctx);
    MCSymbol *DVgprFuncSym =
        Ctx.getOrCreateSymbol(Twine("_dvgpr$") + CurrentFnSym->getName());
    OutStreamer->emitAssignment(DVgprFuncSym, DVgprFuncVal);
    // Mirror the function's visibility and linkage on the alias.
    emitVisibility(DVgprFuncSym, MF.getFunction().getVisibility());
    emitLinkage(&MF.getFunction(), DVgprFuncSym);
  }
}
975
976// TODO: Fold this into emitFunctionBodyStart.
977void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
978 // In the beginning all features are either 'Any' or 'NotSupported',
979 // depending on global target features. This will cover empty modules.
981 getGlobalSTI()->getFeatureString());
982
983 // If module is empty, we are done.
984 if (M.empty())
985 return;
986
987 // If module is not empty, need to find first 'Off' or 'On' feature
988 // setting per feature from functions in module.
989 for (auto &F : M) {
990 auto &TSTargetID = getTargetStreamer()->getTargetID();
991 if ((!TSTargetID->isXnackSupported() || TSTargetID->isXnackOnOrOff()) &&
992 (!TSTargetID->isSramEccSupported() || TSTargetID->isSramEccOnOrOff()))
993 break;
994
995 const GCNSubtarget &STM = TM.getSubtarget<GCNSubtarget>(F);
996 const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID();
997 if (TSTargetID->isXnackSupported())
998 if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any)
999 TSTargetID->setXnackSetting(STMTargetID.getXnackSetting());
1000 if (TSTargetID->isSramEccSupported())
1001 if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any)
1002 TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting());
1003 }
1004}
1005
1006// AccumOffset computed for the MCExpr equivalent of:
1007// alignTo(std::max(1, NumVGPR), 4) / 4 - 1;
1008static const MCExpr *computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx) {
1009 const MCExpr *ConstFour = MCConstantExpr::create(4, Ctx);
1010 const MCExpr *ConstOne = MCConstantExpr::create(1, Ctx);
1011
1012 // Can't be lower than 1 for subsequent alignTo.
1013 const MCExpr *MaximumTaken =
1014 AMDGPUMCExpr::createMax({ConstOne, NumVGPR}, Ctx);
1015
1016 // Practically, it's computing divideCeil(MaximumTaken, 4).
1017 const MCExpr *DivCeil = MCBinaryExpr::createDiv(
1018 AMDGPUMCExpr::createAlignTo(MaximumTaken, ConstFour, Ctx), ConstFour,
1019 Ctx);
1020
1021 return MCBinaryExpr::createSub(DivCeil, ConstOne, Ctx);
1022}
1023
// Populates ProgInfo with the program resource information (register counts,
// scratch/LDS sizing, PGM_RSRC fields, occupancy) for MF, mostly as MCExprs
// referencing the per-function resource-usage symbols so values can be
// resolved late.
void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
                                        const MachineFunction &MF) {
  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  MCContext &Ctx = MF.getContext();

  // Helper: wrap a constant into an MCExpr.
  auto CreateExpr = [&Ctx](int64_t Value) {
    return MCConstantExpr::create(Value, Ctx);
  };

  // Helper: fold an MCExpr to an absolute value if possible; returns false
  // (leaving Res untouched) when the expression is still symbolic.
  auto TryGetMCExprValue = [](const MCExpr *Value, uint64_t &Res) -> bool {
    int64_t Val;
    if (Value->evaluateAsAbsolute(Val)) {
      Res = Val;
      return true;
    }
    return false;
  };

  // Helper: build a reference to the current function's resource-info symbol
  // for the given resource kind.
  auto GetSymRefExpr =
      [&](MCResourceInfo::ResourceInfoKind RIK) -> const MCExpr * {
    MCSymbol *Sym = RI.getSymbol(CurrentFnSym->getName(), RIK, OutContext);
    return MCSymbolRefExpr::create(Sym, Ctx);
  };

  ProgInfo.NumArchVGPR = GetSymRefExpr(RIK::RIK_NumVGPR);
  ProgInfo.NumAccVGPR = GetSymRefExpr(RIK::RIK_NumAGPR);
      ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);

  ProgInfo.AccumOffset = computeAccumOffset(ProgInfo.NumArchVGPR, Ctx);
  ProgInfo.TgSplit = STM.isTgSplitEnabled();
  ProgInfo.NumSGPR = GetSymRefExpr(RIK::RIK_NumSGPR);
  ProgInfo.ScratchSize = GetSymRefExpr(RIK::RIK_PrivateSegSize);
  ProgInfo.VCCUsed = GetSymRefExpr(RIK::RIK_UsesVCC);
  ProgInfo.FlatUsed = GetSymRefExpr(RIK::RIK_UsesFlatScratch);
  // A call stack is dynamic if the function has a dynamically sized stack or
  // any (possibly indirect) recursion.
  ProgInfo.DynamicCallStack =
      MCBinaryExpr::createOr(GetSymRefExpr(RIK::RIK_HasDynSizedStack),
                             GetSymRefExpr(RIK::RIK_HasRecursion), Ctx);

  // Named barriers are counted in groups of 4 (divideCeil by 4).
  const MCExpr *BarBlkConst = MCConstantExpr::create(4, Ctx);
  const MCExpr *AlignToBlk = AMDGPUMCExpr::createAlignTo(
      GetSymRefExpr(RIK::RIK_NumNamedBarrier), BarBlkConst, Ctx);
  ProgInfo.NamedBarCnt = MCBinaryExpr::createDiv(AlignToBlk, BarBlkConst, Ctx);

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // The calculations related to SGPR/VGPR blocks are
  // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
  // unified.
  const MCExpr *ExtraSGPRs = AMDGPUMCExpr::createExtraSGPRs(
      ProgInfo.VCCUsed, ProgInfo.FlatUsed,
      getTargetStreamer()->getTargetID()->isXnackOnOrAny(), Ctx);

  // Check the addressable register limit before we add ExtraSGPRs.
      !STM.hasSGPRInitBug()) {
    unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
    uint64_t NumSgpr;
    if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
        NumSgpr > MaxAddressableNumSGPRs) {
      // This can happen due to a compiler bug or when using inline asm.
      LLVMContext &Ctx = MF.getFunction().getContext();
      Ctx.diagnose(DiagnosticInfoResourceLimit(
          MF.getFunction(), "addressable scalar registers", NumSgpr,
          MaxAddressableNumSGPRs, DS_Error, DK_ResourceLimit));
      // Clamp so downstream consumers still see a representable value.
      ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs - 1);
    }
  }

  // Account for extra SGPRs and VGPRs reserved for debugger use.
  ProgInfo.NumSGPR = MCBinaryExpr::createAdd(ProgInfo.NumSGPR, ExtraSGPRs, Ctx);

  const Function &F = MF.getFunction();

  // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
  // dispatch registers as function args.
  unsigned WaveDispatchNumSGPR = MFI->getNumWaveDispatchSGPRs(),
           WaveDispatchNumVGPR = MFI->getNumWaveDispatchVGPRs();

  if (WaveDispatchNumSGPR) {
        {ProgInfo.NumSGPR,
         MCBinaryExpr::createAdd(CreateExpr(WaveDispatchNumSGPR), ExtraSGPRs,
                                 Ctx)},
        Ctx);
  }

  if (WaveDispatchNumVGPR) {
        {ProgInfo.NumVGPR, CreateExpr(WaveDispatchNumVGPR)}, Ctx);

      ProgInfo.NumAccVGPR, ProgInfo.NumArchVGPR, Ctx);
  }

  // Adjust number of registers used to meet default/requested minimum/maximum
  // number of waves per execution unit request.
  unsigned MaxWaves = MFI->getMaxWavesPerEU();
  ProgInfo.NumSGPRsForWavesPerEU =
      AMDGPUMCExpr::createMax({ProgInfo.NumSGPR, CreateExpr(1ul),
                               CreateExpr(STM.getMinNumSGPRs(MaxWaves))},
                              Ctx);
  ProgInfo.NumVGPRsForWavesPerEU =
      AMDGPUMCExpr::createMax({ProgInfo.NumVGPR, CreateExpr(1ul),
                               CreateExpr(STM.getMinNumVGPRs(
                                   MaxWaves, MFI->getDynamicVGPRBlockSize()))},
                              Ctx);

      STM.hasSGPRInitBug()) {
    unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
    uint64_t NumSgpr;
    if (TryGetMCExprValue(ProgInfo.NumSGPR, NumSgpr) &&
        NumSgpr > MaxAddressableNumSGPRs) {
      // This can happen due to a compiler bug or when using inline asm to use
      // the registers which are usually reserved for vcc etc.
      LLVMContext &Ctx = MF.getFunction().getContext();
      Ctx.diagnose(DiagnosticInfoResourceLimit(
          MF.getFunction(), "scalar registers", NumSgpr, MaxAddressableNumSGPRs,
      ProgInfo.NumSGPR = CreateExpr(MaxAddressableNumSGPRs);
      ProgInfo.NumSGPRsForWavesPerEU = CreateExpr(MaxAddressableNumSGPRs);
    }
  }

  if (STM.hasSGPRInitBug()) {
    ProgInfo.NumSGPR =
    ProgInfo.NumSGPRsForWavesPerEU =
  }

  // Diagnose over-subscription of user SGPRs.
  if (MFI->getNumUserSGPRs() > STM.getMaxNumUserSGPRs()) {
    LLVMContext &Ctx = MF.getFunction().getContext();
    Ctx.diagnose(DiagnosticInfoResourceLimit(
        MF.getFunction(), "user SGPRs", MFI->getNumUserSGPRs(),
  }

  // Diagnose LDS usage above the addressable local memory limit.
  if (MFI->getLDSSize() > STM.getAddressableLocalMemorySize()) {
    LLVMContext &Ctx = MF.getFunction().getContext();
    Ctx.diagnose(DiagnosticInfoResourceLimit(
        MF.getFunction(), "local memory", MFI->getLDSSize(),
  }
  // The MCExpr equivalent of getNumSGPRBlocks/getNumVGPRBlocks:
  // (alignTo(max(1u, NumGPR), GPREncodingGranule) / GPREncodingGranule) - 1
  auto GetNumGPRBlocks = [&CreateExpr, &Ctx](const MCExpr *NumGPR,
                                             unsigned Granule) {
    const MCExpr *OneConst = CreateExpr(1ul);
    const MCExpr *GranuleConst = CreateExpr(Granule);
    const MCExpr *MaxNumGPR = AMDGPUMCExpr::createMax({NumGPR, OneConst}, Ctx);
    const MCExpr *AlignToGPR =
        AMDGPUMCExpr::createAlignTo(MaxNumGPR, GranuleConst, Ctx);
    const MCExpr *DivGPR =
        MCBinaryExpr::createDiv(AlignToGPR, GranuleConst, Ctx);
    const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
    return SubGPR;
  };
  // GFX10+ will always allocate 128 SGPRs and this field must be 0
    ProgInfo.SGPRBlocks = CreateExpr(0ul);
  } else {
    ProgInfo.SGPRBlocks = GetNumGPRBlocks(
  }
  ProgInfo.VGPRBlocks = GetNumGPRBlocks(ProgInfo.NumVGPRsForWavesPerEU,

  const SIModeRegisterDefaults Mode = MFI->getMode();

  // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
  // register.
  ProgInfo.FloatMode = getFPMode(Mode);

  ProgInfo.IEEEMode = Mode.IEEE;

  // Make clamp modifier on NaN input returns 0.
  ProgInfo.DX10Clamp = Mode.DX10Clamp;

  // LDS is programmed in blocks; pick the alignment shift matching the
  // subtarget's LDS dword granularity.
  unsigned LDSAlignShift = 8;
  switch (getLdsDwGranularity(STM)) {
  case 512:
  case 320:
    LDSAlignShift = 11;
    break;
  case 128:
    LDSAlignShift = 9;
    break;
  case 64:
    LDSAlignShift = 8;
    break;
  default:
    // NOTE(review): message has a typo — "invald" should read "invalid".
    llvm_unreachable("invald LDS block size");
  }

  ProgInfo.SGPRSpill = MFI->getNumSpilledSGPRs();
  ProgInfo.VGPRSpill = MFI->getNumSpilledVGPRs();

  ProgInfo.LDSSize = MFI->getLDSSize();
  ProgInfo.LDSBlocks =
      alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;

  // The MCExpr equivalent of divideCeil.
  auto DivideCeil = [&Ctx](const MCExpr *Numerator, const MCExpr *Denominator) {
    const MCExpr *Ceil =
        AMDGPUMCExpr::createAlignTo(Numerator, Denominator, Ctx);
    return MCBinaryExpr::createDiv(Ceil, Denominator, Ctx);
  };

  // Scratch is allocated in 64-dword or 256-dword blocks.
  unsigned ScratchAlignShift =
      STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 8 : 10;
  // We need to program the hardware with the amount of scratch memory that
  // is used by the entire wave. ProgInfo.ScratchSize is the amount of
  // scratch memory used per thread.
  ProgInfo.ScratchBlocks = DivideCeil(
                              CreateExpr(STM.getWavefrontSize()), Ctx),
      CreateExpr(1ULL << ScratchAlignShift));

  if (STM.supportsWGP()) {
    ProgInfo.WgpMode = STM.isCuModeEnabled() ? 0 : 1;
  }

  if (getIsaVersion(getGlobalSTI()->getCPU()).Major >= 10) {
    ProgInfo.MemOrdered = 1;
    ProgInfo.FwdProgress = !F.hasFnAttribute("amdgpu-no-fwd-progress");
  }

  // 0 = X, 1 = XY, 2 = XYZ
  unsigned TIDIGCompCnt = 0;
  if (MFI->hasWorkItemIDZ())
    TIDIGCompCnt = 2;
  else if (MFI->hasWorkItemIDY())
    TIDIGCompCnt = 1;

  // The private segment wave byte offset is the last of the system SGPRs. We
  // initially assumed it was allocated, and may have used it. It shouldn't harm
  // anything to disable it if we know the stack isn't used here. We may still
  // have emitted code reading it to initialize scratch, but if that's unused
  // reading garbage should be OK.
          MCConstantExpr::create(0, Ctx), Ctx),
      ProgInfo.DynamicCallStack, Ctx);

  ProgInfo.UserSGPR = MFI->getNumUserSGPRs();
  // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
  ProgInfo.TrapHandlerEnable = STM.isAmdHsaOS() ? 0 : STM.hasTrapHandler();
  ProgInfo.TGIdXEnable = MFI->hasWorkGroupIDX();
  ProgInfo.TGIdYEnable = MFI->hasWorkGroupIDY();
  ProgInfo.TGIdZEnable = MFI->hasWorkGroupIDZ();
  ProgInfo.TGSizeEnable = MFI->hasWorkGroupInfo();
  ProgInfo.TIdIGCompCount = TIDIGCompCnt;
  ProgInfo.EXCPEnMSB = 0;
  // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
  ProgInfo.LdsSize = STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks;
  ProgInfo.EXCPEnable = 0;

  // Helper: return ((Dst & ~Mask) | (Value << Shift)) — i.e. replace the
  // masked field of Dst with Value.
  auto SetBits = [&Ctx](const MCExpr *Dst, const MCExpr *Value, uint32_t Mask,
                        uint32_t Shift) {
    const auto *Shft = MCConstantExpr::create(Shift, Ctx);
    const auto *Msk = MCConstantExpr::create(Mask, Ctx);
    Dst = MCBinaryExpr::createAnd(Dst, MCUnaryExpr::createNot(Msk, Ctx), Ctx);
                                 Ctx);
    return Dst;
  };

  // GFX90A: pack ACCUM_OFFSET and TG_SPLIT into COMPUTE_PGM_RSRC3.
  if (STM.hasGFX90AInsts()) {
    ProgInfo.ComputePGMRSrc3 =
        SetBits(ProgInfo.ComputePGMRSrc3, ProgInfo.AccumOffset,
                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET_SHIFT);
    ProgInfo.ComputePGMRSrc3 =
        SetBits(ProgInfo.ComputePGMRSrc3, CreateExpr(ProgInfo.TgSplit),
                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT);
  }

  // GFX1250: pack the named-barrier count into COMPUTE_PGM_RSRC3.
  if (STM.hasGFX1250Insts())
    ProgInfo.ComputePGMRSrc3 =
        SetBits(ProgInfo.ComputePGMRSrc3, ProgInfo.NamedBarCnt,
                amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT,
                amdhsa::COMPUTE_PGM_RSRC3_GFX125_NAMED_BAR_CNT_SHIFT);

  ProgInfo.Occupancy = createOccupancy(
      STM.computeOccupancy(F, ProgInfo.LDSSize).second,
      MFI->getDynamicVGPRBlockSize(), STM, Ctx);

  // Diagnose when the achieved occupancy falls short of an explicit
  // "amdgpu-waves-per-eu" request; only possible when the occupancy
  // expression folds to a constant.
  const auto [MinWEU, MaxWEU] =
      AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", {0, 0}, true);
  uint64_t Occupancy;
  if (TryGetMCExprValue(ProgInfo.Occupancy, Occupancy) && Occupancy < MinWEU) {
    DiagnosticInfoOptimizationFailure Diag(
        F, F.getSubprogram(),
        "failed to meet occupancy target given by 'amdgpu-waves-per-eu' in "
        "'" +
            F.getName() + "': desired occupancy was " + Twine(MinWEU) +
            ", final occupancy is " + Twine(Occupancy));
    F.getContext().diagnose(Diag);
  }

  // GFX11+: store a lower-bound code-size estimate, in 128-byte units and
  // saturated to the field width, into the generation-specific INST_PREF_SIZE
  // field of COMPUTE_PGM_RSRC3.
  if (isGFX11Plus(STM)) {
    uint32_t CodeSizeInBytes = (uint32_t)std::min(
        ProgInfo.getFunctionCodeSize(MF, true /* IsLowerBound */),
        (uint64_t)std::numeric_limits<uint32_t>::max());
    uint32_t CodeSizeInLines = divideCeil(CodeSizeInBytes, 128);
    uint32_t Field, Shift, Width;
    if (isGFX11(STM)) {
      Field = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE;
      Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT;
      Width = amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_WIDTH;
    } else {
      Field = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE;
      Shift = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT;
      Width = amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_WIDTH;
    }
    uint64_t InstPrefSize = std::min(CodeSizeInLines, (1u << Width) - 1);
    ProgInfo.ComputePGMRSrc3 = SetBits(ProgInfo.ComputePGMRSrc3,
                                       CreateExpr(InstPrefSize), Field, Shift);
  }
}
1351
1364
// Emits the SI program-info register writes for non-HSA targets: RSRC
// register contents, scratch (TMPRING) sizing, PS input masks, and spill
// counters, as register/value pairs in the output stream.
void AMDGPUAsmPrinter::EmitProgramInfoSI(
    const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  unsigned RsrcReg = getRsrcReg(MF.getFunction().getCallingConv());
  MCContext &Ctx = MF.getContext();

  // (((Value) & Mask) << Shift)
  auto SetBits = [&Ctx](const MCExpr *Value, uint32_t Mask, uint32_t Shift) {
    const MCExpr *msk = MCConstantExpr::create(Mask, Ctx);
    const MCExpr *shft = MCConstantExpr::create(Shift, Ctx);
                                   shft, Ctx);
  };

  // Emit a folded constant when the expression is absolute, otherwise emit
  // the expression itself for late resolution.
  auto EmitResolvedOrExpr = [this](const MCExpr *Value, unsigned Size) {
    int64_t Val;
    if (Value->evaluateAsAbsolute(Val))
      OutStreamer->emitIntValue(static_cast<uint64_t>(Val), Size);
    else
      OutStreamer->emitValue(Value, Size);
  };

  if (AMDGPU::isCompute(MF.getFunction().getCallingConv())) {

    EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx),
                       /*Size=*/4);

    EmitResolvedOrExpr(CurrentProgramInfo.getComputePGMRSrc2(Ctx), /*Size=*/4);


    // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the
    // appropriate generation.
    if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {
      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
                                 /*Mask=*/0x3FFFF, /*Shift=*/12),
                         /*Size=*/4);
    } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {
      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
                                 /*Mask=*/0x7FFF, /*Shift=*/12),
                         /*Size=*/4);
    } else {
      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
                                 /*Mask=*/0x1FFF, /*Shift=*/12),
                         /*Size=*/4);
    }

    // TODO: Should probably note flat usage somewhere. SC emits a "FlatPtr32 =
    // 0" comment but I don't see a corresponding field in the register spec.
  } else {
    OutStreamer->emitInt32(RsrcReg);

    // Graphics shaders pack the VGPR (bits 5:0) and SGPR (bits 9:6) block
    // counts into a single RSRC word.
    const MCExpr *GPRBlocks = MCBinaryExpr::createOr(
        SetBits(CurrentProgramInfo.VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0),
        SetBits(CurrentProgramInfo.SGPRBlocks, /*Mask=*/0x0F, /*Shift=*/6),
        MF.getContext());
    EmitResolvedOrExpr(GPRBlocks, /*Size=*/4);

    // Sets bits according to S_0286E8_WAVESIZE_* mask and shift values for the
    // appropriate generation.
    if (STM.getGeneration() >= AMDGPUSubtarget::GFX12) {
      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
                                 /*Mask=*/0x3FFFF, /*Shift=*/12),
                         /*Size=*/4);
    } else if (STM.getGeneration() == AMDGPUSubtarget::GFX11) {
      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
                                 /*Mask=*/0x7FFF, /*Shift=*/12),
                         /*Size=*/4);
    } else {
      EmitResolvedOrExpr(SetBits(CurrentProgramInfo.ScratchBlocks,
                                 /*Mask=*/0x1FFF, /*Shift=*/12),
                         /*Size=*/4);
    }
  }

  if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
    // GFX11+ encodes this field at twice the block granularity, hence the
    // round-up halving.
    unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
                                ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
                                : CurrentProgramInfo.LDSBlocks;
    OutStreamer->emitInt32(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
    OutStreamer->emitInt32(MFI->getPSInputEnable());
    OutStreamer->emitInt32(MFI->getPSInputAddr());
  }

  // Spill statistics, consumed by tooling via the R_SPILLED_* pseudo regs.
  OutStreamer->emitInt32(R_SPILLED_SGPRS);
  OutStreamer->emitInt32(MFI->getNumSpilledSGPRs());
  OutStreamer->emitInt32(R_SPILLED_VGPRS);
  OutStreamer->emitInt32(MFI->getNumSpilledVGPRs());
}
1461
// Helper function to add common PAL Metadata 3.0+
// Records the hardware-stage fields shared by all shader types (ieee/wgp/
// mem-ordered/forward-progress), the compute-only trap/exception fields, and
// the LDS size in bytes.
    const SIProgramInfo &CurrentProgramInfo,
    CallingConv::ID CC, const GCNSubtarget &ST,
    unsigned DynamicVGPRBlockSize) {
  // .ieee_mode is only meaningful on subtargets with the combined
  // DX10-clamp/IEEE-mode feature.
  if (ST.hasFeature(AMDGPU::FeatureDX10ClampAndIEEEMode))
    MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);

  MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
  MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
  MD->setHwStage(CC, ".forward_progress", (bool)CurrentProgramInfo.FwdProgress);

  if (AMDGPU::isCompute(CC)) {
    MD->setHwStage(CC, ".trap_present",
                   (bool)CurrentProgramInfo.TrapHandlerEnable);
    MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);

    // A non-zero block size means dynamic VGPR allocation is in use.
    if (DynamicVGPRBlockSize != 0)
      MD->setComputeRegisters(".dynamic_vgpr_en", true);
  }

  // LdsSize is stored in LDS-granularity dwords; convert to bytes here.
      CC, ".lds_size",
      (unsigned)(CurrentProgramInfo.LdsSize * getLdsDwGranularity(ST) *
                 sizeof(uint32_t)));
}
1488
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
// is AMDPAL. It stores each compute/SPI register setting and other PAL
// metadata items into the PALMD::Metadata, combining with any provided by the
// frontend as LLVM metadata. Once all functions are written, the PAL metadata
// is then written as a single block in the .note section.
void AMDGPUAsmPrinter::EmitPALMetadata(
    const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo) {
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  auto CC = MF.getFunction().getCallingConv();
  auto *MD = getTargetStreamer()->getPALMetadata();
  auto &Ctx = MF.getContext();

  MD->setEntryPoint(CC, MF.getFunction().getName());
  MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);

  // For targets that support dynamic VGPRs, set the number of saved dynamic
  // VGPRs (if any) in the PAL metadata.
  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  if (MFI->isDynamicVGPREnabled() &&
    MD->setHwStage(CC, ".dynamic_vgpr_saved_count",

  // Only set AGPRs for supported devices
  if (STM.hasMAIInsts()) {
    MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
  }

  MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);
  // Pre-3.0 PAL metadata records raw RSRC register words; 3.0+ uses named
  // hardware-stage fields instead.
  if (MD->getPALMajorVersion() < 3) {
    MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);
    if (AMDGPU::isCompute(CC)) {
      MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
    } else {
      // For graphics, only the SCRATCH_EN bit is derived here: it is set
      // whenever any scratch blocks are in use.
      const MCExpr *HasScratchBlocks =
          MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks,
                                 MCConstantExpr::create(0, Ctx), Ctx);
      auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN);
      MD->setRsrc2(CC, maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
    }
  } else {
    MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
    MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean,
                   CurrentProgramInfo.ScratchEnable);
    EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM,
  }

  // ScratchSize is in bytes, 16 aligned.
  MD->setScratchSize(
      CC,
      AMDGPUMCExpr::createAlignTo(CurrentProgramInfo.ScratchSize,
                                  MCConstantExpr::create(16, Ctx), Ctx),
      Ctx);

  if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
    // GFX11+ counts extra LDS size at twice the block granularity (see the
    // 256-vs-128 dword granularity below), hence the round-up halving.
    unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
                                ? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
                                : CurrentProgramInfo.LDSBlocks;
    if (MD->getPALMajorVersion() < 3) {
      MD->setRsrc2(
          CC,
          Ctx);
      MD->setSpiPsInputEna(MFI->getPSInputEnable());
      MD->setSpiPsInputAddr(MFI->getPSInputAddr());
    } else {
      // Graphics registers
      const unsigned ExtraLdsDwGranularity =
          STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 256 : 128;
      MD->setGraphicsRegisters(
          ".ps_extra_lds_size",
          (unsigned)(ExtraLDSSize * ExtraLdsDwGranularity * sizeof(uint32_t)));

      // Set PsInputEna and PsInputAddr .spi_ps_input_ena and .spi_ps_input_addr
      static StringLiteral const PsInputFields[] = {
          ".persp_sample_ena", ".persp_center_ena",
          ".persp_centroid_ena", ".persp_pull_model_ena",
          ".linear_sample_ena", ".linear_center_ena",
          ".linear_centroid_ena", ".line_stipple_tex_ena",
          ".pos_x_float_ena", ".pos_y_float_ena",
          ".pos_z_float_ena", ".pos_w_float_ena",
          ".front_face_ena", ".ancillary_ena",
          ".sample_coverage_ena", ".pos_fixed_pt_ena"};
      unsigned PSInputEna = MFI->getPSInputEnable();
      unsigned PSInputAddr = MFI->getPSInputAddr();
      // Mirror each bit of the packed enable/addr masks into its named
      // boolean metadata field.
      for (auto [Idx, Field] : enumerate(PsInputFields)) {
        MD->setGraphicsRegisters(".spi_ps_input_ena", Field,
                                 (bool)((PSInputEna >> Idx) & 1));
        MD->setGraphicsRegisters(".spi_ps_input_addr", Field,
                                 (bool)((PSInputAddr >> Idx) & 1));
      }
    }
  }

  // For version 3 and above the wave front size is already set in the metadata
  if (MD->getPALMajorVersion() < 3 && STM.isWave32())
    MD->setWave32(MF.getFunction().getCallingConv());
}
1588
// Records per-function (non-entry-point) PAL metadata: stack/scratch size,
// LDS size, and register usage. Register settings are reported under the
// AMDGPU_CS calling convention.
void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
  auto *MD = getTargetStreamer()->getPALMetadata();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  StringRef FnName = MF.getFunction().getName();
  MD->setFunctionScratchSize(FnName, MFI.getStackSize());
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  MCContext &Ctx = MF.getContext();

  // Pre-3.0 metadata stores raw RSRC words; 3.0+ uses named fields via
  // EmitPALMetadataCommon.
  if (MD->getPALMajorVersion() < 3) {
    // Set compute registers
    MD->setRsrc1(
        CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST, Ctx), Ctx);
    MD->setRsrc2(CallingConv::AMDGPU_CS,
                 CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
  } else {
        MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST,
        MF.getInfo<SIMachineFunctionInfo>()->getDynamicVGPRBlockSize());
  }

  // Set optional info
  MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
  MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
  MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
}
1615
// This is supposed to be log2(Size)
// Maps a private element size in bytes (4, 8 or 16) to the corresponding
// amd_element_byte_size_t enumerator; any other size is a compiler bug.
  switch (Size) {
  case 4:
    return AMD_ELEMENT_4_BYTES;
  case 8:
    return AMD_ELEMENT_8_BYTES;
  case 16:
    return AMD_ELEMENT_16_BYTES;
  default:
    llvm_unreachable("invalid private_element_size");
  }
}
1629
// Fills in an amd_kernel_code_t (MC layer form) for a kernel entry point from
// the already-computed program info and the function's user-SGPR usage.
void AMDGPUAsmPrinter::getAmdKernelCode(AMDGPUMCKernelCodeT &Out,
                                        const SIProgramInfo &CurrentProgramInfo,
                                        const MachineFunction &MF) const {
  const Function &F = MF.getFunction();
  // Only kernel entry points carry an amd_kernel_code_t header.
  assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
         F.getCallingConv() == CallingConv::SPIR_KERNEL);

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
  MCContext &Ctx = MF.getContext();

  // Start from subtarget defaults, then overwrite the fields computed below.
  Out.initDefault(&STM, Ctx, /*InitMCExpr=*/false);

      CurrentProgramInfo.getComputePGMRSrc1(STM, Ctx);
      CurrentProgramInfo.getComputePGMRSrc2(Ctx);

  Out.is_dynamic_callstack = CurrentProgramInfo.DynamicCallStack;

      getElementByteSizeValue(STM.getMaxPrivateElementSize(true)));

  // Record each user-SGPR input the kernel actually receives.
  const GCNUserSGPRUsageInfo &UserSGPRInfo = MFI->getUserSGPRInfo();
  if (UserSGPRInfo.hasPrivateSegmentBuffer()) {
  }

  if (UserSGPRInfo.hasDispatchPtr())

  if (UserSGPRInfo.hasQueuePtr())

  if (UserSGPRInfo.hasKernargSegmentPtr())

  if (UserSGPRInfo.hasDispatchID())

  if (UserSGPRInfo.hasFlatScratchInit())

  if (UserSGPRInfo.hasPrivateSegmentSize())

  if (STM.isXNACKEnabled())

  Align MaxKernArgAlign;
  Out.kernarg_segment_byte_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
  Out.wavefront_sgpr_count = CurrentProgramInfo.NumSGPR;
  Out.workitem_vgpr_count = CurrentProgramInfo.NumVGPR;
  Out.workitem_private_segment_byte_size = CurrentProgramInfo.ScratchSize;
  Out.workgroup_group_segment_byte_size = CurrentProgramInfo.LDSSize;

  // kernarg_segment_alignment is specified as log of the alignment.
  // The minimum alignment is 16.
  // FIXME: The metadata treats the minimum as 4?
  Out.kernarg_segment_alignment = Log2(std::max(Align(16), MaxKernArgAlign));
}
1692
// Inline-asm operand printing. Supports registers and immediates (plus
// whatever the generic AsmPrinter handles); immediates print either in
// decimal or in hex sized to the value's width. Follows the AsmPrinter
// convention of returning false on success and true for unsupported
// operands/modifiers.
                                       const char *ExtraCode, raw_ostream &O) {
  // First try the generic code, which knows about modifiers like 'c' and 'n'.
  if (!AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O))
    return false;

  if (ExtraCode && ExtraCode[0]) {
    // Only single-character modifiers are supported.
    if (ExtraCode[1] != 0)
      return true; // Unknown modifier.

    switch (ExtraCode[0]) {
    case 'r':
      break;
    default:
      return true;
    }
  }

  // TODO: Should be able to support other operand types like globals.
  const MachineOperand &MO = MI->getOperand(OpNo);
  if (MO.isReg()) {
                           *MF->getSubtarget().getRegisterInfo());
    return false;
  }
  if (MO.isImm()) {
    int64_t Val = MO.getImm();
      O << Val;
    } else if (isUInt<16>(Val)) {
      O << format("0x%" PRIx16, static_cast<uint16_t>(Val));
    } else if (isUInt<32>(Val)) {
      O << format("0x%" PRIx32, static_cast<uint32_t>(Val));
    } else {
      O << format("0x%" PRIx64, static_cast<uint64_t>(Val));
    }
    return false;
  }
  return true;
}
1733
1741
// Emits the "kernel-resource-usage" analysis remarks (SGPR/VGPR/AGPR counts,
// scratch size, occupancy, spills, LDS) for a function, one remark per line.
void AMDGPUAsmPrinter::emitResourceUsageRemarks(
    const MachineFunction &MF, const SIProgramInfo &CurrentProgramInfo,
    bool isModuleEntryFunction, bool hasMAIInsts) {
  // Remarks require an optimization-remark emitter.
  if (!ORE)
    return;

  const char *Name = "kernel-resource-usage";
  const char *Indent = " ";

  // If the remark is not specifically enabled, do not output to yaml
  if (!Ctx.getDiagHandlerPtr()->isAnalysisRemarkEnabled(Name))
    return;

  // Currently non-kernel functions have no resources to emit.
    return;

  auto EmitResourceUsageRemark = [&](StringRef RemarkName,
                                     StringRef RemarkLabel, auto Argument) {
    // Add an indent for every line besides the line with the kernel name. This
    // makes it easier to tell which resource usage go with which kernel since
    // the kernel name will always be displayed first.
    std::string LabelStr = RemarkLabel.str() + ": ";
    if (RemarkName != "FunctionName")
      LabelStr = Indent + LabelStr;

    ORE->emit([&]() {
      return MachineOptimizationRemarkAnalysis(Name, RemarkName,
                                               &MF.front())
             << LabelStr << ore::NV(RemarkName, Argument);
    });
  };

  // FIXME: Formatting here is pretty nasty because clang does not accept
  // newlines from diagnostics. This forces us to emit multiple diagnostic
  // remarks to simulate newlines. If and when clang does accept newlines, this
  // formatting should be aggregated into one remark with newlines to avoid
  // printing multiple diagnostic location and diag opts.
  EmitResourceUsageRemark("FunctionName", "Function Name",
                          MF.getFunction().getName());
  EmitResourceUsageRemark("NumSGPR", "TotalSGPRs",
                          getMCExprStr(CurrentProgramInfo.NumSGPR));
  EmitResourceUsageRemark("NumVGPR", "VGPRs",
                          getMCExprStr(CurrentProgramInfo.NumArchVGPR));
  // AGPRs only exist on subtargets with MAI instructions.
  if (hasMAIInsts) {
    EmitResourceUsageRemark("NumAGPR", "AGPRs",
                            getMCExprStr(CurrentProgramInfo.NumAccVGPR));
  }
  EmitResourceUsageRemark("ScratchSize", "ScratchSize [bytes/lane]",
                          getMCExprStr(CurrentProgramInfo.ScratchSize));
  // Report "True" only when the dynamic-stack expression folds to a non-zero
  // constant; symbolic values report "False".
  int64_t DynStack;
  bool DynStackEvaluatable =
      CurrentProgramInfo.DynamicCallStack->evaluateAsAbsolute(DynStack);
  StringRef DynamicStackStr =
      DynStackEvaluatable && DynStack ? "True" : "False";
  EmitResourceUsageRemark("DynamicStack", "Dynamic Stack", DynamicStackStr);
  EmitResourceUsageRemark("Occupancy", "Occupancy [waves/SIMD]",
                          getMCExprStr(CurrentProgramInfo.Occupancy));
  EmitResourceUsageRemark("SGPRSpill", "SGPRs Spill",
                          CurrentProgramInfo.SGPRSpill);
  EmitResourceUsageRemark("VGPRSpill", "VGPRs Spill",
                          CurrentProgramInfo.VGPRSpill);
  if (isModuleEntryFunction)
    EmitResourceUsageRemark("BytesLDS", "LDS Size [bytes/block]",
                            CurrentProgramInfo.LDSSize);
}
1810
// Legacy pass-manager identification and registration for the asm printer.
char AMDGPUAsmPrinter::ID = 0;

INITIALIZE_PASS(AMDGPUAsmPrinter, "amdgpu-asm-printer",
                "AMDGPU Assembly Printer", false, false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD, const SIProgramInfo &CurrentProgramInfo, CallingConv::ID CC, const GCNSubtarget &ST, unsigned DynamicVGPRBlockSize)
const AMDGPUMCExpr * createOccupancy(unsigned InitOcc, const MCExpr *NumSGPRs, const MCExpr *NumVGPRs, unsigned DynamicVGPRBlockSize, const GCNSubtarget &STM, MCContext &Ctx)
Mimics GCNSubtarget::computeOccupancy for MCExpr.
static unsigned getRsrcReg(CallingConv::ID CallConv)
LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter()
static amd_element_byte_size_t getElementByteSizeValue(unsigned Size)
static uint32_t getFPMode(SIModeRegisterDefaults Mode)
static const MCExpr * computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx)
static AsmPrinter * createAMDGPUAsmPrinterPass(TargetMachine &tm, std::unique_ptr< MCStreamer > &&Streamer)
AMDGPU Assembly printer class.
AMDGPU HSA Metadata Streamer.
AMDHSA kernel descriptor MCExpr struct for use in MC layer.
MC infrastructure to propagate the function level resource usage info.
Analyzes how many registers and other resources are used by functions.
The AMDGPU TargetMachine interface definition for hw codegen targets.
AMDHSA kernel descriptor definitions.
MC layer struct for AMDGPUMCKernelCodeT, provides MCExpr functionality where required.
amd_element_byte_size_t
The values used to define the number of bytes to use for the swizzle element size.
@ AMD_ELEMENT_8_BYTES
@ AMD_ELEMENT_16_BYTES
@ AMD_ELEMENT_4_BYTES
#define AMD_HSA_BITS_SET(dst, mask, val)
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID
@ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE
@ AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER
@ AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR
@ AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED
@ AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT
@ AMD_CODE_PROPERTY_IS_PTR64
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_ABI
Definition Compiler.h:213
#define LLVM_EXTERNAL_VISIBILITY
Definition Compiler.h:132
AMD GCN specific subclass of TargetSubtarget.
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
===- MachineOptimizationRemarkEmitter.h - Opt Diagnostics -*- C++ -*-—===//
OptimizedStructLayoutField Field
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
R600 Assembly printer class.
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS
Definition SIDefines.h:1144
#define R_0286E8_SPI_TMPRING_SIZE
Definition SIDefines.h:1282
#define FP_ROUND_MODE_DP(x)
Definition SIDefines.h:1264
#define C_00B84C_SCRATCH_EN
Definition SIDefines.h:1180
#define FP_ROUND_ROUND_TO_NEAREST
Definition SIDefines.h:1256
#define R_0286D0_SPI_PS_INPUT_ADDR
Definition SIDefines.h:1215
#define R_00B860_COMPUTE_TMPRING_SIZE
Definition SIDefines.h:1277
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS
Definition SIDefines.h:1167
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES
Definition SIDefines.h:1166
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS
Definition SIDefines.h:1175
#define R_0286CC_SPI_PS_INPUT_ENA
Definition SIDefines.h:1214
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS
Definition SIDefines.h:1153
#define FP_DENORM_MODE_DP(x)
Definition SIDefines.h:1275
#define R_00B848_COMPUTE_PGM_RSRC1
Definition SIDefines.h:1217
#define R_SPILLED_SGPRS
Definition SIDefines.h:1296
#define FP_ROUND_MODE_SP(x)
Definition SIDefines.h:1263
#define FP_DENORM_MODE_SP(x)
Definition SIDefines.h:1274
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS
Definition SIDefines.h:1158
#define R_SPILLED_VGPRS
Definition SIDefines.h:1297
#define S_00B02C_EXTRA_LDS_SIZE(x)
Definition SIDefines.h:1152
#define R_00B84C_COMPUTE_PGM_RSRC2
Definition SIDefines.h:1177
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS
Definition SIDefines.h:1151
static const int BlockSize
Definition TarWriter.cpp:33
void emitFunctionEntryLabel() override
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
const MCSubtargetInfo * getGlobalSTI() const
void emitImplicitDef(const MachineInstr *MI) const override
Targets can override this to customize the output of IMPLICIT_DEF instructions in verbose mode.
std::vector< std::string > DisasmLines
void emitStartOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the start of their fi...
void endFunction(const MachineFunction *MF)
StringRef getPassName() const override
getPassName - Return a nice clean name for a pass.
std::vector< std::string > HexLines
void emitGlobalVariable(const GlobalVariable *GV) override
Emit the specified global variable to the .s file.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &O) override
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
void emitEndOfAsmFile(Module &M) override
This virtual method can be overridden by targets that want to emit something at the end of their file...
AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
void emitFunctionBodyStart() override
Targets can override this to emit stuff before the first basic block in the function.
void emitBasicBlockStart(const MachineBasicBlock &MBB) override
Targets can override this to emit stuff at the start of a basic block.
AMDGPUTargetStreamer * getTargetStreamer() const
static void printRegOperand(MCRegister Reg, raw_ostream &O, const MCRegisterInfo &MRI)
AMDGPU target specific MCExpr operations.
static const AMDGPUMCExpr * createMax(ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createTotalNumVGPR(const MCExpr *NumAGPR, const MCExpr *NumVGPR, MCContext &Ctx)
static const AMDGPUMCExpr * create(VariantKind Kind, ArrayRef< const MCExpr * > Args, MCContext &Ctx)
static const AMDGPUMCExpr * createExtraSGPRs(const MCExpr *VCCUsed, const MCExpr *FlatScrUsed, bool XNACKUsed, MCContext &Ctx)
Allow delayed MCExpr resolve of ExtraSGPRs (in case VCCUsed or FlatScrUsed are unresolvable but neede...
static const AMDGPUMCExpr * createAlignTo(const MCExpr *Value, const MCExpr *Align, MCContext &Ctx)
void setHwStage(unsigned CC, StringRef field, unsigned Val)
void updateHwStageMaximum(unsigned CC, StringRef field, unsigned Val)
void setComputeRegisters(StringRef field, unsigned Val)
std::pair< unsigned, unsigned > getOccupancyWithWorkGroupSizes(uint32_t LDSBytes, const Function &F) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
unsigned getAddressableLocalMemorySize() const
Return the maximum number of bytes of LDS that can be allocated to a single workgroup.
unsigned getKernArgSegmentSize(const Function &F, Align &MaxAlign) const
unsigned getWavefrontSize() const
virtual void EmitAmdhsaKernelDescriptor(const MCSubtargetInfo &STI, StringRef KernelName, const AMDGPU::MCKernelDescriptor &KernelDescriptor, const MCExpr *NextVGPR, const MCExpr *NextSGPR, const MCExpr *ReserveVCC, const MCExpr *ReserveFlatScr)
AMDGPUPALMetadata * getPALMetadata()
virtual void EmitDirectiveAMDHSACodeObjectVersion(unsigned COV)
void initializeTargetID(const MCSubtargetInfo &STI)
virtual void EmitMCResourceInfo(const MCSymbol *NumVGPR, const MCSymbol *NumAGPR, const MCSymbol *NumExplicitSGPR, const MCSymbol *NumNamedBarrier, const MCSymbol *PrivateSegmentSize, const MCSymbol *UsesVCC, const MCSymbol *UsesFlatScratch, const MCSymbol *HasDynamicallySizedStack, const MCSymbol *HasRecursion, const MCSymbol *HasIndirectCall)
virtual bool EmitCodeEnd(const MCSubtargetInfo &STI)
virtual void EmitAMDGPUSymbolType(StringRef SymbolName, unsigned Type)
virtual void EmitAMDKernelCodeT(AMDGPU::AMDGPUMCKernelCodeT &Header)
const std::optional< AMDGPU::IsaInfo::AMDGPUTargetID > & getTargetID() const
virtual void EmitMCResourceMaximums(const MCSymbol *MaxVGPR, const MCSymbol *MaxAGPR, const MCSymbol *MaxSGPR, const MCSymbol *MaxNamedBarrier)
void setXnackSetting(TargetIDSetting NewXnackSetting)
Sets xnack setting to NewXnackSetting.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
Definition Argument.h:32
Collects and handles AsmPrinter objects required to build debug or EH information.
This class is intended to be used as a driving class for all asm writers.
Definition AsmPrinter.h:91
const TargetLoweringObjectFile & getObjFileLowering() const
Return information about object file lowering.
MCSymbol * getSymbol(const GlobalValue *GV) const
virtual void emitGlobalVariable(const GlobalVariable *GV)
Emit the specified global variable to the .s file.
TargetMachine & TM
Target machine description.
Definition AsmPrinter.h:94
const MCAsmInfo * MAI
Target Asm Printer information.
Definition AsmPrinter.h:97
MachineFunction * MF
The current machine function.
Definition AsmPrinter.h:109
virtual void SetupMachineFunction(MachineFunction &MF)
This should be called when a new MachineFunction is being processed from runOnMachineFunction.
void emitFunctionBody()
This method emits the body and trailer for a function.
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const
Return true if the basic block has exactly one predecessor and the control transfer mechanism between...
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
virtual void emitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const
This emits linkage information about GVSym based on GV, if this is supported by the target.
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
unsigned getFunctionNumber() const
Return a unique ID for the current function.
MachineOptimizationRemarkEmitter * ORE
Optimization remark emitter.
Definition AsmPrinter.h:121
AsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer, char &ID=AsmPrinter::ID)
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition AsmPrinter.h:128
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition AsmPrinter.h:112
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition AsmPrinter.h:101
bool doFinalization(Module &M) override
Shut down the asmprinter.
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
void emitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition=true) const
This emits visibility information about symbol, if this is supported by the target.
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition AsmPrinter.h:106
bool isVerbose() const
Return true if assembly output should contain comments.
Definition AsmPrinter.h:310
void getNameWithPrefix(SmallVectorImpl< char > &Name, const GlobalValue *GV) const
virtual void emitFunctionEntryLabel()
EmitFunctionEntryLabel - Emit the label that is the entrypoint for the function.
void addAsmPrinterHandler(std::unique_ptr< AsmPrinterHandler > Handler)
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
DISubprogram * getSubprogram() const
Get the attached subprogram.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
unsigned getMinNumSGPRs(unsigned WavesPerEU) const
unsigned getMinNumVGPRs(unsigned WavesPerEU, unsigned DynamicVGPRBlockSize) const
bool isTgSplitEnabled() const
bool isCuModeEnabled() const
const AMDGPU::IsaInfo::AMDGPUTargetID & getTargetID() const
std::pair< unsigned, unsigned > computeOccupancy(const Function &F, unsigned LDSSize=0, unsigned NumSGPRs=0, unsigned NumVGPRs=0) const
Subtarget's minimum/maximum occupancy, in number of waves per EU, that can be achieved when the only ...
bool isWave32() const
bool supportsWGP() const
unsigned getMaxNumUserSGPRs() const
Generation getGeneration() const
unsigned getAddressableNumSGPRs() const
unsigned getMaxWaveScratchSize() const
bool hasPrivateSegmentBuffer() const
VisibilityTypes getVisibility() const
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:337
unsigned getAddressSpace() const
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:141
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
MaybeAlign getAlign() const
Returns the alignment of the given variable.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:569
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
MCCodeEmitter * getEmitterPtr() const
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:343
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:348
static const MCBinaryExpr * createOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:408
static const MCBinaryExpr * createLOr(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:378
static const MCBinaryExpr * createMul(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:398
static const MCBinaryExpr * createGT(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:363
static const MCBinaryExpr * createDiv(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:353
static const MCBinaryExpr * createShl(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:413
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition MCExpr.h:428
static LLVM_ABI const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition MCExpr.cpp:212
Context object for machine code objects.
Definition MCContext.h:83
const MCObjectFileInfo * getObjectFileInfo() const
Definition MCContext.h:413
LLVM_ABI void reportError(SMLoc L, const Twine &Msg)
LLVM_ABI MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
LLVM_ABI bool evaluateAsRelocatable(MCValue &Res, const MCAssembler *Asm) const
Try to evaluate the expression to a relocatable value, i.e.
Definition MCExpr.cpp:450
MCSection * getReadOnlySection() const
MCSection * getTextSection() const
MCContext & getContext() const
This represents a section on linux, lots of unix variants and some bare metal systems.
Instances of this class represent a uniqued identifier for a section in the current translation unit.
Definition MCSection.h:569
void ensureMinAlignment(Align MinAlignment)
Makes sure that Alignment is at least MinAlignment.
Definition MCSection.h:657
bool hasInstructions() const
Definition MCSection.h:665
MCContext & getContext() const
Definition MCStreamer.h:323
Generic base class for all target subtargets.
const Triple & getTargetTriple() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
bool isDefined() const
isDefined - Check if this symbol is defined (i.e., it has an address).
Definition MCSymbol.h:233
StringRef getName() const
getName - Get the symbol name.
Definition MCSymbol.h:188
bool isVariable() const
isVariable - Check if this is a variable symbol.
Definition MCSymbol.h:267
void redefineIfPossible()
Prepare this symbol to be redefined.
Definition MCSymbol.h:212
const MCExpr * getVariableValue() const
Get the expression of the variable symbol.
Definition MCSymbol.h:270
MCStreamer & getStreamer()
Definition MCStreamer.h:103
static const MCUnaryExpr * createNot(const MCExpr *Expr, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:273
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
MCContext & getContext() const
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Diagnostic information for optimization analysis remarks.
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Emit an optimization remark.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
GCNUserSGPRUsageInfo & getUserSGPRInfo()
SIModeRegisterDefaults getMode() const
unsigned getScratchReservedForDynamicVGPRs() const
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
OSType getOS() const
Get the parsed operating system type of this triple.
Definition Triple.h:438
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI, std::optional< bool > EnableWavefrontSize32)
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI)
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed, bool FlatScrUsed, bool XNACKUsed)
unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI)
unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI, unsigned DynamicVGPRBlockSize, std::optional< bool > EnableWavefrontSize32)
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR)
void printAMDGPUMCExpr(const MCExpr *Expr, raw_ostream &OS, const MCAsmInfo *MAI)
LLVM_READNONE constexpr bool isModuleEntryFunctionCC(CallingConv::ID CC)
unsigned getLdsDwGranularity(const MCSubtargetInfo &ST)
bool isGFX11(const MCSubtargetInfo &STI)
LLVM_ABI IsaVersion getIsaVersion(StringRef GPU)
const MCExpr * maskShiftSet(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
Provided with the MCExpr * Val, uint32 Mask and Shift, will return the masked and left shifted,...
unsigned getAMDHSACodeObjectVersion(const Module &M)
bool isGFX90A(const MCSubtargetInfo &STI)
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
LLVM_READNONE constexpr bool isCompute(CallingConv::ID CC)
bool hasMAIInsts(const MCSubtargetInfo &STI)
LLVM_READNONE bool isInlinableIntLiteral(int64_t Literal)
Is this literal inlinable, and not one of the values intended for floating point values.
bool isGFX11Plus(const MCSubtargetInfo &STI)
const MCExpr * foldAMDGPUMCExpr(const MCExpr *Expr, MCContext &Ctx)
bool isGFX10Plus(const MCSubtargetInfo &STI)
constexpr std::pair< unsigned, unsigned > getShiftMask(unsigned Value)
Deduce the least significant bit aligned shift and mask values for a binary Complement Value (as they...
unsigned hasKernargPreload(const MCSubtargetInfo &STI)
std::pair< unsigned, unsigned > getIntegerPairAttribute(const Function &F, StringRef Name, std::pair< unsigned, unsigned > Default, bool OnlyFirstRequired)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ SHT_PROGBITS
Definition ELF.h:1148
@ STT_AMDGPU_HSA_KERNEL
Definition ELF.h:1431
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2554
static StringRef getCPU(StringRef CPU)
Processes a CPU name.
Target & getTheR600Target()
The target for R600 GPUs.
@ DK_ResourceLimit
AsmPrinter * createR600AsmPrinterPass(TargetMachine &TM, std::unique_ptr< MCStreamer > &&Streamer)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
Target & getTheGCNTarget()
The target for GCN GPUs.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1917
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870
void validate(const MCSubtargetInfo *STI, MCContext &Ctx)
void initDefault(const MCSubtargetInfo *STI, MCContext &Ctx, bool InitMCExpr=true)
static const MCExpr * bits_get(const MCExpr *Src, uint32_t Shift, uint32_t Mask, MCContext &Ctx)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Track resource usage for kernels / entry functions.
const MCExpr * NumSGPR
const MCExpr * NumArchVGPR
uint64_t getFunctionCodeSize(const MachineFunction &MF, bool IsLowerBound=false)
const MCExpr * getComputePGMRSrc2(MCContext &Ctx) const
Compute the value of the ComputePGMRsrc2 register.
const MCExpr * VGPRBlocks
const MCExpr * ScratchBlocks
const MCExpr * ComputePGMRSrc3
const MCExpr * getComputePGMRSrc1(const GCNSubtarget &ST, MCContext &Ctx) const
Compute the value of the ComputePGMRsrc1 register.
const MCExpr * VCCUsed
const MCExpr * FlatUsed
const MCExpr * NamedBarCnt
const MCExpr * ScratchEnable
const MCExpr * AccumOffset
const MCExpr * NumAccVGPR
const MCExpr * DynamicCallStack
const MCExpr * SGPRBlocks
const MCExpr * NumVGPRsForWavesPerEU
const MCExpr * NumVGPR
const MCExpr * Occupancy
const MCExpr * ScratchSize
const MCExpr * NumSGPRsForWavesPerEU
static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn)
RegisterAsmPrinter - Register an AsmPrinter implementation for the given target.