doxygen/AMDGPUAttributor_8cpp_source.html

//===- AMDGPUAttributor.cpp -----------------------------------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file This pass uses Attributor framework to deduce AMDGPU attributes.

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "GCNSubtarget.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/Analysis/CycleAnalysis.h"

#include "llvm/CodeGen/TargetPassConfig.h"

#include "llvm/IR/IntrinsicsAMDGPU.h"

#include "llvm/IR/IntrinsicsR600.h"

#include "llvm/Target/TargetMachine.h"

#include "llvm/Transforms/IPO/Attributor.h"


#define DEBUG_TYPE "amdgpu-attributor"


namespace llvm {

void initializeCycleInfoWrapperPassPass(PassRegistry &);

} // namespace llvm


using namespace llvm;


static cl::opt<unsigned> KernargPreloadCount(

    "amdgpu-kernarg-preload-count",

    cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0));


#define AMDGPU_ATTRIBUTE(Name, Str) Name##_POS,


enum ImplicitArgumentPositions {

  #include "AMDGPUAttributes.def"

  LAST_ARG_POS

};


#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,


enum ImplicitArgumentMask {

  NOT_IMPLICIT_INPUT = 0,

  #include "AMDGPUAttributes.def"

  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1

};


#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},

static constexpr std::pair<ImplicitArgumentMask,

                           StringLiteral> ImplicitAttrs[] = {

 #include "AMDGPUAttributes.def"

};


// We do not need to note the x workitem or workgroup id because they are always

// initialized.

//

// TODO: We should not add the attributes if the known compile time workgroup

// size is 1 for y/z.

static ImplicitArgumentMask

intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,

                    bool HasApertureRegs, bool SupportsGetDoorBellID,

                    unsigned CodeObjectVersion) {

  switch (ID) {

  case Intrinsic::amdgcn_workitem_id_x:

    NonKernelOnly = true;

    return WORKITEM_ID_X;

  case Intrinsic::amdgcn_workgroup_id_x:

    NonKernelOnly = true;

    return WORKGROUP_ID_X;

  case Intrinsic::amdgcn_workitem_id_y:

  case Intrinsic::r600_read_tidig_y:

    return WORKITEM_ID_Y;

  case Intrinsic::amdgcn_workitem_id_z:

  case Intrinsic::r600_read_tidig_z:

    return WORKITEM_ID_Z;

  case Intrinsic::amdgcn_workgroup_id_y:

  case Intrinsic::r600_read_tgid_y:

    return WORKGROUP_ID_Y;

  case Intrinsic::amdgcn_workgroup_id_z:

  case Intrinsic::r600_read_tgid_z:

    return WORKGROUP_ID_Z;

  case Intrinsic::amdgcn_lds_kernel_id:

    return LDS_KERNEL_ID;

  case Intrinsic::amdgcn_dispatch_ptr:

    return DISPATCH_PTR;

  case Intrinsic::amdgcn_dispatch_id:

    return DISPATCH_ID;

  case Intrinsic::amdgcn_implicitarg_ptr:

    return IMPLICIT_ARG_PTR;

  // Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access

  // queue_ptr.

  case Intrinsic::amdgcn_queue_ptr:

    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);

    return QUEUE_PTR;

  case Intrinsic::amdgcn_is_shared:

  case Intrinsic::amdgcn_is_private:

    if (HasApertureRegs)

      return NOT_IMPLICIT_INPUT;

    // Under V5, we need implicitarg_ptr + offsets to access private_base or

    // shared_base. For pre-V5, however, need to access them through queue_ptr +

    // offsets.

    return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR :

                                                      QUEUE_PTR;

  case Intrinsic::trap:

    if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.

      return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT :

                                                        QUEUE_PTR;

    NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);

    return QUEUE_PTR;

  default:

    return NOT_IMPLICIT_INPUT;

  }

}


static bool castRequiresQueuePtr(unsigned SrcAS) {

  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;

}


static bool isDSAddress(const Constant *C) {

  const GlobalValue *GV = dyn_cast<GlobalValue>(C);

  if (!GV)

    return false;

  unsigned AS = GV->getAddressSpace();

  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;

}


/// Returns true if the function requires the implicit argument be passed

/// regardless of the function contents.

static bool funcRequiresHostcallPtr(const Function &F) {

  // Sanitizers require the hostcall buffer passed in the implicit arguments.

  return F.hasFnAttribute(Attribute::SanitizeAddress) ||

         F.hasFnAttribute(Attribute::SanitizeThread) ||

         F.hasFnAttribute(Attribute::SanitizeMemory) ||

         F.hasFnAttribute(Attribute::SanitizeHWAddress) ||

         F.hasFnAttribute(Attribute::SanitizeMemTag);

}


namespace {

class AMDGPUInformationCache : public InformationCache {

public:

  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,

                         BumpPtrAllocator &Allocator,

                         SetVector<Function *> *CGSCC, TargetMachine &TM)

      : InformationCache(M, AG, Allocator, CGSCC), TM(TM),

        CodeObjectVersion(AMDGPU::getAMDHSACodeObjectVersion(M)) {}


  TargetMachine &TM;


  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };


  /// Check if the subtarget has aperture regs.

  bool hasApertureRegs(Function &F) {

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

    return ST.hasApertureRegs();

  }


  /// Check if the subtarget supports GetDoorbellID.

  bool supportsGetDoorbellID(Function &F) {

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

    return ST.supportsGetDoorbellID();

  }


  std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) {

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

    return ST.getFlatWorkGroupSizes(F);

  }


  std::pair<unsigned, unsigned>

  getMaximumFlatWorkGroupRange(const Function &F) {

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

    return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};

  }


  /// Get code object version.

  unsigned getCodeObjectVersion() const {

    return CodeObjectVersion;

  }


  /// Get the effective value of "amdgpu-waves-per-eu" for the function,

  /// accounting for the interaction with the passed value to use for

  /// "amdgpu-flat-work-group-size".

  std::pair<unsigned, unsigned>

  getWavesPerEU(const Function &F,

                std::pair<unsigned, unsigned> FlatWorkGroupSize) {

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

    return ST.getWavesPerEU(F, FlatWorkGroupSize);

  }


  std::pair<unsigned, unsigned>

  getEffectiveWavesPerEU(const Function &F,

                         std::pair<unsigned, unsigned> WavesPerEU,

                         std::pair<unsigned, unsigned> FlatWorkGroupSize) {

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

    return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize);

  }


  unsigned getMaxWavesPerEU(const Function &F) {

    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

    return ST.getMaxWavesPerEU();

  }


private:

  /// Check if the ConstantExpr \p CE requires the queue pointer.

  static bool visitConstExpr(const ConstantExpr *CE) {

    if (CE->getOpcode() == Instruction::AddrSpaceCast) {

      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();

      return castRequiresQueuePtr(SrcAS);

    }

    return false;

  }


  /// Get the constant access bitmap for \p C.

  uint8_t getConstantAccess(const Constant *C,

                            SmallPtrSetImpl<const Constant *> &Visited) {

    auto It = ConstantStatus.find(C);

    if (It != ConstantStatus.end())

      return It->second;


    uint8_t Result = 0;

    if (isDSAddress(C))

      Result = DS_GLOBAL;


    if (const auto *CE = dyn_cast<ConstantExpr>(C))

      if (visitConstExpr(CE))

        Result |= ADDR_SPACE_CAST;


    for (const Use &U : C->operands()) {

      const auto *OpC = dyn_cast<Constant>(U);

      if (!OpC || !Visited.insert(OpC).second)

        continue;


      Result |= getConstantAccess(OpC, Visited);

    }

    return Result;

  }


public:

  /// Returns true if \p Fn needs the queue pointer because of \p C.

  bool needsQueuePtr(const Constant *C, Function &Fn) {

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());

    bool HasAperture = hasApertureRegs(Fn);


    // No need to explore the constants.

    if (!IsNonEntryFunc && HasAperture)

      return false;


    SmallPtrSet<const Constant *, 8> Visited;

    uint8_t Access = getConstantAccess(C, Visited);


    // We need to trap on DS globals in non-entry functions.

    if (IsNonEntryFunc && (Access & DS_GLOBAL))

      return true;


    return !HasAperture && (Access & ADDR_SPACE_CAST);

  }


private:

  /// Used to determine if the Constant needs the queue pointer.

  DenseMap<const Constant *, uint8_t> ConstantStatus;

  const unsigned CodeObjectVersion;

};


struct AAAMDAttributes

    : public StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,

                          AbstractAttribute> {

  using Base = StateWrapper<BitIntegerState<uint32_t, ALL_ARGUMENT_MASK, 0>,

                            AbstractAttribute>;


  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}


  /// Create an abstract attribute view for the position \p IRP.

  static AAAMDAttributes &createForPosition(const IRPosition &IRP,

                                            Attributor &A);


  /// See AbstractAttribute::getName().

  const std::string getName() const override { return "AAAMDAttributes"; }


  /// See AbstractAttribute::getIdAddr().

  const char *getIdAddr() const override { return &ID; }


  /// This function should return true if the type of the \p AA is

  /// AAAMDAttributes.

  static bool classof(const AbstractAttribute *AA) {

    return (AA->getIdAddr() == &ID);

  }


  /// Unique ID (due to the unique address)

  static const char ID;

};

const char AAAMDAttributes::ID = 0;


struct AAUniformWorkGroupSize

    : public StateWrapper<BooleanState, AbstractAttribute> {

  using Base = StateWrapper<BooleanState, AbstractAttribute>;

  AAUniformWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}


  /// Create an abstract attribute view for the position \p IRP.

  static AAUniformWorkGroupSize &createForPosition(const IRPosition &IRP,

                                                   Attributor &A);


  /// See AbstractAttribute::getName().

  const std::string getName() const override {

    return "AAUniformWorkGroupSize";

  }


  /// See AbstractAttribute::getIdAddr().

  const char *getIdAddr() const override { return &ID; }


  /// This function should return true if the type of the \p AA is

  /// AAAMDAttributes.

  static bool classof(const AbstractAttribute *AA) {

    return (AA->getIdAddr() == &ID);

  }


  /// Unique ID (due to the unique address)

  static const char ID;

};

const char AAUniformWorkGroupSize::ID = 0;


struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {

  AAUniformWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)

      : AAUniformWorkGroupSize(IRP, A) {}


  void initialize(Attributor &A) override {

    Function *F = getAssociatedFunction();

    CallingConv::ID CC = F->getCallingConv();


    if (CC != CallingConv::AMDGPU_KERNEL)

      return;


    bool InitialValue = false;

    if (F->hasFnAttribute("uniform-work-group-size"))

      InitialValue =

          F->getFnAttribute("uniform-work-group-size").getValueAsString() ==

          "true";


    if (InitialValue)

      indicateOptimisticFixpoint();

    else

      indicatePessimisticFixpoint();

  }


  ChangeStatus updateImpl(Attributor &A) override {

    ChangeStatus Change = ChangeStatus::UNCHANGED;


    auto CheckCallSite = [&](AbstractCallSite CS) {

      Function *Caller = CS.getInstruction()->getFunction();

      LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()

                        << "->" << getAssociatedFunction()->getName() << "\n");


      const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(

          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      if (!CallerInfo)

        return false;


      Change = Change | clampStateAndIndicateChange(this->getState(),

                                                    CallerInfo->getState());


      return true;

    };


    bool AllCallSitesKnown = true;

    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))

      return indicatePessimisticFixpoint();


    return Change;

  }


  ChangeStatus manifest(Attributor &A) override {

    SmallVector<Attribute, 8> AttrList;

    LLVMContext &Ctx = getAssociatedFunction()->getContext();


    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",

                                      getAssumed() ? "true" : "false"));

    return A.manifestAttrs(getIRPosition(), AttrList,

                           /* ForceReplace */ true);

  }


  bool isValidState() const override {

    // This state is always valid, even when the state is false.

    return true;

  }


  const std::string getAsStr(Attributor *) const override {

    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";

  }


  /// See AbstractAttribute::trackStatistics()

  void trackStatistics() const override {}

};


AAUniformWorkGroupSize &

AAUniformWorkGroupSize::createForPosition(const IRPosition &IRP,

                                          Attributor &A) {

  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)

    return *new (A.Allocator) AAUniformWorkGroupSizeFunction(IRP, A);

  llvm_unreachable(

      "AAUniformWorkGroupSize is only valid for function position");

}


struct AAAMDAttributesFunction : public AAAMDAttributes {

  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)

      : AAAMDAttributes(IRP, A) {}


  void initialize(Attributor &A) override {

    Function *F = getAssociatedFunction();


    // If the function requires the implicit arg pointer due to sanitizers,

    // assume it's needed even if explicitly marked as not requiring it.

    const bool NeedsHostcall = funcRequiresHostcallPtr(*F);

    if (NeedsHostcall) {

      removeAssumedBits(IMPLICIT_ARG_PTR);

      removeAssumedBits(HOSTCALL_PTR);

    }


    for (auto Attr : ImplicitAttrs) {

      if (NeedsHostcall &&

          (Attr.first == IMPLICIT_ARG_PTR || Attr.first == HOSTCALL_PTR))

        continue;


      if (F->hasFnAttribute(Attr.second))

        addKnownBits(Attr.first);

    }


    if (F->isDeclaration())

      return;


    // Ignore functions with graphics calling conventions, these are currently

    // not allowed to have kernel arguments.

    if (AMDGPU::isGraphics(F->getCallingConv())) {

      indicatePessimisticFixpoint();

      return;

    }

  }


  ChangeStatus updateImpl(Attributor &A) override {

    Function *F = getAssociatedFunction();

    // The current assumed state used to determine a change.

    auto OrigAssumed = getAssumed();


    // Check for Intrinsics and propagate attributes.

    const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(

        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    if (!AAEdges || AAEdges->hasNonAsmUnknownCallee())

      return indicatePessimisticFixpoint();


    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());


    bool NeedsImplicit = false;

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);

    unsigned COV = InfoCache.getCodeObjectVersion();


    for (Function *Callee : AAEdges->getOptimisticEdges()) {

      Intrinsic::ID IID = Callee->getIntrinsicID();

      if (IID == Intrinsic::not_intrinsic) {

        const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(

            *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);

        if (!AAAMD)

          return indicatePessimisticFixpoint();

        *this &= *AAAMD;

        continue;

      }


      bool NonKernelOnly = false;

      ImplicitArgumentMask AttrMask =

          intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,

                              HasApertureRegs, SupportsGetDoorbellID, COV);

      if (AttrMask != NOT_IMPLICIT_INPUT) {

        if ((IsNonEntryFunc || !NonKernelOnly))

          removeAssumedBits(AttrMask);

      }

    }


    // Need implicitarg_ptr to acess queue_ptr, private_base, and shared_base.

    if (NeedsImplicit)

      removeAssumedBits(IMPLICIT_ARG_PTR);


    if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {

      // Under V5, we need implicitarg_ptr + offsets to access private_base or

      // shared_base. We do not actually need queue_ptr.

      if (COV >= 5)

        removeAssumedBits(IMPLICIT_ARG_PTR);

      else

        removeAssumedBits(QUEUE_PTR);

    }


    if (funcRetrievesMultigridSyncArg(A, COV)) {

      assert(!isAssumed(IMPLICIT_ARG_PTR) &&

             "multigrid_sync_arg needs implicitarg_ptr");

      removeAssumedBits(MULTIGRID_SYNC_ARG);

    }


    if (funcRetrievesHostcallPtr(A, COV)) {

      assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");

      removeAssumedBits(HOSTCALL_PTR);

    }


    if (funcRetrievesHeapPtr(A, COV)) {

      assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");

      removeAssumedBits(HEAP_PTR);

    }


    if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {

      assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");

      removeAssumedBits(QUEUE_PTR);

    }


    if (isAssumed(LDS_KERNEL_ID) && funcRetrievesLDSKernelId(A)) {

      removeAssumedBits(LDS_KERNEL_ID);

    }


    if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))

      removeAssumedBits(DEFAULT_QUEUE);


    if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))

      removeAssumedBits(COMPLETION_ACTION);


    return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED

                                       : ChangeStatus::UNCHANGED;

  }


  ChangeStatus manifest(Attributor &A) override {

    SmallVector<Attribute, 8> AttrList;

    LLVMContext &Ctx = getAssociatedFunction()->getContext();


    for (auto Attr : ImplicitAttrs) {

      if (isKnown(Attr.first))

        AttrList.push_back(Attribute::get(Ctx, Attr.second));

    }


    return A.manifestAttrs(getIRPosition(), AttrList,

                           /* ForceReplace */ true);

  }


  const std::string getAsStr(Attributor *) const override {

    std::string Str;

    raw_string_ostream OS(Str);

    OS << "AMDInfo[";

    for (auto Attr : ImplicitAttrs)

      if (isAssumed(Attr.first))

        OS << ' ' << Attr.second;

    OS << " ]";

    return OS.str();

  }


  /// See AbstractAttribute::trackStatistics()

  void trackStatistics() const override {}


private:

  bool checkForQueuePtr(Attributor &A) {

    Function *F = getAssociatedFunction();

    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());


    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());


    bool NeedsQueuePtr = false;


    auto CheckAddrSpaceCasts = [&](Instruction &I) {

      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();

      if (castRequiresQueuePtr(SrcAS)) {

        NeedsQueuePtr = true;

        return false;

      }

      return true;

    };


    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);


    // `checkForAllInstructions` is much more cheaper than going through all

    // instructions, try it first.


    // The queue pointer is not needed if aperture regs is present.

    if (!HasApertureRegs) {

      bool UsedAssumedInformation = false;

      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,

                                {Instruction::AddrSpaceCast},

                                UsedAssumedInformation);

    }


    // If we found  that we need the queue pointer, nothing else to do.

    if (NeedsQueuePtr)

      return true;


    if (!IsNonEntryFunc && HasApertureRegs)

      return false;


    for (BasicBlock &BB : *F) {

      for (Instruction &I : BB) {

        for (const Use &U : I.operands()) {

          if (const auto *C = dyn_cast<Constant>(U)) {

            if (InfoCache.needsQueuePtr(C, *F))

              return true;

          }

        }

      }

    }


    return false;

  }


  bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {

    auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);

    AA::RangeTy Range(Pos, 8);

    return funcRetrievesImplicitKernelArg(A, Range);

  }


  bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {

    auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);

    AA::RangeTy Range(Pos, 8);

    return funcRetrievesImplicitKernelArg(A, Range);

  }


  bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {

    auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);

    AA::RangeTy Range(Pos, 8);

    return funcRetrievesImplicitKernelArg(A, Range);

  }


  bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {

    auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);

    AA::RangeTy Range(Pos, 8);

    return funcRetrievesImplicitKernelArg(A, Range);

  }


  bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {

    if (COV < 5)

      return false;

    AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);

    return funcRetrievesImplicitKernelArg(A, Range);

  }


  bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {

    if (COV < 5)

      return false;

    AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);

    return funcRetrievesImplicitKernelArg(A, Range);

  }


  bool funcRetrievesImplicitKernelArg(Attributor &A, AA::RangeTy Range) {

    // Check if this is a call to the implicitarg_ptr builtin and it

    // is used to retrieve the hostcall pointer. The implicit arg for

    // hostcall is not used only if every use of the implicitarg_ptr

    // is a load that clearly does not retrieve any byte of the

    // hostcall pointer. We check this by tracing all the uses of the

    // initial call to the implicitarg_ptr intrinsic.

    auto DoesNotLeadToKernelArgLoc = [&](Instruction &I) {

      auto &Call = cast<CallBase>(I);

      if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)

        return true;


      const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(

          *this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);

      if (!PointerInfoAA)

        return false;


      return PointerInfoAA->forallInterferingAccesses(

          Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {

            return Acc.getRemoteInst()->isDroppable();

          });

    };


    bool UsedAssumedInformation = false;

    return !A.checkForAllCallLikeInstructions(DoesNotLeadToKernelArgLoc, *this,

                                              UsedAssumedInformation);

  }


  bool funcRetrievesLDSKernelId(Attributor &A) {

    auto DoesNotRetrieve = [&](Instruction &I) {

      auto &Call = cast<CallBase>(I);

      return Call.getIntrinsicID() != Intrinsic::amdgcn_lds_kernel_id;

    };

    bool UsedAssumedInformation = false;

    return !A.checkForAllCallLikeInstructions(DoesNotRetrieve, *this,

                                              UsedAssumedInformation);

  }

};


AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,

                                                    Attributor &A) {

  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)

    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);

  llvm_unreachable("AAAMDAttributes is only valid for function position");

}


/// Base class to derive different size ranges.

struct AAAMDSizeRangeAttribute

    : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {

  using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;


  StringRef AttrName;


  AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,

                          StringRef AttrName)

      : Base(IRP, 32), AttrName(AttrName) {}


  /// See AbstractAttribute::trackStatistics()

  void trackStatistics() const override {}


  template <class AttributeImpl>

  ChangeStatus updateImplImpl(Attributor &A) {

    ChangeStatus Change = ChangeStatus::UNCHANGED;


    auto CheckCallSite = [&](AbstractCallSite CS) {

      Function *Caller = CS.getInstruction()->getFunction();

      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()

                        << "->" << getAssociatedFunction()->getName() << '\n');


      const auto *CallerInfo = A.getAAFor<AttributeImpl>(

          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      if (!CallerInfo)

        return false;


      Change |=

          clampStateAndIndicateChange(this->getState(), CallerInfo->getState());


      return true;

    };


    bool AllCallSitesKnown = true;

    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))

      return indicatePessimisticFixpoint();


    return Change;

  }


  ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,

                                         unsigned Max) {

    // Don't add the attribute if it's the implied default.

    if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)

      return ChangeStatus::UNCHANGED;


    Function *F = getAssociatedFunction();

    LLVMContext &Ctx = F->getContext();

    SmallString<10> Buffer;

    raw_svector_ostream OS(Buffer);

    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    return A.manifestAttrs(getIRPosition(),

                           {Attribute::get(Ctx, AttrName, OS.str())},

                           /* ForceReplace */ true);

  }


  const std::string getAsStr(Attributor *) const override {

    std::string Str;

    raw_string_ostream OS(Str);

    OS << getName() << '[';

    OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;

    OS << ']';

    return OS.str();

  }

};


/// Propagate amdgpu-flat-work-group-size attribute.

struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {

  AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)

      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}


  void initialize(Attributor &A) override {

    Function *F = getAssociatedFunction();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    unsigned MinGroupSize, MaxGroupSize;

    std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);

    intersectKnown(

        ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));


    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))

      indicatePessimisticFixpoint();

  }


  ChangeStatus updateImpl(Attributor &A) override {

    return updateImplImpl<AAAMDFlatWorkGroupSize>(A);

  }


  /// Create an abstract attribute view for the position \p IRP.

  static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,

                                                   Attributor &A);


  ChangeStatus manifest(Attributor &A) override {

    Function *F = getAssociatedFunction();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    unsigned Min, Max;

    std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);

    return emitAttributeIfNotDefault(A, Min, Max);

  }


  /// See AbstractAttribute::getName()

  const std::string getName() const override {

    return "AAAMDFlatWorkGroupSize";

  }


  /// See AbstractAttribute::getIdAddr()

  const char *getIdAddr() const override { return &ID; }


  /// This function should return true if the type of the \p AA is

  /// AAAMDFlatWorkGroupSize

  static bool classof(const AbstractAttribute *AA) {

    return (AA->getIdAddr() == &ID);

  }


  /// Unique ID (due to the unique address)

  static const char ID;

};


const char AAAMDFlatWorkGroupSize::ID = 0;


AAAMDFlatWorkGroupSize &

AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,

                                          Attributor &A) {

  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)

    return *new (A.Allocator) AAAMDFlatWorkGroupSize(IRP, A);

  llvm_unreachable(

      "AAAMDFlatWorkGroupSize is only valid for function position");

}


/// Propagate amdgpu-waves-per-eu attribute.

struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {

  AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)

      : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}


  bool isValidState() const override {

    return !Assumed.isEmptySet() && IntegerRangeState::isValidState();

  }


  void initialize(Attributor &A) override {

    Function *F = getAssociatedFunction();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());


    if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(

            *this, IRPosition::function(*F), DepClassTy::REQUIRED)) {


      unsigned Min, Max;

      std::tie(Min, Max) = InfoCache.getWavesPerEU(

          *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),

               AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});


      ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));

      intersectKnown(Range);

    }


    if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))

      indicatePessimisticFixpoint();

  }


  ChangeStatus updateImpl(Attributor &A) override {

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    ChangeStatus Change = ChangeStatus::UNCHANGED;


    auto CheckCallSite = [&](AbstractCallSite CS) {

      Function *Caller = CS.getInstruction()->getFunction();

      Function *Func = getAssociatedFunction();

      LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()

                        << "->" << Func->getName() << '\n');


      const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(

          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(

          *this, IRPosition::function(*Func), DepClassTy::REQUIRED);

      if (!CallerInfo || !AssumedGroupSize)

        return false;


      unsigned Min, Max;

      std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(

          *Caller,

          {CallerInfo->getAssumed().getLower().getZExtValue(),

           CallerInfo->getAssumed().getUpper().getZExtValue() - 1},

          {AssumedGroupSize->getAssumed().getLower().getZExtValue(),

           AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});

      ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));

      IntegerRangeState CallerRangeState(CallerRange);

      Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);


      return true;

    };


    bool AllCallSitesKnown = true;

    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))

      return indicatePessimisticFixpoint();


    return Change;

  }


  /// Create an abstract attribute view for the position \p IRP.

  static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,

                                            Attributor &A);


  ChangeStatus manifest(Attributor &A) override {

    Function *F = getAssociatedFunction();

    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    unsigned Max = InfoCache.getMaxWavesPerEU(*F);

    return emitAttributeIfNotDefault(A, 1, Max);

  }


  /// See AbstractAttribute::getName()

  const std::string getName() const override { return "AAAMDWavesPerEU"; }


  /// See AbstractAttribute::getIdAddr()

  const char *getIdAddr() const override { return &ID; }


  /// This function should return true if the type of the \p AA is

  /// AAAMDWavesPerEU

  static bool classof(const AbstractAttribute *AA) {

    return (AA->getIdAddr() == &ID);

  }


  /// Unique ID (due to the unique address)

  static const char ID;

};


const char AAAMDWavesPerEU::ID = 0;


AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,

                                                    Attributor &A) {

  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)

    return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);

  llvm_unreachable("AAAMDWavesPerEU is only valid for function position");

}


static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {

  for (const auto &CI : IA->ParseConstraints()) {

    for (StringRef Code : CI.Codes) {

      Code.consume_front("{");

      if (Code.starts_with("a"))

        return true;

    }

  }


  return false;

}


struct AAAMDGPUNoAGPR

    : public IRAttribute<Attribute::NoUnwind,

                         StateWrapper<BooleanState, AbstractAttribute>,

                         AAAMDGPUNoAGPR> {

  AAAMDGPUNoAGPR(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}


  static AAAMDGPUNoAGPR &createForPosition(const IRPosition &IRP,

                                           Attributor &A) {

    if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)

      return *new (A.Allocator) AAAMDGPUNoAGPR(IRP, A);

    llvm_unreachable("AAAMDGPUNoAGPR is only valid for function position");

  }


  void initialize(Attributor &A) override {

    Function *F = getAssociatedFunction();

    if (F->hasFnAttribute("amdgpu-no-agpr"))

      indicateOptimisticFixpoint();

  }


  const std::string getAsStr(Attributor *A) const override {

    return getAssumed() ? "amdgpu-no-agpr" : "amdgpu-maybe-agpr";

  }


  void trackStatistics() const override {}


  ChangeStatus updateImpl(Attributor &A) override {

    // TODO: Use AACallEdges, but then we need a way to inspect asm edges.


    auto CheckForNoAGPRs = [&](Instruction &I) {

      const auto &CB = cast<CallBase>(I);

      const Value *CalleeOp = CB.getCalledOperand();

      const Function *Callee = dyn_cast<Function>(CalleeOp);

      if (!Callee) {

        if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))

          return !inlineAsmUsesAGPRs(IA);

        return false;

      }


      // Some intrinsics may use AGPRs, but if we have a choice, we are not

      // required to use AGPRs.

      if (Callee->isIntrinsic())

        return true;


      // TODO: Handle callsite attributes

      const auto *CalleeInfo = A.getAAFor<AAAMDGPUNoAGPR>(

          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);

      return CalleeInfo && CalleeInfo->getAssumed();

    };


    bool UsedAssumedInformation = false;

    if (!A.checkForAllCallLikeInstructions(CheckForNoAGPRs, *this,

                                           UsedAssumedInformation))

      return indicatePessimisticFixpoint();

    return ChangeStatus::UNCHANGED;

  }


  ChangeStatus manifest(Attributor &A) override {

    if (!getAssumed())

      return ChangeStatus::UNCHANGED;

    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    return A.manifestAttrs(getIRPosition(),

                           {Attribute::get(Ctx, "amdgpu-no-agpr")});

  }


  const std::string getName() const override { return "AAAMDGPUNoAGPR"; }

  const char *getIdAddr() const override { return &ID; }


  /// This function should return true if the type of the \p AA is

  /// AAAMDGPUNoAGPRs

  static bool classof(const AbstractAttribute *AA) {

    return (AA->getIdAddr() == &ID);

  }


  static const char ID;

};


const char AAAMDGPUNoAGPR::ID = 0;


static void addPreloadKernArgHint(Function &F, TargetMachine &TM) {

  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);

  for (unsigned I = 0;

       I < F.arg_size() &&

       I < std::min(KernargPreloadCount.getValue(), ST.getMaxNumUserSGPRs());

       ++I) {

    Argument &Arg = *F.getArg(I);

    // Check for incompatible attributes.

    if (Arg.hasByRefAttr() || Arg.hasNestAttr())

      break;


    Arg.addAttr(Attribute::InReg);

  }

}


static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM) {

  SetVector<Function *> Functions;

  for (Function &F : M) {

    if (!F.isIntrinsic())

      Functions.insert(&F);

  }


  CallGraphUpdater CGUpdater;

  BumpPtrAllocator Allocator;

  AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, TM);

  DenseSet<const char *> Allowed(

      {&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,

       &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,

       &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID, &AACallEdges::ID,

       &AAPointerInfo::ID, &AAPotentialConstantValues::ID,

       &AAUnderlyingObjects::ID});


  AttributorConfig AC(CGUpdater);

  AC.Allowed = &Allowed;

  AC.IsModulePass = true;

  AC.DefaultInitializeLiveInternals = false;

  AC.IPOAmendableCB = [](const Function &F) {

    return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;

  };


  Attributor A(Functions, InfoCache, AC);


  for (Function &F : M) {

    if (!F.isIntrinsic()) {

      A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));

      A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));

      A.getOrCreateAAFor<AAAMDGPUNoAGPR>(IRPosition::function(F));

      CallingConv::ID CC = F.getCallingConv();

      if (!AMDGPU::isEntryFunctionCC(CC)) {

        A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));

        A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));

      } else if (CC == CallingConv::AMDGPU_KERNEL) {

        addPreloadKernArgHint(F, TM);

      }

    }

  }


  ChangeStatus Change = A.run();

  return Change == ChangeStatus::CHANGED;

}


class AMDGPUAttributorLegacy : public ModulePass {

public:

  AMDGPUAttributorLegacy() : ModulePass(ID) {}


  /// doInitialization - Virtual method overridden by subclasses to do

  /// any necessary initialization before any pass is run.

  bool doInitialization(Module &) override {

    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();

    if (!TPC)

      report_fatal_error("TargetMachine is required");


    TM = &TPC->getTM<TargetMachine>();

    return false;

  }


  bool runOnModule(Module &M) override {

    AnalysisGetter AG(this);

    return runImpl(M, AG, *TM);

  }


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.addRequired<CycleInfoWrapperPass>();

  }


  StringRef getPassName() const override { return "AMDGPU Attributor"; }

  TargetMachine *TM;

  static char ID;

};

} // namespace


PreservedAnalyses llvm::AMDGPUAttributorPass::run(Module &M,

                                                  ModuleAnalysisManager &AM) {


  FunctionAnalysisManager &FAM =

      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();

  AnalysisGetter AG(FAM);


  // TODO: Probably preserves CFG

  return runImpl(M, AG, TM) ? PreservedAnalyses::none()

                            : PreservedAnalyses::all();

}


char AMDGPUAttributorLegacy::ID = 0;


Pass *llvm::createAMDGPUAttributorLegacyPass() {

  return new AMDGPUAttributorLegacy();

}

INITIALIZE_PASS_BEGIN(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",

                      false, false)

INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass);

INITIALIZE_PASS_END(AMDGPUAttributorLegacy, DEBUG_TYPE, "AMDGPU Attributor",

                    false, false)

KernargPreloadCount
static cl::opt< unsigned > KernargPreloadCount("amdgpu-kernarg-preload-count", cl::desc("How many kernel arguments to preload onto SGPRs"), cl::init(0))

isDSAddress
static bool isDSAddress(const Constant *C)
Definition: AMDGPUAttributor.cpp:121

ImplicitAttrs
static constexpr std::pair< ImplicitArgumentMask, StringLiteral > ImplicitAttrs[]
Definition: AMDGPUAttributor.cpp:52

intrinsicToAttrMask
static ImplicitArgumentMask intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit, bool HasApertureRegs, bool SupportsGetDoorBellID, unsigned CodeObjectVersion)
Definition: AMDGPUAttributor.cpp:62

ImplicitArgumentMask
ImplicitArgumentMask
Definition: AMDGPUAttributor.cpp:44

NOT_IMPLICIT_INPUT
@ NOT_IMPLICIT_INPUT
Definition: AMDGPUAttributor.cpp:45

ALL_ARGUMENT_MASK
@ ALL_ARGUMENT_MASK
Definition: AMDGPUAttributor.cpp:47

funcRequiresHostcallPtr
static bool funcRequiresHostcallPtr(const Function &F)
Returns true if the function requires the implicit argument be passed regardless of the function cont...
Definition: AMDGPUAttributor.cpp:131

ImplicitArgumentPositions
ImplicitArgumentPositions
Definition: AMDGPUAttributor.cpp:37

LAST_ARG_POS
@ LAST_ARG_POS
Definition: AMDGPUAttributor.cpp:39

castRequiresQueuePtr
static bool castRequiresQueuePtr(unsigned SrcAS)
Definition: AMDGPUAttributor.cpp:117

AMDGPUBaseInfo.h

AMDGPU.h

Attributor.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

CycleAnalysis.h
This file declares an analysis pass that computes CycleInfo for LLVM IR, specialized from GenericCycl...

LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101

runImpl
static bool runImpl(Function &F, const TargetLowering &TLI)
Definition: ExpandLargeDivRem.cpp:80

GCNSubtarget.h
AMD GCN specific subclass of TargetSubtarget.

DEBUG_TYPE
#define DEBUG_TYPE
Definition: GenericCycleImpl.h:30

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

Range
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))

FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:60

TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:48

INITIALIZE_PASS_DEPENDENCY
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55

INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59

INITIALIZE_PASS_BEGIN
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52

getName
static StringRef getName(Value *V)
Definition: ProvenanceAnalysisEvaluator.cpp:20

CC
auto CC
Definition: RISCVRedundantCopyElimination.cpp:79

Allocator
Basic Register Allocator
Definition: RegAllocBasic.cpp:143

assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())

OS
raw_pwrite_stream & OS
Definition: SampleProfWriter.cpp:53

initialize
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, ArrayRef< StringLiteral > StandardNames)
Initialize the set of available library functions based on the specified target triple.
Definition: TargetLibraryInfo.cpp:897

TargetPassConfig.h
Target-Independent Code Generator Pass Configuration Options pass.

llvm::AMDGPUAttributorPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPUAttributor.cpp:1102

llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:78

llvm::AbstractCallSite
AbstractCallSite.
Definition: AbstractCallSite.h:50

llvm::AddrSpaceCastInst
This class represents a conversion between pointers from one address space to another.
Definition: Instructions.h:4816

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253

llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75

llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31

llvm::Argument::hasByRefAttr
bool hasByRefAttr() const
Return true if this argument has the byref attribute.
Definition: Function.cpp:146

llvm::Argument::addAttr
void addAttr(Attribute::AttrKind Kind)
Definition: Function.cpp:328

llvm::Argument::hasNestAttr
bool hasNestAttr() const
Return true if this argument has the nest attribute.
Definition: Function.cpp:275

llvm::AttributeImpl
Definition: AttributeImpl.h:42

llvm::Attribute::get
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:94

llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:61

llvm::BumpPtrAllocatorImpl
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66

llvm::CallGraphUpdater
Wrapper to unify "old style" CallGraph and "new style" LazyCallGraph.
Definition: CallGraphUpdater.h:29

llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1084

llvm::ConstantRange
This class represents a range of values.
Definition: ConstantRange.h:47

llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:42

llvm::CycleInfoWrapperPass
Legacy analysis pass which computes a CycleInfo.
Definition: CycleAnalysis.h:26

llvm::DenseMap
Definition: DenseMap.h:758

llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271

llvm::Function
Definition: Function.h:64

llvm::Function::getCallingConv
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:274

llvm::GCNSubtarget
Definition: GCNSubtarget.h:35

llvm::GlobalValue
Definition: GlobalValue.h:48

llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: GlobalValue.h:205

llvm::InlineAsm
Definition: InlineAsm.h:34

llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:563

llvm::Instruction
Definition: Instruction.h:68

llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67

llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251

llvm::ModulePass::runOnModule
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overriden by subclasses to process the module being operated on.

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65

llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:37

llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94

llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98

llvm::Pass::doInitialization
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
Definition: Pass.h:119

llvm::Pass::getPassName
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111

llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117

llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:57

llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162

llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479

llvm::SmallString
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26

llvm::SmallVectorTemplateBase::push_back
void push_back(const T &Elt)
Definition: SmallVector.h:426

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209

llvm::StringLiteral
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition: StringRef.h:838

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50

llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77

llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43

llvm::User::isDroppable
bool isDroppable() const
A droppable user is a user for which uses can be dropped without affecting correctness and should be ...
Definition: User.cpp:115

llvm::Value
LLVM Value Representation.
Definition: Value.h:74

llvm::cl::opt
Definition: CommandLine.h:1423

llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661

llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:691

unsigned

llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143

TargetMachine.h

false
Definition: StackSlotColoring.cpp:194

llvm::AArch64PACKey::IA
@ IA
Definition: AArch64BaseInfo.h:820

llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPUAddrSpace.h:32

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPUAddrSpace.h:35

llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPUAddrSpace.h:36

llvm::AMDGPU::ImplicitArg::HEAP_PTR_OFFSET
@ HEAP_PTR_OFFSET
Definition: SIDefines.h:1014

llvm::AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET
@ QUEUE_PTR_OFFSET
Definition: SIDefines.h:1021

llvm::AMDGPU::IsaInfo::getMaxWavesPerEU
unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI)
Definition: AMDGPUBaseInfo.cpp:966

llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:2018

llvm::AMDGPU::getAMDHSACodeObjectVersion
unsigned getAMDHSACodeObjectVersion(const Module &M)
Definition: AMDGPUBaseInfo.cpp:173

llvm::AMDGPU::getDefaultQueueImplicitArgPosition
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion)
Definition: AMDGPUBaseInfo.cpp:241

llvm::AMDGPU::AMDHSA_COV4
@ AMDHSA_COV4
Definition: AMDGPUBaseInfo.h:55

llvm::AMDGPU::AMDHSA_COV5
@ AMDHSA_COV5
Definition: AMDGPUBaseInfo.h:55

llvm::AMDGPU::getHostcallImplicitArgPosition
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion)
Definition: AMDGPUBaseInfo.cpp:230

llvm::AMDGPU::getCompletionActionImplicitArgPosition
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion)
Definition: AMDGPUBaseInfo.cpp:252

llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion)
Definition: AMDGPUBaseInfo.cpp:216

llvm::AMDGPU::isGraphics
bool isGraphics(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:2010

llvm::ARMBuildAttrs::Allowed
@ Allowed
Definition: ARMBuildAttributes.h:126

llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73

llvm::ARM::ProfileKind::M
@ M

llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200

llvm::CallingConv::C
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:44

llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:156

llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443

llvm::ms_demangle::QualifierMangleMode::Result
@ Result

llvm::pdb::PDB_SymType::Caller
@ Caller

llvm::pdb::PDB_SymType::Callee
@ Callee

llvm::pdb::DbgHeaderType::Max
@ Max

llvm::rdf::Func
NodeAddr< FuncNode * > Func
Definition: RDFGraph.h:393

llvm::rdf::Code
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388

llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::initializeCycleInfoWrapperPassPass
void initializeCycleInfoWrapperPassPass(PassRegistry &)

llvm::CGSCC
@ CGSCC
Definition: Attributor.h:6419

llvm::createAMDGPUAttributorLegacyPass
Pass * createAMDGPUAttributorLegacyPass()
Definition: AMDGPUAttributor.cpp:1116

llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163

llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167

llvm::clampStateAndIndicateChange
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R)
Helper function to clamp a state S of type StateType with the information in R and indicate/return if...
Definition: Attributor.h:3455

llvm::ChangeStatus
ChangeStatus
{
Definition: Attributor.h:484

llvm::ChangeStatus::UNCHANGED
@ UNCHANGED

llvm::DepClassTy::REQUIRED
@ REQUIRED
The target cannot be valid if the source is not.

llvm::AACallEdges
An abstract state for querying live call edges.
Definition: Attributor.h:5478

llvm::AACallEdges::getOptimisticEdges
virtual const SetVector< Function * > & getOptimisticEdges() const =0
Get the optimistic edges.

llvm::AACallEdges::ID
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:5521

llvm::AACallEdges::hasNonAsmUnknownCallee
virtual bool hasNonAsmUnknownCallee() const =0
Is there any call with a unknown callee, excluding any inline asm.

llvm::AAPointerInfo::Access
An access description.
Definition: Attributor.h:5932

llvm::AAPointerInfo::Access::getRemoteInst
Instruction * getRemoteInst() const
Return the actual instruction that causes the access.
Definition: Attributor.h:6032

llvm::AAPointerInfo
An abstract interface for struct information.
Definition: Attributor.h:5746

llvm::AAPointerInfo::forallInterferingAccesses
virtual bool forallInterferingAccesses(AA::RangeTy Range, function_ref< bool(const Access &, bool)> CB) const =0
Call CB on all accesses that might interfere with Range and return true if all such accesses were kno...

llvm::AAPointerInfo::ID
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:6146

llvm::AAPotentialConstantValues::ID
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:5295

llvm::AAPotentialValues::ID
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:5332

llvm::AAUnderlyingObjects::ID
static const char ID
Unique ID (due to the unique address)
Definition: Attributor.h:6216

llvm::AA::RangeTy
Helper to represent an access offset and size, with logic to deal with uncertainty and check for over...
Definition: Attributor.h:237

llvm::AbstractAttribute
Base struct for all "concrete attribute" deductions.
Definition: Attributor.h:3275

llvm::AbstractAttribute::getIdAddr
virtual const char * getIdAddr() const =0
This function should return the address of the ID of the AbstractAttribute.

llvm::AnalysisGetter
Wrapper for FunctionAnalysisManager.
Definition: Attributor.h:1122

llvm::AttributorConfig
Configuration for the Attributor.
Definition: Attributor.h:1414

llvm::Attributor
The fixpoint analysis framework that orchestrates the attribute deduction.
Definition: Attributor.h:1508

llvm::CalleeInfo
Class to accumulate and hold information about a callee.
Definition: ModuleSummaryIndex.h:59

llvm::IRAttribute
Helper class that provides common functionality to manifest IR attributes.
Definition: Attributor.h:3180

llvm::IRAttribute::manifest
ChangeStatus manifest(Attributor &A) override
See AbstractAttribute::manifest(...).
Definition: Attributor.h:3212

llvm::IRPosition
Helper to describe and deal with positions in the LLVM-IR.
Definition: Attributor.h:581

llvm::IRPosition::callsite_returned
static const IRPosition callsite_returned(const CallBase &CB)
Create a position describing the returned value of CB.
Definition: Attributor.h:649

llvm::IRPosition::IRP_FUNCTION
@ IRP_FUNCTION
An attribute for a function (scope).
Definition: Attributor.h:593

llvm::IRPosition::function
static const IRPosition function(const Function &F, const CallBaseContext *CBContext=nullptr)
Create a position describing the function scope of F.
Definition: Attributor.h:624

llvm::IRPosition::getPositionKind
Kind getPositionKind() const
Return the associated position kind.
Definition: Attributor.h:877

llvm::InformationCache
Data structure to hold cached (LLVM-IR) information.
Definition: Attributor.h:1198

llvm::IntegerRangeState
State for an integer range.
Definition: Attributor.h:2921

llvm::IntegerRangeState::isValidState
bool isValidState() const override
See AbstractState::isValidState()
Definition: Attributor.h:2957

llvm::StateWrapper
Helper to tie a abstract state implementation to an abstract attribute.
Definition: Attributor.h:3164

llvm::StateWrapper::getState
StateType & getState() override
See AbstractAttribute::getState(...).
Definition: Attributor.h:3172

llvm::cl::desc
Definition: CommandLine.h:409