doxygen/AMDGPUAlwaysInlinePass_8cpp_source.html

//===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

/// \file

/// This pass marks all internal functions as always_inline and creates

/// duplicates of all other functions and marks the duplicates as always_inline.

//

//===----------------------------------------------------------------------===//


#include "AMDGPU.h"

#include "AMDGPUTargetMachine.h"

#include "Utils/AMDGPUBaseInfo.h"

#include "llvm/CodeGen/CommandFlags.h"

#include "llvm/IR/Module.h"

#include "llvm/Pass.h"

#include "llvm/Support/CommandLine.h"


using namespace llvm;


namespace {


static cl::opt<bool> StressCalls(

  "amdgpu-stress-function-calls",

  cl::Hidden,

  cl::desc("Force all functions to be noinline"),

  cl::init(false));


class AMDGPUAlwaysInline : public ModulePass {

  bool GlobalOpt;


public:

  static char ID;


  AMDGPUAlwaysInline(bool GlobalOpt = false) :

    ModulePass(ID), GlobalOpt(GlobalOpt) { }

  bool runOnModule(Module &M) override;


  void getAnalysisUsage(AnalysisUsage &AU) const override {

    AU.setPreservesAll();

  }

};


} // End anonymous namespace


INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",

                "AMDGPU Inline All Functions", false, false)


char AMDGPUAlwaysInline::ID = 0;


static void

recursivelyVisitUsers(GlobalValue &GV,

                      SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {

  SmallVector<User *, 16> Stack(GV.users());


  SmallPtrSet<const Value *, 8> Visited;


  while (!Stack.empty()) {

    User *U = Stack.pop_back_val();

    if (!Visited.insert(U).second)

      continue;


    if (Instruction *I = dyn_cast<Instruction>(U)) {

      Function *F = I->getParent()->getParent();

      if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {

        // FIXME: This is a horrible hack. We should always respect noinline,

        // and just let us hit the error when we can't handle this.

        //

        // Unfortunately, clang adds noinline to all functions at -O0. We have

        // to override this here until that's fixed.

        F->removeFnAttr(Attribute::NoInline);


        FuncsToAlwaysInline.insert(F);

        Stack.push_back(F);

      }


      // No need to look at further users, but we do need to inline any callers.

      continue;

    }


    append_range(Stack, U->users());

  }

}


static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {

  std::vector<GlobalAlias*> AliasesToRemove;


  bool Changed = false;

  SmallPtrSet<Function *, 8> FuncsToAlwaysInline;

  SmallPtrSet<Function *, 8> FuncsToNoInline;

  Triple TT(M.getTargetTriple());


  for (GlobalAlias &A : M.aliases()) {

    if (Function* F = dyn_cast<Function>(A.getAliasee())) {

      if (TT.getArch() == Triple::amdgcn &&

          A.getLinkage() != GlobalValue::InternalLinkage)

        continue;

      Changed = true;

      A.replaceAllUsesWith(F);

      AliasesToRemove.push_back(&A);

    }


    // FIXME: If the aliasee isn't a function, it's some kind of constant expr

    // cast that won't be inlined through.

  }


  if (GlobalOpt) {

    for (GlobalAlias* A : AliasesToRemove) {

      A->eraseFromParent();

    }

  }


  // Always force inlining of any function that uses an LDS global address. This

  // is something of a workaround because we don't have a way of supporting LDS

  // objects defined in functions. LDS is always allocated by a kernel, and it

  // is difficult to manage LDS usage if a function may be used by multiple

  // kernels.

  //

  // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this

  // should only appear when IPO passes manages to move LDs defined in a kernel

  // into a single user function.


  for (GlobalVariable &GV : M.globals()) {

    // TODO: Region address

    unsigned AS = GV.getAddressSpace();

    if ((AS == AMDGPUAS::REGION_ADDRESS) ||

        (AS == AMDGPUAS::LOCAL_ADDRESS &&

         (!AMDGPUTargetMachine::EnableLowerModuleLDS)))

      recursivelyVisitUsers(GV, FuncsToAlwaysInline);

  }


  if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {

    auto IncompatAttr

      = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;


    for (Function &F : M) {

      if (!F.isDeclaration() && !F.use_empty() &&

          !F.hasFnAttribute(IncompatAttr)) {

        if (StressCalls) {

          if (!FuncsToAlwaysInline.count(&F))

            FuncsToNoInline.insert(&F);

        } else

          FuncsToAlwaysInline.insert(&F);

      }

    }

  }


  for (Function *F : FuncsToAlwaysInline)

    F->addFnAttr(Attribute::AlwaysInline);


  for (Function *F : FuncsToNoInline)

    F->addFnAttr(Attribute::NoInline);


  return Changed || !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();

}


bool AMDGPUAlwaysInline::runOnModule(Module &M) {

  return alwaysInlineImpl(M, GlobalOpt);

}


ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {

  return new AMDGPUAlwaysInline(GlobalOpt);

}


PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,

                                              ModuleAnalysisManager &AM) {

  const bool Changed = alwaysInlineImpl(M, GlobalOpt);

  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();

}

alwaysInlineImpl
static bool alwaysInlineImpl(Module &M, bool GlobalOpt)
Definition: AMDGPUAlwaysInlinePass.cpp:89

recursivelyVisitUsers
static INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline", "AMDGPU Inline All Functions", false, false) char AMDGPUAlwaysInline void recursivelyVisitUsers(GlobalValue &GV, SmallPtrSetImpl< Function * > &FuncsToAlwaysInline)
Definition: AMDGPUAlwaysInlinePass.cpp:56

AMDGPUBaseInfo.h

AMDGPUTargetMachine.h
The AMDGPU TargetMachine interface definition for hw codegen targets.

AMDGPU.h

A
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")

CommandFlags.h

CommandLine.h

F
#define F(x, y, z)
Definition: MD5.cpp:55

I
#define I(x, y, z)
Definition: MD5.cpp:58

Module.h
Module.h This file contains the declarations for the Module class.

INITIALIZE_PASS
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38

Pass.h

char

llvm::AMDGPUTargetMachine::EnableFunctionCalls
static bool EnableFunctionCalls
Definition: AMDGPUTargetMachine.h:38

llvm::AMDGPUTargetMachine::EnableLowerModuleLDS
static bool EnableLowerModuleLDS
Definition: AMDGPUTargetMachine.h:39

llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253

llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47

llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130

llvm::Function
Definition: Function.h:64

llvm::GlobalAlias
Definition: GlobalAlias.h:28

llvm::GlobalValue
Definition: GlobalValue.h:48

llvm::GlobalValue::InternalLinkage
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59

llvm::GlobalVariable
Definition: GlobalVariable.h:39

llvm::Instruction
Definition: Instruction.h:68

llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251

llvm::ModulePass::runOnModule
virtual bool runOnModule(Module &M)=0
runOnModule - Virtual method overriden by subclasses to process the module being operated on.

llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65

llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98

llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111

llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114

llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117

llvm::SmallPtrSetImplBase::empty
bool empty() const
Definition: SmallPtrSet.h:93

llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:323

llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412

llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:344

llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:479

llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44

llvm::Triple::amdgcn
@ amdgcn
Definition: Triple.h:74

llvm::User
Definition: User.h:44

llvm::cl::opt
Definition: CommandLine.h:1423

unsigned

llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPUAddrSpace.h:32

llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPUAddrSpace.h:35

llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:2018

llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:137

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2067

llvm::createAMDGPUAlwaysInlinePass
ModulePass * createAMDGPUAlwaysInlinePass(bool GlobalOpt=true)
Definition: AMDGPUAlwaysInlinePass.cpp:165

llvm::AMDGPUAlwaysInlinePass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: AMDGPUAlwaysInlinePass.cpp:169

llvm::cl::desc
Definition: CommandLine.h:409