LLVM  14.0.0git
AMDGPUAnnotateKernelFeatures.cpp
Go to the documentation of this file.
1 //===- AMDGPUAnnotateKernelFeaturesPass.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This pass annotates functions with the "amdgpu-calls" and
10 /// "amdgpu-stack-objects" attributes, based on the calls and allocas found
11 /// in their bodies, to hint ABI lowering optimizations later.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "GCNSubtarget.h"
20 #include "llvm/IR/IntrinsicsAMDGPU.h"
21 #include "llvm/IR/IntrinsicsR600.h"
23 
24 #define DEBUG_TYPE "amdgpu-annotate-kernel-features"
25 
26 using namespace llvm;
27 
28 namespace {
29 class AMDGPUAnnotateKernelFeatures : public CallGraphSCCPass {
30 private:
31  const TargetMachine *TM = nullptr;
32 
33  bool addFeatureAttributes(Function &F);
34 
35 public:
36  static char ID;
37 
38  AMDGPUAnnotateKernelFeatures() : CallGraphSCCPass(ID) {}
39 
40  bool doInitialization(CallGraph &CG) override;
41  bool runOnSCC(CallGraphSCC &SCC) override;
42 
43  StringRef getPassName() const override {
44  return "AMDGPU Annotate Kernel Features";
45  }
46 
47  void getAnalysisUsage(AnalysisUsage &AU) const override {
48  AU.setPreservesAll();
50  }
51 };
52 
53 } // end anonymous namespace
54 
56 
58 
// Registers the pass under the argument string DEBUG_TYPE with the
// description "Add AMDGPU function attributes".
// NOTE(review): the two trailing `false` flags are presumably the macro's
// CFG-only and is-analysis parameters — confirm against llvm/PassSupport.h.
59 INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE,
60  "Add AMDGPU function attributes", false, false)
61 
62 bool AMDGPUAnnotateKernelFeatures::addFeatureAttributes(Function &F) {
63  bool HaveStackObjects = false;
64  bool Changed = false;
65  bool HaveCall = false;
66  bool IsFunc = !AMDGPU::isEntryFunctionCC(F.getCallingConv());
67 
68  for (BasicBlock &BB : F) {
69  for (Instruction &I : BB) {
70  if (isa<AllocaInst>(I)) {
71  HaveStackObjects = true;
72  continue;
73  }
74 
75  if (auto *CB = dyn_cast<CallBase>(&I)) {
76  const Function *Callee =
77  dyn_cast<Function>(CB->getCalledOperand()->stripPointerCasts());
78 
79  // Note the occurrence of indirect call.
80  if (!Callee) {
81  if (!CB->isInlineAsm())
82  HaveCall = true;
83 
84  continue;
85  }
86 
87  Intrinsic::ID IID = Callee->getIntrinsicID();
88  if (IID == Intrinsic::not_intrinsic) {
89  HaveCall = true;
90  Changed = true;
91  }
92  }
93  }
94  }
95 
96  // TODO: We could refine this to captured pointers that could possibly be
97  // accessed by flat instructions. For now this is mostly a poor way of
98  // estimating whether there are calls before argument lowering.
99  if (!IsFunc && HaveCall) {
100  F.addFnAttr("amdgpu-calls");
101  Changed = true;
102  }
103 
104  if (HaveStackObjects) {
105  F.addFnAttr("amdgpu-stack-objects");
106  Changed = true;
107  }
108 
109  return Changed;
110 }
111 
112 bool AMDGPUAnnotateKernelFeatures::runOnSCC(CallGraphSCC &SCC) {
113  bool Changed = false;
114 
115  for (CallGraphNode *I : SCC) {
116  Function *F = I->getFunction();
117  // Ignore functions with graphics calling conventions, these are currently
118  // not allowed to have kernel arguments.
119  if (!F || F->isDeclaration() || AMDGPU::isGraphics(F->getCallingConv()))
120  continue;
121  // Add feature attributes
122  Changed |= addFeatureAttributes(*F);
123  }
124 
125  return Changed;
126 }
127 
128 bool AMDGPUAnnotateKernelFeatures::doInitialization(CallGraph &CG) {
129  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
130  if (!TPC)
131  report_fatal_error("TargetMachine is required");
132 
133  TM = &TPC->getTM<TargetMachine>();
134  return false;
135 }
136 
138  return new AMDGPUAnnotateKernelFeatures();
139 }
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::Function
Definition: Function.h:61
llvm::CallGraph
The basic data container for the call graph of a Module of IR.
Definition: CallGraph.h:73
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::CallGraphSCC
CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
Definition: CallGraphSCCPass.h:87
TargetMachine.h
GCNSubtarget.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::Instruction
Definition: Instruction.h:45
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::CallGraphNode
A node in the call graph for a module.
Definition: CallGraph.h:167
llvm::createAMDGPUAnnotateKernelFeaturesPass
Pass * createAMDGPUAnnotateKernelFeaturesPass()
Definition: AMDGPUAnnotateKernelFeatures.cpp:137
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1382
INITIALIZE_PASS
INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, "Add AMDGPU function attributes", false, false) bool AMDGPUAnnotateKernelFeatures
Definition: AMDGPUAnnotateKernelFeatures.cpp:59
llvm::CallGraphSCCPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &Info) const override
getAnalysisUsage - For this class, we declare that we require and preserve the call graph.
Definition: CallGraphSCCPass.cpp:659
I
#define I(x, y, z)
Definition: MD5.cpp:59
TargetPassConfig.h
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::AMDGPU::CPol::SCC
@ SCC
Definition: SIDefines.h:292
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::AMDGPU::isGraphics
bool isGraphics(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1374
CallGraphSCCPass.h
Callee
amdgpu Simplify well known AMD library false FunctionCallee Callee
Definition: AMDGPULibCalls.cpp:206
llvm::AnalysisUsage::setPreservesAll
void setPreservesAll()
Set by analyses that do not transform their input at all.
Definition: PassAnalysisSupport.h:130
DEBUG_TYPE
#define DEBUG_TYPE
Definition: AMDGPUAnnotateKernelFeatures.cpp:24
llvm::CallGraphSCCPass
Definition: CallGraphSCCPass.h:34
CallGraph.h
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AMDGPUAnnotateKernelFeaturesID
char & AMDGPUAnnotateKernelFeaturesID
Definition: AMDGPUAnnotateKernelFeatures.cpp:57
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37