LLVM  15.0.0git
AMDGPUMachineFunction.cpp
Go to the documentation of this file.
1 //===-- AMDGPUMachineFunction.cpp -------------------------------------------=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "AMDGPU.h"
11 #include "AMDGPUPerfHintAnalysis.h"
12 #include "AMDGPUSubtarget.h"
15 
16 using namespace llvm;
17 
19  : Mode(MF.getFunction()), IsEntryFunction(AMDGPU::isEntryFunctionCC(
20  MF.getFunction().getCallingConv())),
21  IsModuleEntryFunction(
22  AMDGPU::isModuleEntryFunctionCC(MF.getFunction().getCallingConv())),
23  NoSignedZerosFPMath(MF.getTarget().Options.NoSignedZerosFPMath) {
25 
26  // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
27  // except reserved size is not correctly aligned.
28  const Function &F = MF.getFunction();
29 
30  Attribute MemBoundAttr = F.getFnAttribute("amdgpu-memory-bound");
31  MemoryBound = MemBoundAttr.getValueAsBool();
32 
33  Attribute WaveLimitAttr = F.getFnAttribute("amdgpu-wave-limiter");
34  WaveLimiter = WaveLimitAttr.getValueAsBool();
35 
36  // FIXME: How is this attribute supposed to interact with statically known
37  // global sizes?
38  StringRef S = F.getFnAttribute("amdgpu-gds-size").getValueAsString();
39  if (!S.empty())
40  S.consumeInteger(0, GDSSize);
41 
42  // Assume the attribute allocates before any known GDS globals.
44 
45  CallingConv::ID CC = F.getCallingConv();
47  ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
48 }
49 
51  const GlobalVariable &GV) {
52  auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
53  if (!Entry.second)
54  return Entry.first->second;
55 
56  Align Alignment =
57  DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
58 
59  unsigned Offset;
61  /// TODO: We should sort these to minimize wasted space due to alignment
62  /// padding. Currently the padding is decided by the first encountered use
63  /// during lowering.
64  Offset = StaticLDSSize = alignTo(StaticLDSSize, Alignment);
65 
66  StaticLDSSize += DL.getTypeAllocSize(GV.getValueType());
67 
68  // Update the LDS size considering the padding to align the dynamic shared
69  // memory.
71  } else {
73  "expected region address space");
74 
75  Offset = StaticGDSSize = alignTo(StaticGDSSize, Alignment);
76  StaticGDSSize += DL.getTypeAllocSize(GV.getValueType());
77 
78  // FIXME: Apply alignment of dynamic GDS
80  }
81 
82  Entry.first->second = Offset;
83  return Offset;
84 }
85 
86 // Returns true when F carries the "amdgpu-elide-module-lds" function attribute, i.e. this kernel calls no functions that require the module LDS struct.
87 static bool canElideModuleLDS(const Function &F) {
88  return F.hasFnAttribute("amdgpu-elide-module-lds");
89 }
90 
92  const Module *M = F.getParent();
93  if (isModuleEntryFunction()) {
94  const GlobalVariable *GV = M->getNamedGlobal("llvm.amdgcn.module.lds");
95  if (GV && !canElideModuleLDS(F)) {
96  unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV);
97  (void)Offset;
98  assert(Offset == 0 &&
99  "Module LDS expected to be allocated before other LDS");
100  }
101  }
102 }
103 
105  const GlobalVariable &GV) {
106  assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
107 
108  Align Alignment =
109  DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType());
110  if (Alignment <= DynLDSAlign)
111  return;
112 
113  LDSSize = alignTo(StaticLDSSize, Alignment);
114  DynLDSAlign = Alignment;
115 }
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
llvm::AMDGPUMachineFunction::allocateLDSGlobal
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV)
Definition: AMDGPUMachineFunction.cpp:50
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::AMDGPUMachineFunction::ExplicitKernArgSize
uint64_t ExplicitKernArgSize
Definition: AMDGPUMachineFunction.h:28
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:113
llvm::Function
Definition: Function.h:60
getFunction
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:233
llvm::Attribute
Definition: Attributes.h:65
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
llvm::AMDGPUMachineFunction::AMDGPUMachineFunction
AMDGPUMachineFunction(const MachineFunction &MF)
Definition: AMDGPUMachineFunction.cpp:18
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::AMDGPUMachineFunction::LDSSize
uint32_t LDSSize
Number of bytes in the LDS that are being used.
Definition: AMDGPUMachineFunction.h:32
llvm::AMDGPUMachineFunction::WaveLimiter
bool WaveLimiter
Definition: AMDGPUMachineFunction.h:63
llvm::Attribute::getValueAsBool
bool getValueAsBool() const
Return the attribute's value as a boolean.
Definition: Attributes.cpp:290
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::AMDGPUSubtarget::get
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Definition: AMDGPUSubtarget.cpp:972
TargetMachine.h
AMDGPUMachineFunction.h
AMDGPUSubtarget.h
AMDGPU
Definition: AMDGPUReplaceLDSUseWithPointer.cpp:114
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::CallingConv::ID
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
llvm::AMDGPU::isEntryFunctionCC
bool isEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1639
llvm::AMDGPUMachineFunction::allocateModuleLDSGlobal
void allocateModuleLDSGlobal(const Function &F)
Definition: AMDGPUMachineFunction.cpp:91
llvm::AMDGPUMachineFunction::MemoryBound
bool MemoryBound
Definition: AMDGPUMachineFunction.h:60
llvm::AMDGPUSubtarget
Definition: AMDGPUSubtarget.h:29
AMDGPUPerfHintAnalysis.h
Analyzes whether a function is potentially memory bound and whether a kernel may benefit from limiting numb...
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
canElideModuleLDS
static bool canElideModuleLDS(const Function &F)
Definition: AMDGPUMachineFunction.cpp:87
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Mode
SI Whole Quad Mode
Definition: SIWholeQuadMode.cpp:262
MachineModuleInfo.h
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MachineFunction
Definition: MachineFunction.h:257
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::AMDGPU::isModuleEntryFunctionCC
bool isModuleEntryFunctionCC(CallingConv::ID CC)
Definition: AMDGPUBaseInfo.cpp:1656
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::AMDGPUMachineFunction::StaticLDSSize
uint32_t StaticLDSSize
Number of bytes in the LDS allocated statically.
Definition: AMDGPUMachineFunction.h:37
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:622
llvm::AMDGPUMachineFunction::isModuleEntryFunction
bool isModuleEntryFunction() const
Definition: AMDGPUMachineFunction.h:90
llvm::AMDGPUAS::REGION_ADDRESS
@ REGION_ADDRESS
Address space for region memory. (GDS)
Definition: AMDGPU.h:360
llvm::GlobalValue::getAddressSpace
unsigned getAddressSpace() const
Definition: Globals.cpp:121
llvm::AMDGPUMachineFunction::StaticGDSSize
uint32_t StaticGDSSize
Definition: AMDGPUMachineFunction.h:38
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:278
llvm::AMDGPUMachineFunction::MaxKernArgAlign
Align MaxKernArgAlign
Definition: AMDGPUMachineFunction.h:29
llvm::AMDGPUMachineFunction::GDSSize
uint32_t GDSSize
Definition: AMDGPUMachineFunction.h:33
llvm::AMDGPUMachineFunction::DynLDSAlign
Align DynLDSAlign
Align for dynamic shared memory if any.
Definition: AMDGPUMachineFunction.h:45
llvm::AMDGPUMachineFunction::setDynLDSAlign
void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV)
Definition: AMDGPUMachineFunction.cpp:104