#include "llvm/IR/IntrinsicsAMDGPU.h"

#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"
// getInsertPt(): skip past the static allocas at the top of the entry block.
AllocaInst *AI = dyn_cast<AllocaInst>(&*InsPt);
auto &TPC = getAnalysis<TargetPassConfig>();
const Align KernArgBaseAlign(16);
const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);

Align MaxAlign;
const uint64_t TotalKernArgSize = ST.getKernArgSegmentSize(F, MaxAlign);
if (TotalKernArgSize == 0)
  return false;
CallInst *KernArgSegment =
    Builder.CreateIntrinsic(Intrinsic::amdgcn_kernarg_segment_ptr, {}, {},
                            nullptr, F.getName() + ".kernarg.segment");
KernArgSegment->addRetAttr(Attribute::NonNull);
// Inside the per-argument loop: compute this argument's offset in the kernarg segment.
const bool IsByRef = Arg.hasByRefAttr();
Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();

// ABI alignment of the argument's in-memory type.
ABITypeAlign = DL.getABITypeAlign(ArgTy);

uint64_t Size = DL.getTypeSizeInBits(ArgTy);
uint64_t AllocSize = DL.getTypeAllocSize(ArgTy);

uint64_t EltOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + BaseOffset;
ExplicitArgOffset = alignTo(ExplicitArgOffset, ABITypeAlign) + AllocSize;
// Byref arguments already have explicit loads in the function; just redirect
// the pointer into the kernarg segment.
Value *ArgOffsetPtr = Builder.CreateConstInBoundsGEP1_64(
    Builder.getInt8Ty(), KernArgSegment, EltOffset,
    Arg.getName() + ".byval.kernarg.offset");

Value *CastOffsetPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
    ArgOffsetPtr, Arg.getType());
Arg.replaceAllUsesWith(CastOffsetPtr);
if (PointerType *PT = dyn_cast<PointerType>(ArgTy)) {
  // Skip LDS/GDS pointers when the target has no usable DS instruction offset,
  // and skip noalias pointer arguments entirely.
  if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
      !ST.hasUsableDSOffset())
    continue;

  if (Arg.hasNoAliasAttr())
    continue;
}
auto *VT = dyn_cast<FixedVectorType>(ArgTy);
bool IsV3 = VT && VT->getNumElements() == 3;
bool DoShiftOpt = Size < 32 && !ArgTy->isAggregateType();

// Sub-dword arguments are loaded as an aligned dword and shifted into place.
int64_t AlignDownOffset = alignDown(EltOffset, 4);
int64_t OffsetDiff = EltOffset - AlignDownOffset;
Align AdjustedAlign = commonAlignment(
    KernArgBaseAlign, DoShiftOpt ? AlignDownOffset : EltOffset);
if (DoShiftOpt) {
  // Load the containing dword and extract the relevant bits later; also widen
  // any sub-dword load to i32 so CSE between different argument loads works.
  ArgPtr = Builder.CreateConstInBoundsGEP1_64(
      Builder.getInt8Ty(), KernArgSegment, AlignDownOffset,
      Arg.getName() + ".kernarg.offset.align.down");
  AdjustedArgTy = Builder.getInt32Ty();
} else {
  ArgPtr = Builder.CreateConstInBoundsGEP1_64(
      Builder.getInt8Ty(), KernArgSegment, EltOffset,
      Arg.getName() + ".kernarg.offset");
  AdjustedArgTy = ArgTy;
}

// Load 3-element vectors as 4-element vectors to avoid poor codegen for v3 loads.
if (IsV3 && Size >= 32) {
  V4Ty = FixedVectorType::get(VT->getElementType(), 4);
  AdjustedArgTy = V4Ty;
}
LoadInst *Load =
    Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
MDBuilder MDB(Ctx);
// Transfer pointer-argument attributes onto the kernarg load as metadata.
if (isa<PointerType>(ArgTy)) {
  if (Arg.hasNonNullAttr())
    Load->setMetadata(LLVMContext::MD_nonnull, MDNode::get(Ctx, {}));

  uint64_t DerefBytes = Arg.getDereferenceableBytes();
  if (DerefBytes != 0) {
    Load->setMetadata(
        LLVMContext::MD_dereferenceable,
        MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
                             Builder.getInt64Ty(), DerefBytes))));
  }

  uint64_t DerefOrNullBytes = Arg.getDereferenceableOrNullBytes();
  if (DerefOrNullBytes != 0) {
    Load->setMetadata(
        LLVMContext::MD_dereferenceable_or_null,
        MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
                             Builder.getInt64Ty(), DerefOrNullBytes))));
  }

  unsigned ParamAlign = Arg.getParamAlignment();
  if (ParamAlign != 0) {
    Load->setMetadata(
        LLVMContext::MD_align,
        MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
                             Builder.getInt64Ty(), ParamAlign))));
  }
}
// Rebuild the original argument value from the widened load.
if (DoShiftOpt) {
  Value *ExtractBits = OffsetDiff == 0 ?
    Load : Builder.CreateLShr(Load, OffsetDiff * 8);

  IntegerType *ArgIntTy = Builder.getIntNTy(Size);
  Value *Trunc = Builder.CreateTrunc(ExtractBits, ArgIntTy);
  Value *NewVal = Builder.CreateBitCast(Trunc, ArgTy,
                                        Arg.getName() + ".load");
  Arg.replaceAllUsesWith(NewVal);
} else if (IsV3) {
  Value *Shuf = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 2},
                                            Arg.getName() + ".load");
  Arg.replaceAllUsesWith(Shuf);
} else {
  Load->setName(Arg.getName() + ".load");
  Arg.replaceAllUsesWith(Load);
}
242 "AMDGPU Lower Kernel Arguments",
false,
false)
246 char AMDGPULowerKernelArguments::
ID = 0;
249 return new AMDGPULowerKernelArguments();
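To make the per-argument offset bookkeeping above concrete, here is a standalone arithmetic sketch; roundUp and roundDown stand in for LLVM's alignTo and alignDown, and the argument types and offsets are made up for the example, not taken from the pass.

#include <cassert>
#include <cstdint>

// Sketch of the kernarg offset bookkeeping; plain arithmetic replaces the LLVM helpers.
static uint64_t roundUp(uint64_t V, uint64_t A) { return (V + A - 1) / A * A; }
static uint64_t roundDown(uint64_t V, uint64_t A) { return V / A * A; }

int main() {
  const uint64_t BaseOffset = 0;  // stands in for ST.getExplicitKernelArgOffset(F)
  uint64_t ExplicitArgOffset = 0; // running offset through the argument list

  // Example: an i8 argument (size 1, align 1) followed by an i32 (size 4, align 4).
  uint64_t EltOffset = roundUp(ExplicitArgOffset, 1) + BaseOffset; // i8 lands at byte 0
  ExplicitArgOffset = roundUp(ExplicitArgOffset, 1) + 1;
  assert(EltOffset == 0 && ExplicitArgOffset == 1);

  EltOffset = roundUp(ExplicitArgOffset, 4) + BaseOffset;          // i32 is pushed to byte 4
  ExplicitArgOffset = roundUp(ExplicitArgOffset, 4) + 4;
  assert(EltOffset == 4 && ExplicitArgOffset == 8);

  // A sub-dword argument at a misaligned offset is loaded as the containing
  // dword and shifted, mirroring AlignDownOffset/OffsetDiff in the listing.
  uint64_t MisalignedOffset = 6;                             // e.g. an i16 at byte 6
  uint64_t AlignDownOffset = roundDown(MisalignedOffset, 4); // dword starts at byte 4
  uint64_t OffsetDiff = MisalignedOffset - AlignDownOffset;  // 2 bytes -> shift right by 16 bits
  assert(AlignDownOffset == 4 && OffsetDiff == 2);
  return 0;
}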
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
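A tiny standalone check of alignTo, the helper used for the offset bookkeeping above; the values are illustrative only.

#include "llvm/Support/Alignment.h"
#include <cassert>

using namespace llvm;

int main() {
  // alignTo rounds a byte count up to the next multiple of the alignment.
  assert(alignTo(10, Align(4)) == 12);
  assert(alignTo(16, Align(8)) == 16); // already aligned, unchanged
  return 0;
}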
This class represents an incoming formal argument to a Function.
A parsed version of the target data layout string and methods for querying it.
InstListType::iterator iterator
Instruction iterators...
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
The instances of the Type class are immutable: once they are created, they are never changed.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
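This is the factory behind MDNode::get. A small sketch of how an empty node is used to mark a kernarg load as invariant, in the style of the listing above; markInvariant is an illustrative helper name, not part of the pass.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

using namespace llvm;

// Attach an empty metadata node to flag the load as !invariant.load.
static void markInvariant(LoadInst *Load) {
  LLVMContext &Ctx = Load->getContext();
  Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
}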
LLVM Basic Block Representation.
@ REGION_ADDRESS
Address space for region memory. (GDS)
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
INITIALIZE_PASS_BEGIN(AMDGPULowerKernelArguments, DEBUG_TYPE, "AMDGPU Lower Kernel Arguments", false, false)
INITIALIZE_PASS_END(AMDGPULowerKernelArguments, DEBUG_TYPE, "AMDGPU Lower Kernel Arguments", false, false)
Represent the analysis usage information of a pass.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value and is Skew mod Align.
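A small standalone check of alignDown, mirroring how AlignDownOffset is derived from EltOffset in the listing; the values are illustrative only.

#include "llvm/Support/MathExtras.h"
#include <cassert>

using namespace llvm;

int main() {
  // alignDown rounds a value down to the previous multiple of the alignment.
  assert(alignDown(10, 4) == 8);
  assert(alignDown(16, 4) == 16); // already aligned, unchanged
  return 0;
}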
Class to represent integer types.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
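A minimal sketch combining FixedVectorType::get with ConstantInt::get's splat behavior; the <4 x i32> type matches the widened form used for 3-element vector arguments above, and the constant value is arbitrary.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // <4 x i32>, the widened type used when a 3-element vector is loaded as 4 elements.
  auto *V4I32 = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  // With a vector type, ConstantInt::get returns a splat constant: <4 x i32> <7, 7, 7, 7>.
  Constant *Splat = ConstantInt::get(V4I32, 7);
  (void)Splat;
  return 0;
}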
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options.
Base class of all SIMD vector types.
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
This is an important class for using LLVM in a threaded context.
static Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
Class to represent pointers.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
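A sketch of how addRetAttr combines with the attribute factories listed here to annotate the kernarg segment pointer, in the spirit of what the pass does; annotateKernArgSegment is an illustrative helper name, and the 16-byte base alignment mirrors KernArgBaseAlign in the listing.

#include <algorithm>
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Mark the returned kernarg pointer non-null, dereferenceable for the whole
// segment, and aligned to at least the 16-byte segment base alignment.
static void annotateKernArgSegment(CallInst *KernArgSegment,
                                   uint64_t TotalKernArgSize, Align MaxAlign) {
  LLVMContext &Ctx = KernArgSegment->getContext();
  KernArgSegment->addRetAttr(Attribute::NonNull);
  KernArgSegment->addRetAttr(
      Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
  KernArgSegment->addRetAttr(
      Attribute::getWithAlignment(Ctx, std::max(Align(16), MaxAlign)));
}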
Primary interface to the complete machine description for the target machine.
Type * getType() const
All values are typed, get the type of this value.
StringRef getName() const
Return a constant reference to the value's name.
An instruction for reading from memory.
static Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
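A one-line check of commonAlignment; the values are illustrative only.

#include "llvm/Support/Alignment.h"
#include <cassert>

using namespace llvm;

int main() {
  // The common alignment of two alignments is the smaller one: an address that
  // is both 16- and 4-byte aligned is only guaranteed to be 4-byte aligned.
  assert(commonAlignment(Align(16), Align(4)) == Align(4));
  return 0;
}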
void setPreservesAll()
Set by analyses that do not transform their input at all.
FunctionPass * createAMDGPULowerKernelArgumentsPass()
AMDGPU Lower Kernel Arguments
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
@ LOCAL_ADDRESS
Address space for local memory.
Align max(MaybeAlign Lhs, Align Rhs)
static BasicBlock::iterator getInsertPt(BasicBlock &BB)
FunctionPass class - This class is used to implement most global optimizations.
This class represents a function call, abstracting a target machine's calling convention.
bool isAggregateType() const
Return true if the type is an aggregate type.
AnalysisUsage & addRequired()
an instruction to allocate memory on the stack
LLVM Value Representation.