36#include "llvm/IR/IntrinsicsAMDGPU.h"
37#include "llvm/IR/IntrinsicsR600.h"
42#define DEBUG_TYPE "amdgpu-promote-alloca"
49 "disable-promote-alloca-to-vector",
50 cl::desc(
"Disable promote alloca to vector"),
54 "disable-promote-alloca-to-lds",
55 cl::desc(
"Disable promote alloca to LDS"),
59 "amdgpu-promote-alloca-to-vector-limit",
60 cl::desc(
"Maximum byte size to consider promote alloca to vector"),
64class AMDGPUPromoteAllocaImpl {
75 bool IsAMDGCN =
false;
76 bool IsAMDHSA =
false;
83 bool collectUsesWithPtrTypes(
Value *BaseAlloca,
85 std::vector<Value*> &WorkList)
const;
91 bool binaryOpIsDerivedFromSameAlloca(
Value *Alloca,
Value *Val,
93 int OpIdx0,
int OpIdx1)
const;
96 bool hasSufficientLocalMem(
const Function &
F);
99 bool tryPromoteAllocaToLDS(
AllocaInst &
I,
bool SufficientLDS);
103 const Triple &TT =
TM.getTargetTriple();
108 bool run(
Function &
F,
bool PromoteToLDS);
121 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
135class AMDGPUPromoteAllocaToVector :
public FunctionPass {
144 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
151 return "AMDGPU Promote Alloca to vector";
161 if (!
TM.getTargetTriple().isAMDGCN())
165 unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(
F).first);
170 if (!
F.hasFnAttribute(Attribute::AlwaysInline) &&
172 MaxVGPRs = std::min(MaxVGPRs, 32u);
178char AMDGPUPromoteAlloca::ID = 0;
179char AMDGPUPromoteAllocaToVector::ID = 0;
182 "AMDGPU promote alloca to vector or LDS",
false,
false)
190 "AMDGPU promote alloca to vector",
false,
false)
197 bool Changed = AMDGPUPromoteAllocaImpl(
TM).
run(
F,
true);
208 bool Changed = AMDGPUPromoteAllocaImpl(TM).run(
F,
false);
218 return new AMDGPUPromoteAlloca();
222 return new AMDGPUPromoteAllocaToVector();
225bool AMDGPUPromoteAllocaImpl::run(
Function &
F,
bool PromoteToLDS) {
227 DL = &
Mod->getDataLayout();
230 if (!ST.isPromoteAllocaEnabled())
233 MaxVGPRs = getMaxVGPRs(
TM,
F);
235 bool SufficientLDS = PromoteToLDS ? hasSufficientLocalMem(
F) :
false;
242 if (!AI->isStaticAlloca() || AI->isArrayAllocation())
248 bool Changed =
false;
250 if (tryPromoteAllocaToVector(*AI))
252 else if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
269 using namespace PatternMatch;
273 return I->getOperand(0) == AI &&
279 const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
280 auto *
GEP = dyn_cast<GetElementPtrInst>(
Ptr->stripPointerCasts());
284 auto I = GEPIdx.find(
GEP);
285 assert(
I != GEPIdx.end() &&
"Must have entry for GEP!");
293 unsigned BW =
DL.getIndexTypeSizeInBits(
GEP->getType());
295 APInt ConstOffset(BW, 0);
296 if (
GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
297 !
GEP->collectOffset(
DL, BW, VarOffsets, ConstOffset))
300 unsigned VecElemSize =
DL.getTypeAllocSize(VecElemTy);
301 if (VarOffsets.
size() > 1)
304 if (VarOffsets.
size() == 1) {
307 const auto &VarOffset = VarOffsets.
front();
308 if (!ConstOffset.
isZero() || VarOffset.second != VecElemSize)
310 return VarOffset.first;
323bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(
AllocaInst &Alloca) {
324 LLVM_DEBUG(
dbgs() <<
"Trying to promote to vector: " << Alloca <<
'\n');
326 if (DisablePromoteAllocaToVector) {
332 auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
333 if (
auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
335 ArrayTy->getNumElements() > 0)
337 ArrayTy->getNumElements());
341 unsigned Limit = PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
344 if (
DL->getTypeSizeInBits(AllocaTy) * 4 > Limit) {
345 LLVM_DEBUG(
dbgs() <<
" Alloca too big for vectorization with " << MaxVGPRs
346 <<
" registers available\n");
360 if (VectorTy->getNumElements() > 16 || VectorTy->getNumElements() < 2) {
362 <<
" has an unsupported number of elements\n");
366 std::map<GetElementPtrInst *, Value *> GEPVectorIdx;
373 LLVM_DEBUG(
dbgs() <<
" Cannot promote alloca to vector: " << Msg <<
"\n"
374 <<
" " << *Inst <<
"\n");
381 LLVM_DEBUG(
dbgs() <<
" Attempting promotion to: " << *VectorTy <<
"\n");
383 Type *VecEltTy = VectorTy->getElementType();
384 unsigned ElementSize =
DL->getTypeSizeInBits(VecEltTy) / 8;
385 while (!
Uses.empty()) {
391 if (isa<StoreInst>(Inst) &&
393 return RejectUser(Inst,
"pointer is being stored");
396 Ptr =
Ptr->stripPointerCasts();
400 DL->getTypeStoreSize(AccessTy))
404 bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
405 : cast<StoreInst>(Inst)->isSimple();
408 return RejectUser(Inst,
"not simple and/or vector element type not "
409 "castable to access type");
415 if (isa<BitCastInst>(Inst)) {
422 if (
auto *
GEP = dyn_cast<GetElementPtrInst>(Inst)) {
427 return RejectUser(Inst,
"cannot compute vector index for GEP");
435 if (
MemSetInst *MSI = dyn_cast<MemSetInst>(Inst);
442 if (TransferInst->isVolatile())
443 return RejectUser(Inst,
"mem transfer inst is volatile");
445 ConstantInt *
Len = dyn_cast<ConstantInt>(TransferInst->getLength());
446 if (!Len || (
Len->getZExtValue() % ElementSize))
447 return RejectUser(Inst,
"mem transfer inst length is non-constant or "
448 "not a multiple of the vector element size");
450 if (!TransferInfo.
count(TransferInst)) {
458 if (
Ptr != &Alloca && !GEPVectorIdx.count(
GEP))
464 unsigned OpNum =
U->getOperandNo();
467 Value *Dest = TransferInst->getDest();
470 return RejectUser(Inst,
"could not calculate constant dest index");
474 Value *Src = TransferInst->getSource();
477 return RejectUser(Inst,
"could not calculate constant src index");
488 return isAssumeLikeIntrinsic(cast<Instruction>(U));
492 return RejectUser(Inst,
"unhandled alloca user");
495 while (!DeferredInsts.
empty()) {
501 if (!
Info.SrcIndex || !
Info.DestIndex)
503 Inst,
"mem transfer inst is missing constant src and/or dst index");
506 LLVM_DEBUG(
dbgs() <<
" Converting alloca to vector " << *AllocaTy <<
" -> "
507 << *VectorTy <<
'\n');
512 case Instruction::Load: {
513 Value *
Ptr = cast<LoadInst>(Inst)->getPointerOperand();
516 Value *BitCast =
Builder.CreateBitCast(&Alloca, VecPtrTy);
520 if (Inst->
getType() != VecEltTy)
527 case Instruction::Store: {
532 Value *BitCast =
Builder.CreateBitCast(&Alloca, VecPtrTy);
535 Value *Elt =
SI->getValueOperand();
536 if (Elt->
getType() != VecEltTy)
537 Elt =
Builder.CreateBitOrPointerCast(Elt, VecEltTy);
543 case Instruction::Call: {
546 unsigned NumCopied =
Length->getZExtValue() / ElementSize;
552 for (
unsigned Idx = 0;
Idx < VectorTy->getNumElements(); ++
Idx) {
553 if (
Idx >= DestBegin &&
Idx < DestBegin + NumCopied) {
554 Mask.push_back(SrcBegin++);
560 Value *BitCast =
Builder.CreateBitCast(&Alloca, VecPtrTy);
563 Value *NewVecValue =
Builder.CreateShuffleVector(VecValue, Mask);
567 }
else if (
MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
571 MSI->setOperand(2,
Builder.getInt64(
DL->getTypeStoreSize(VectorTy)));
586std::pair<Value *, Value *>
587AMDGPUPromoteAllocaImpl::getLocalSizeYZ(
IRBuilder<> &Builder) {
600 ST.makeLIDRangeMetadata(LocalSizeY);
601 ST.makeLIDRangeMetadata(LocalSizeZ);
603 return std::pair(LocalSizeY, LocalSizeZ);
646 F.removeFnAttr(
"amdgpu-no-dispatch-ptr");
658 Value *GEPXY =
Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
661 Value *GEPZU =
Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
665 LoadXY->
setMetadata(LLVMContext::MD_invariant_load, MD);
666 LoadZU->
setMetadata(LLVMContext::MD_invariant_load, MD);
667 ST.makeLIDRangeMetadata(LoadZU);
672 return std::pair(
Y, LoadZU);
684 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
686 AttrName =
"amdgpu-no-workitem-id-x";
689 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
691 AttrName =
"amdgpu-no-workitem-id-y";
695 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
697 AttrName =
"amdgpu-no-workitem-id-z";
705 ST.makeLIDRangeMetadata(CI);
706 F->removeFnAttr(AttrName);
717 case Intrinsic::memcpy:
718 case Intrinsic::memmove:
719 case Intrinsic::memset:
720 case Intrinsic::lifetime_start:
721 case Intrinsic::lifetime_end:
722 case Intrinsic::invariant_start:
723 case Intrinsic::invariant_end:
724 case Intrinsic::launder_invariant_group:
725 case Intrinsic::strip_invariant_group:
726 case Intrinsic::objectsize:
733bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
741 if (isa<ConstantPointerNull>(OtherOp))
745 if (!isa<AllocaInst>(OtherObj))
754 if (OtherObj != BaseAlloca) {
756 dbgs() <<
"Found a binary instruction with another alloca object\n");
763bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
764 Value *BaseAlloca,
Value *Val, std::vector<Value *> &WorkList)
const {
774 WorkList.push_back(
User);
779 if (UseInst->
getOpcode() == Instruction::PtrToInt)
782 if (
LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
783 if (LI->isVolatile())
789 if (
StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
790 if (
SI->isVolatile())
794 if (
SI->getPointerOperand() != Val)
796 }
else if (
AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
797 if (RMW->isVolatile())
800 if (CAS->isVolatile())
806 if (
ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
807 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
811 WorkList.push_back(ICmp);
814 if (UseInst->
getOpcode() == Instruction::AddrSpaceCast) {
819 WorkList.push_back(
User);
825 if (isa<InsertValueInst>(
User) || isa<InsertElementInst>(
User))
834 if (!
GEP->isInBounds())
840 if (
SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
841 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
846 if (
PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
849 switch (Phi->getNumIncomingValues()) {
853 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
861 WorkList.push_back(
User);
862 if (!collectUsesWithPtrTypes(BaseAlloca,
User, WorkList))
869bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(
const Function &
F) {
877 for (
Type *ParamTy : FTy->params()) {
878 PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
881 LLVM_DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to "
882 "local memory disabled.\n");
887 LocalMemLimit =
ST.getAddressableLocalMemorySize();
888 if (LocalMemLimit == 0)
898 if (
Use->getParent()->getParent() == &
F)
902 if (VisitedConstants.
insert(
C).second)
914 if (visitUsers(&GV, &GV)) {
922 while (!
Stack.empty()) {
924 if (visitUsers(&GV,
C)) {
945 LLVM_DEBUG(
dbgs() <<
"Function has a reference to externally allocated "
946 "local memory. Promoting to local memory "
961 CurrentLocalMemUsage = 0;
967 for (
auto Alloc : AllocatedSizes) {
968 CurrentLocalMemUsage =
alignTo(CurrentLocalMemUsage,
Alloc.second);
969 CurrentLocalMemUsage +=
Alloc.first;
972 unsigned MaxOccupancy =
973 ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
F);
980 unsigned OccupancyHint =
ST.getWavesPerEU(
F).second;
981 if (OccupancyHint == 0)
985 OccupancyHint = std::min(OccupancyHint,
ST.getMaxWavesPerEU());
989 MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
992 unsigned MaxSizeWithWaveCount =
993 ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy,
F);
996 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
999 LocalMemLimit = MaxSizeWithWaveCount;
1002 <<
" bytes of LDS\n"
1003 <<
" Rounding size to " << MaxSizeWithWaveCount
1004 <<
" with a maximum occupancy of " << MaxOccupancy <<
'\n'
1005 <<
" and " << (LocalMemLimit - CurrentLocalMemUsage)
1006 <<
" available for promotion\n");
1012bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(
AllocaInst &
I,
1013 bool SufficientLDS) {
1016 if (DisablePromoteAllocaToLDS) {
1024 const Function &ContainingFunction = *
I.getParent()->getParent();
1037 <<
" promote alloca to LDS not supported with calling convention.\n");
1046 unsigned WorkGroupSize =
ST.getFlatWorkGroupSizes(ContainingFunction).second;
1049 DL.getValueOrABITypeAlignment(
I.getAlign(),
I.getAllocatedType());
1059 WorkGroupSize *
DL.getTypeAllocSize(
I.getAllocatedType());
1060 NewSize += AllocSize;
1062 if (NewSize > LocalMemLimit) {
1064 <<
" bytes of local memory not available to promote\n");
1068 CurrentLocalMemUsage = NewSize;
1070 std::vector<Value*> WorkList;
1072 if (!collectUsesWithPtrTypes(&
I, &
I, WorkList)) {
1084 Twine(
F->getName()) +
Twine(
'.') +
I.getName(),
nullptr,
1089 Value *TCntY, *TCntZ;
1091 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
1092 Value *TIdX = getWorkitemID(Builder, 0);
1093 Value *TIdY = getWorkitemID(Builder, 1);
1094 Value *TIdZ = getWorkitemID(Builder, 2);
1096 Value *Tmp0 =
Builder.CreateMul(TCntY, TCntZ,
"",
true,
true);
1097 Tmp0 =
Builder.CreateMul(Tmp0, TIdX);
1098 Value *Tmp1 =
Builder.CreateMul(TIdY, TCntZ,
"",
true,
true);
1100 TID =
Builder.CreateAdd(TID, TIdZ);
1102 Value *Indices[] = {
1108 I.mutateType(
Offset->getType());
1110 I.eraseFromParent();
1114 for (
Value *V : WorkList) {
1117 if (
ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
1122 if (isa<ConstantPointerNull>(CI->
getOperand(0)))
1125 if (isa<ConstantPointerNull>(CI->
getOperand(1)))
1133 if (isa<AddrSpaceCastInst>(V))
1141 V->mutateType(NewTy);
1144 if (
SelectInst *SI = dyn_cast<SelectInst>(V)) {
1145 if (isa<ConstantPointerNull>(
SI->getOperand(1)))
1148 if (isa<ConstantPointerNull>(
SI->getOperand(2)))
1150 }
else if (
PHINode *Phi = dyn_cast<PHINode>(V)) {
1151 for (
unsigned I = 0,
E = Phi->getNumIncomingValues();
I !=
E; ++
I) {
1152 if (isa<ConstantPointerNull>(Phi->getIncomingValue(
I)))
1162 switch (
Intr->getIntrinsicID()) {
1163 case Intrinsic::lifetime_start:
1164 case Intrinsic::lifetime_end:
1166 Intr->eraseFromParent();
1168 case Intrinsic::memcpy:
1169 case Intrinsic::memmove:
1175 case Intrinsic::memset: {
1180 Intr->eraseFromParent();
1183 case Intrinsic::invariant_start:
1184 case Intrinsic::invariant_end:
1185 case Intrinsic::launder_invariant_group:
1186 case Intrinsic::strip_invariant_group:
1187 Intr->eraseFromParent();
1192 case Intrinsic::objectsize: {
1195 Mod, Intrinsic::objectsize,
1202 {Src,
Intr->getOperand(1),
Intr->getOperand(2),
Intr->getOperand(3)});
1203 Intr->replaceAllUsesWith(NewCall);
1204 Intr->eraseFromParent();
1216 assert(
ID == Intrinsic::memcpy ||
ID == Intrinsic::memmove);
1220 Builder.CreateMemTransferInst(
ID,
MI->getRawDest(),
MI->getDestAlign(),
1221 MI->getRawSource(),
MI->getSourceAlign(),
1222 MI->getLength(),
MI->isVolatile());
1224 for (
unsigned I = 0;
I != 2; ++
I) {
1225 if (
uint64_t Bytes =
Intr->getParamDereferenceableBytes(
I)) {
1226 B->addDereferenceableParamAttr(
I, Bytes);
1230 Intr->eraseFromParent();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
Rewrite Partial Register Uses
AMD GCN specific subclass of TargetSubtarget.
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Class for arbitrary precision integers.
static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
Dual division/remainder interface.
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
Represents analyses that only rely on functions' control flow.
void addDereferenceableRetAttr(uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
This class represents a function call, abstracting a target machine's calling convention.
static bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
This is an important base class in LLVM.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
bool hasExternalLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
unsigned getAddressSpace() const
@ InternalLinkage
Rename collisions when linking (static functions).
Type * getValueType() const
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class implements a map that also provides access to all stored values in a deterministic order.
std::pair< KeyT, ValueT > & front()
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static PointerType * getWithSamePointeeType(PointerType *PT, unsigned AddressSpace)
This constructs a pointer type with the same pointee type as input PointerType (or opaque pointer if ...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
This class represents the LLVM 'select' instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
Triple - Helper class for working with autoconf configuration names.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getInt32Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
iterator_range< use_iterator > uses()
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
const CustomOperand< const MCSubtargetInfo & > Msg[]
bool isEntryFunctionCC(CallingConv::ID CC)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
FunctionPass * createAMDGPUPromoteAllocaToVector()
void sort(IteratorTy Start, IteratorTy End)
bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAMDGPUPromoteAlloca()
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
char & AMDGPUPromoteAllocaID
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
char & AMDGPUPromoteAllocaToVectorID
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Function object to check whether the second component of a container supported by std::get (like std:...