#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

#define DEBUG_TYPE "amdgpu-promote-alloca"
static cl::opt<bool>
    DisablePromoteAllocaToVector("disable-promote-alloca-to-vector",
                                 cl::desc("Disable promote alloca to vector"),
                                 cl::init(false));

static cl::opt<bool>
    DisablePromoteAllocaToLDS("disable-promote-alloca-to-lds",
                              cl::desc("Disable promote alloca to LDS"),
                              cl::init(false));

static cl::opt<unsigned> PromoteAllocaToVectorLimit(
    "amdgpu-promote-alloca-to-vector-limit",
    cl::desc("Maximum byte size to consider promote alloca to vector"),
    cl::init(0));
class AMDGPUPromoteAllocaImpl {
private:
  bool IsAMDGCN = false;
  bool IsAMDHSA = false;
  bool collectUsesWithPtrTypes(Value *BaseAlloca, Value *Val,
                               std::vector<Value *> &WorkList) const;

  bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
                                       Instruction *Use, int OpIdx0,
                                       int OpIdx1) const;

  /// Check whether we have enough local memory for promotion.
  bool hasSufficientLocalMem(const Function &F);

  bool tryPromoteAllocaToLDS(AllocaInst &I, bool SufficientLDS);
public:
  AMDGPUPromoteAllocaImpl(TargetMachine &TM) : TM(TM) {
    const Triple &TT = TM.getTargetTriple();
    IsAMDGCN = TT.isAMDGCN();
    IsAMDHSA = TT.getOS() == Triple::AMDHSA;
  }

  bool run(Function &F, bool PromoteToLDS);
};

class AMDGPUPromoteAlloca : public FunctionPass {
public:
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
      return AMDGPUPromoteAllocaImpl(TPC->getTM<TargetMachine>())
          .run(F, /*PromoteToLDS=*/true);
    return false;
  }
};
class AMDGPUPromoteAllocaToVector : public FunctionPass {
public:
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
      return AMDGPUPromoteAllocaImpl(TPC->getTM<TargetMachine>())
          .run(F, /*PromoteToLDS=*/false);
    return false;
  }

  StringRef getPassName() const override {
    return "AMDGPU Promote Alloca to vector";
  }
};
static unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
  if (!TM.getTargetTriple().isAMDGCN())
    return 128;

  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
  unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);

  // A non-entry function has only 32 caller-preserved registers; don't
  // promote an alloca that would force spilling unless we know the function
  // will be inlined.
  if (!F.hasFnAttribute(Attribute::AlwaysInline) &&
      !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    MaxVGPRs = std::min(MaxVGPRs, 32u);
  return MaxVGPRs;
}
char AMDGPUPromoteAlloca::ID = 0;
char AMDGPUPromoteAllocaToVector::ID = 0;

INITIALIZE_PASS_BEGIN(AMDGPUPromoteAlloca, DEBUG_TYPE,
                      "AMDGPU promote alloca to vector or LDS", false, false)
INITIALIZE_PASS_END(AMDGPUPromoteAlloca, DEBUG_TYPE,
                    "AMDGPU promote alloca to vector or LDS", false, false)

INITIALIZE_PASS(AMDGPUPromoteAllocaToVector, DEBUG_TYPE "-to-vector",
                "AMDGPU promote alloca to vector", false, false)
PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
                                               FunctionAnalysisManager &AM) {
  bool Changed = AMDGPUPromoteAllocaImpl(TM).run(F, /*PromoteToLDS=*/true);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

PreservedAnalyses
AMDGPUPromoteAllocaToVectorPass::run(Function &F, FunctionAnalysisManager &AM) {
  bool Changed = AMDGPUPromoteAllocaImpl(TM).run(F, /*PromoteToLDS=*/false);
  if (!Changed)
    return PreservedAnalyses::all();
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

FunctionPass *llvm::createAMDGPUPromoteAlloca() {
  return new AMDGPUPromoteAlloca();
}

FunctionPass *llvm::createAMDGPUPromoteAllocaToVector() {
  return new AMDGPUPromoteAllocaToVector();
}
bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
  Mod = F.getParent();
  DL = &Mod->getDataLayout();

  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
  if (!ST.isPromoteAllocaEnabled())
    return false;

  MaxVGPRs = getMaxVGPRs(TM, F);

  bool SufficientLDS = PromoteToLDS ? hasSufficientLocalMem(F) : false;

  SmallVector<AllocaInst *, 16> Allocas;
  for (Instruction &I : F.getEntryBlock()) {
    if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
      // Array allocations are probably not worth handling, since an
      // allocation of the array type is the canonical form.
      if (!AI->isStaticAlloca() || AI->isArrayAllocation())
        continue;
      Allocas.push_back(AI);
    }
  }

  bool Changed = false;
  for (AllocaInst *AI : Allocas) {
    if (tryPromoteAllocaToVector(*AI))
      Changed = true;
    else if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
      Changed = true;
  }

  return Changed;
}
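// Illustrative only: a candidate must be a static, non-array alloca in the
// entry block, e.g.
//   %stack = alloca [4 x i32], align 4, addrspace(5)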
// Checks whether I is a memset we can turn into a vector splat: only
// non-volatile memsets that cover the whole alloca are handled.
static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
                              const DataLayout &DL) {
  using namespace PatternMatch;
  const unsigned Size = DL.getTypeStoreSize(AI->getAllocatedType());
  return I->getOperand(0) == AI &&
         match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
}
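// Illustrative IR (assumed shapes, not from this file): a memset qualifies
// only when it is non-volatile and its length equals the alloca's store
// size, e.g. for a 16-byte alloca:
//   call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 16,
//                                 i1 false)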
static Value *
calculateVectorIndex(Value *Ptr,
                     const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
  if (!GEP)
    return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));

  auto I = GEPIdx.find(GEP);
  assert(I != GEPIdx.end() && "Must have entry for GEP!");
  return I->second;
}
static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
                               Type *VecElemTy, const DataLayout &DL) {
  unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
  SmallMapVector<Value *, APInt, 4> VarOffsets;
  APInt ConstOffset(BW, 0);
  if (GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
      !GEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
    return nullptr;

  unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy);
  if (VarOffsets.size() > 1)
    return nullptr;

  if (VarOffsets.size() == 1) {
    // Only handle cases where we don't need to insert extra arithmetic
    // instructions.
    const auto &VarOffset = VarOffsets.front();
    if (!ConstOffset.isZero() || VarOffset.second != VecElemSize)
      return nullptr;
    return VarOffset.first;
  }

  // A pure constant offset divides down to an element index.
  APInt Quot;
  uint64_t Rem;
  APInt::udivrem(ConstOffset, VecElemSize, Quot, Rem);
  if (Rem != 0)
    return nullptr;
  return ConstantInt::get(GEP->getContext(), Quot);
}
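// Illustrative only: when promoting to <4 x float>, a GEP such as
//   %p = getelementptr inbounds float, ptr addrspace(5) %alloca, i32 %i
// collects to a single variable offset %i scaled by 4 (the element size)
// with a zero constant part, so %i is usable directly as the vector index.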
static Value *promoteAllocaUserToVector(
    Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
    unsigned VecStoreSize, unsigned ElementSize,
    DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
    std::map<GetElementPtrInst *, Value *> &GEPVectorIdx, Value *CurVal,
    SmallVectorImpl<LoadInst *> &DeferredLoads) {
  IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
                                        InstSimplifyFolder(DL));
  Builder.SetInsertPoint(Inst);

  // Fetch the current value of the vector; if it isn't known yet, insert a
  // dummy load to be fixed up on the second pass.
  const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
    if (CurVal)
      return CurVal;
    LoadInst *Dummy =
        Builder.CreateLoad(VectorTy, PoisonValue::get(Builder.getPtrTy()),
                           "promotealloca.dummyload");
    DeferredLoads.push_back(Dummy);
    return Dummy;
  };

  // Cast a pointer (or vector of pointers) to integers of the same total
  // size, so that a plain bit/pointer cast to the target type is possible.
  const auto CreateTempPtrIntCast = [&](Value *Val, Type *PtrTy) -> Value * {
    assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
    const unsigned Size = DL.getTypeStoreSizeInBits(PtrTy);
    if (!PtrTy->isVectorTy())
      return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(Size));
    const unsigned NumPtrElts = cast<FixedVectorType>(PtrTy)->getNumElements();
    assert((Size % NumPtrElts == 0) && "Vector size not divisible");
    return Builder.CreateBitOrPointerCast(
        Val, FixedVectorType::get(Builder.getIntNTy(Size / NumPtrElts),
                                  NumPtrElts));
  };

  Type *VecEltTy = VectorTy->getElementType();

  switch (Inst->getOpcode()) {
  case Instruction::Load: {
    // Loads can only be lowered if the value is known.
    if (!CurVal) {
      DeferredLoads.push_back(cast<LoadInst>(Inst));
      return nullptr;
    }

    Value *Index = calculateVectorIndex(
        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);

    // We're loading the full vector.
    Type *AccessTy = Inst->getType();
    TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
      if (AccessTy->isPtrOrPtrVectorTy())
        CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
      else if (CurVal->getType()->isPtrOrPtrVectorTy())
        CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
      Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
      Inst->replaceAllUsesWith(NewVal);
      return nullptr;
    }

    // Loading a subvector.
    if (isa<FixedVectorType>(AccessTy)) {
      const unsigned NumLoadedElts = AccessSize / DL.getTypeStoreSize(VecEltTy);
      auto *SubVecTy = FixedVectorType::get(VecEltTy, NumLoadedElts);
      assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));

      Value *SubVec = PoisonValue::get(SubVecTy);
      unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
      for (unsigned K = 0; K < NumLoadedElts; ++K) {
        SubVec = Builder.CreateInsertElement(
            SubVec, Builder.CreateExtractElement(CurVal, IndexVal + K), K);
      }

      if (AccessTy->isPtrOrPtrVectorTy())
        SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
      else if (SubVecTy->isPtrOrPtrVectorTy())
        SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);

      SubVec = Builder.CreateBitOrPointerCast(SubVec, AccessTy);
      Inst->replaceAllUsesWith(SubVec);
      return nullptr;
    }

    // We're loading one element.
    Value *ExtractElement = Builder.CreateExtractElement(CurVal, Index);
    if (AccessTy != VecEltTy)
      ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);

    Inst->replaceAllUsesWith(ExtractElement);
    return nullptr;
  }
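  // Illustrative lowering (hand-written, not from the source): with the
  // alloca tracked as <4 x float> %vec,
  //   %v = load float, ptr addrspace(5) %gep
  // becomes
  //   %v = extractelement <4 x float> %vec, i32 %idx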
  case Instruction::Store: {
    // Whether we need the current value depends on whether the store covers
    // the whole vector or only part of it.
    StoreInst *SI = cast<StoreInst>(Inst);
    Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
    Value *Val = SI->getValueOperand();

    // We're storing the full vector; CurVal is not needed.
    Type *AccessTy = Val->getType();
    TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
      if (AccessTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, AccessTy);
      else if (VectorTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, VectorTy);
      return Builder.CreateBitOrPointerCast(Val, VectorTy);
    }

    // Storing a subvector.
    if (isa<FixedVectorType>(AccessTy)) {
      const unsigned NumWrittenElts =
          AccessSize / DL.getTypeStoreSize(VecEltTy);
      const unsigned NumVecElts = VectorTy->getNumElements();
      auto *SubVecTy = FixedVectorType::get(VecEltTy, NumWrittenElts);
      assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));

      if (SubVecTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, SubVecTy);
      else if (AccessTy->isPtrOrPtrVectorTy())
        Val = CreateTempPtrIntCast(Val, AccessTy);

      Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);

      unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
      Value *CurVec = GetOrLoadCurrentVectorValue();
      for (unsigned K = 0; K < NumWrittenElts && ((IndexVal + K) < NumVecElts);
           ++K) {
        CurVec = Builder.CreateInsertElement(
            CurVec, Builder.CreateExtractElement(Val, K), IndexVal + K);
      }
      return CurVec;
    }

    // Storing one element.
    if (Val->getType() != VecEltTy)
      Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
    return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
                                       Index);
  }
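  // Illustrative lowering (hand-written): a single-element store such as
  //   store float %v, ptr addrspace(5) %gep
  // becomes a new vector value
  //   %vec.new = insertelement <4 x float> %vec, float %v, i32 %idx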
  case Instruction::Call: {
    if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
      ConstantInt *Length = cast<ConstantInt>(MTI->getLength());
      unsigned NumCopied = Length->getZExtValue() / ElementSize;
      MemTransferInfo *TI = &TransferInfo[MTI];
      unsigned SrcBegin = TI->SrcIndex->getZExtValue();
      unsigned DestBegin = TI->DestIndex->getZExtValue();

      // The copy becomes a shuffle: copied lanes read from the source range,
      // every other lane keeps its current value.
      SmallVector<int> Mask;
      for (unsigned Idx = 0; Idx < VectorTy->getNumElements(); ++Idx) {
        if (Idx >= DestBegin && Idx < DestBegin + NumCopied)
          Mask.push_back(SrcBegin++);
        else
          Mask.push_back(Idx);
      }

      return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
    }
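    // Worked example (illustrative): for a 4-element vector, copying two
    // elements from index 0 to index 2 (SrcBegin = 0, DestBegin = 2,
    // NumCopied = 2) produces the mask <0, 1, 0, 1>: lanes 0 and 1 keep
    // their values, lanes 2 and 3 read from lanes 0 and 1.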
    if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
      // Only whole-alloca memsets are accepted, so the previous value does
      // not matter: splat the byte-sized fill value across the vector.
      Value *Elt = MSI->getOperand(1);
      if (DL.getTypeStoreSize(VecEltTy) > 1) {
        Value *EltBytes =
            Builder.CreateVectorSplat(DL.getTypeStoreSize(VecEltTy), Elt);
        Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
      }
      return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt);
    }

    llvm_unreachable("Unsupported call when promoting alloca to vector");
  }
  default:
    llvm_unreachable("Inconsistency in instructions promotable to vector");
  }
}
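// Worked example for the memset lowering above (illustrative): a memset with
// byte 0x2A on a <4 x i32> vector first splats the byte to <4 x i8>, bitcasts
// that to the i32 0x2A2A2A2A, then splats it across all four i32 lanes.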
static bool isSupportedAccessType(FixedVectorType *VecTy, Type *AccessTy,
                                  const DataLayout &DL) {
  // A vector access works if its size is a known multiple of the size of the
  // alloca vector's element type.
  if (isa<FixedVectorType>(AccessTy)) {
    TypeSize AccTS = DL.getTypeStoreSize(AccessTy);
    TypeSize VecTS = DL.getTypeStoreSize(VecTy->getElementType());
    return AccTS.isKnownMultipleOf(VecTS);
  }

  return CastInst::isBitOrNoopPointerCastable(AccessTy,
                                              VecTy->getElementType(), DL);
}
/// Iterates over an instruction worklist that may contain multiple
/// instructions from the same basic block, visiting the users within each
/// block in program order. SSAUpdater only handles cross-block references,
/// so in-block ordering must be handled here.
template <typename InstContainer>
static void forEachWorkListItem(const InstContainer &WorkList,
                                std::function<void(Instruction *)> Fn) {
  DenseMap<BasicBlock *, SmallDenseSet<Instruction *>> UsesByBlock;
  for (Instruction *User : WorkList)
    UsesByBlock[User->getParent()].insert(User);

  for (Instruction *User : WorkList) {
    BasicBlock *BB = User->getParent();
    auto &BlockUses = UsesByBlock[BB];

    // Already processed, skip.
    if (BlockUses.empty())
      continue;

    // Only user in the block, process it directly.
    if (BlockUses.size() == 1) {
      Fn(User);
      continue;
    }

    // Multiple users in the block: scan the block to visit them in order.
    for (Instruction &Inst : *BB) {
      if (!BlockUses.contains(&Inst))
        continue;
      Fn(&Inst);
    }

    // Clear the block so we know it's been processed.
    BlockUses.clear();
  }
}
// FIXME: Should try to pick the most likely to be profitable allocas first.
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
  LLVM_DEBUG(dbgs() << "Trying to promote to vector: " << Alloca << '\n');

  if (DisablePromoteAllocaToVector) {
    LLVM_DEBUG(dbgs() << "  Promote alloca to vector is disabled\n");
    return false;
  }

  Type *AllocaTy = Alloca.getAllocatedType();
  auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
  if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
    if (VectorType::isValidElementType(ArrayTy->getElementType()) &&
        ArrayTy->getNumElements() > 0)
      VectorTy = FixedVectorType::get(ArrayTy->getElementType(),
                                      ArrayTy->getNumElements());
  }
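  // Illustrative only: an alloca of [4 x float] is treated as <4 x float>
  // here; [8 x i8] would likewise become <8 x i8>.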
  // Use up to 1/4 of the available register budget for vectorization.
  unsigned Limit = PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
                                              : (MaxVGPRs * 32);

  if (DL->getTypeSizeInBits(AllocaTy) * 4 > Limit) {
    LLVM_DEBUG(dbgs() << "  Alloca too big for vectorization with " << MaxVGPRs
                      << " registers available\n");
    return false;
  }
  if (!VectorTy) {
    LLVM_DEBUG(dbgs() << "  Cannot convert type to vector\n");
    return false;
  }

  // FIXME: There is no reason why we can't support larger arrays; we are
  // just being conservative for now.
  if (VectorTy->getNumElements() > 16 || VectorTy->getNumElements() < 2) {
    LLVM_DEBUG(dbgs() << "  " << *VectorTy
                      << " has an unsupported number of elements\n");
    return false;
  }
  std::map<GetElementPtrInst *, Value *> GEPVectorIdx;
  SmallVector<Instruction *> WorkList;
  SmallVector<Instruction *> UsersToRemove;
  SmallVector<Instruction *> DeferredInsts;
  SmallVector<Use *, 8> Uses;
  DenseMap<MemTransferInst *, MemTransferInfo> TransferInfo;

  const auto RejectUser = [&](Instruction *Inst, Twine Msg) {
    LLVM_DEBUG(dbgs() << "  Cannot promote alloca to vector: " << Msg << "\n"
                      << "    " << *Inst << "\n");
    return false;
  };

  for (Use &U : Alloca.uses())
    Uses.push_back(&U);

  LLVM_DEBUG(dbgs() << "  Attempting promotion to: " << *VectorTy << "\n");
  Type *VecEltTy = VectorTy->getElementType();
  unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8;
  while (!Uses.empty()) {
    Use *U = Uses.pop_back_val();
    Instruction *Inst = cast<Instruction>(U->getUser());

    if (Value *Ptr = getLoadStorePointerOperand(Inst)) {
      // This is a store *of* the pointer, not a store *to* the pointer.
      if (isa<StoreInst>(Inst) &&
          U->getOperandNo() != StoreInst::getPointerOperandIndex())
        return RejectUser(Inst, "pointer is being stored");

      Type *AccessTy = getLoadStoreType(Inst);
      if (AccessTy->isAggregateType())
        return RejectUser(Inst, "unsupported load/store as aggregate");

      Ptr = Ptr->stripPointerCasts();

      // Alloca already accessed as a whole vector.
      if (Ptr == &Alloca && DL->getTypeStoreSize(Alloca.getAllocatedType()) ==
                                DL->getTypeStoreSize(AccessTy)) {
        WorkList.push_back(Inst);
        continue;
      }

      bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
                                          : cast<StoreInst>(Inst)->isSimple();
      if (!IsSimple)
        return RejectUser(Inst, "not a simple load or store");
      if (!isSupportedAccessType(VectorTy, AccessTy, *DL))
        return RejectUser(Inst, "not a supported access type");

      WorkList.push_back(Inst);
      continue;
    }
    if (isa<BitCastInst>(Inst)) {
      // Look through bitcasts.
      for (Use &U : Inst->uses())
        Uses.push_back(&U);
      UsersToRemove.push_back(Inst);
      continue;
    }

    if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
      // If we can't compute a vector index from this GEP, the alloca cannot
      // be promoted.
      Value *Index = GEPToVectorIndex(GEP, &Alloca, VecEltTy, *DL);
      if (!Index)
        return RejectUser(Inst, "cannot compute vector index for GEP");

      GEPVectorIdx[GEP] = Index;
      for (Use &U : Inst->uses())
        Uses.push_back(&U);
      UsersToRemove.push_back(Inst);
      continue;
    }
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst);
        MSI && isSupportedMemset(MSI, &Alloca, *DL)) {
      WorkList.push_back(Inst);
      continue;
    }

    if (MemTransferInst *TransferInst = dyn_cast<MemTransferInst>(Inst)) {
      if (TransferInst->isVolatile())
        return RejectUser(Inst, "mem transfer inst is volatile");

      ConstantInt *Len = dyn_cast<ConstantInt>(TransferInst->getLength());
      if (!Len || (Len->getZExtValue() % ElementSize))
        return RejectUser(Inst, "mem transfer inst length is non-constant or "
                                "not a multiple of the vector element size");

      if (!TransferInfo.count(TransferInst)) {
        DeferredInsts.push_back(Inst);
        WorkList.push_back(Inst);
        TransferInfo[TransferInst] = MemTransferInfo();
      }

      auto getPointerIndexOfAlloca = [&](Value *Ptr) -> ConstantInt * {
        GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
        if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
          return nullptr;
        return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
      };

      unsigned OpNum = U->getOperandNo();
      MemTransferInfo *TI = &TransferInfo[TransferInst];
      if (OpNum == 0) {
        Value *Dest = TransferInst->getDest();
        ConstantInt *Index = getPointerIndexOfAlloca(Dest);
        if (!Index)
          return RejectUser(Inst, "could not calculate constant dest index");
        TI->DestIndex = Index;
      } else {
        assert(OpNum == 1);
        Value *Src = TransferInst->getSource();
        ConstantInt *Index = getPointerIndexOfAlloca(Src);
        if (!Index)
          return RejectUser(Inst, "could not calculate constant src index");
        TI->SrcIndex = Index;
      }
      continue;
    }
    // Ignore comparisons whose only users are assume-like intrinsics.
    if (isa<ICmpInst>(Inst) && all_of(Inst->users(), [](User *U) {
          return isAssumeLikeIntrinsic(cast<Instruction>(U));
        })) {
      UsersToRemove.push_back(Inst);
      continue;
    }

    return RejectUser(Inst, "unhandled alloca user");
  }
  while (!DeferredInsts.empty()) {
    Instruction *Inst = DeferredInsts.pop_back_val();
    MemTransferInst *TransferInst = cast<MemTransferInst>(Inst);
    // Both the source and destination indices of a mem transfer must have
    // been resolved to constants by now.
    MemTransferInfo &Info = TransferInfo[TransferInst];
    if (!Info.SrcIndex || !Info.DestIndex)
      return RejectUser(
          Inst, "mem transfer inst is missing constant src and/or dst index");
  }
  LLVM_DEBUG(dbgs() << "  Converting alloca to vector " << *AllocaTy << " -> "
                    << *VectorTy << '\n');
  const unsigned VecStoreSize = DL->getTypeStoreSize(VectorTy);

  // The alloca is uninitialized memory; imitate that by starting from an
  // undef value.
  SSAUpdater Updater;
  Updater.Initialize(VectorTy, "promotealloca");
  Updater.AddAvailableValue(Alloca.getParent(), UndefValue::get(VectorTy));
  // First pass: handle the initial worklist, taking only values that are
  // trivially known (i.e. where AddAvailableValue was already called for the
  // block) and deferring loads whose value is not yet known.
  SmallVector<LoadInst *, 4> DeferredLoads;
  forEachWorkListItem(WorkList, [&](Instruction *I) {
    BasicBlock *BB = I->getParent();
    Value *Result = promoteAllocaUserToVector(
        I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
        Updater.FindValueForBlock(BB), DeferredLoads);
    if (Result)
      Updater.AddAvailableValue(BB, Result);
  });

  // Second pass: handle the deferred loads. GetValueInMiddleOfBlock always
  // produces a value, inserting PHIs as needed.
  forEachWorkListItem(DeferredLoads, [&](Instruction *I) {
    SmallVector<LoadInst *, 0> NewDLs;
    BasicBlock *BB = I->getParent();
    Value *Result = promoteAllocaUserToVector(
        I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
        Updater.GetValueInMiddleOfBlock(BB), NewDLs);
    if (Result)
      Updater.AddAvailableValue(BB, Result);
    assert(NewDLs.empty() && "No more deferred loads should be queued!");
  });

  // Delete all rewritten instructions; dummy loads added on the first pass
  // must be collected too.
  DenseSet<Instruction *> InstsToDelete(WorkList.begin(), WorkList.end());
  InstsToDelete.insert(DeferredLoads.begin(), DeferredLoads.end());
  for (Instruction *I : InstsToDelete) {
    assert(I->use_empty());
    I->eraseFromParent();
  }

  // Delete all the users that are known to be removable.
  for (Instruction *I : reverse(UsersToRemove)) {
    I->dropDroppableUses();
    assert(I->use_empty());
    I->eraseFromParent();
  }

  // The alloca should now be dead too.
  assert(Alloca.use_empty());
  Alloca.eraseFromParent();
  return true;
}
std::pair<Value *, Value *>
AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
  Function &F = *Builder.GetInsertBlock()->getParent();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);

  if (!IsAMDHSA) {
    // On non-HSA targets, read the local size via the r600 intrinsics.
    CallInst *LocalSizeY = Builder.CreateCall(
        Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y), {});
    CallInst *LocalSizeZ = Builder.CreateCall(
        Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z), {});

    ST.makeLIDRangeMetadata(LocalSizeY);
    ST.makeLIDRangeMetadata(LocalSizeZ);

    return std::pair(LocalSizeY, LocalSizeZ);
  }
  // On AMDHSA, read the workgroup size out of the dispatch packet.
  Function *DispatchPtrFn =
      Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);

  CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
  DispatchPtr->addRetAttr(Attribute::NoAlias);
  DispatchPtr->addRetAttr(Attribute::NonNull);
  F.removeFnAttr("amdgpu-no-dispatch-ptr");

  // Size of the dispatch packet struct.
  DispatchPtr->addDereferenceableRetAttr(64);

  Type *I32Ty = Type::getInt32Ty(Mod->getContext());
  Value *CastDispatchPtr = Builder.CreateBitCast(
      DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));

  // A single 64-bit load would also work, but the 32-bit load plus extract
  // sequence is likely already present and is easier to CSE.
  Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
  LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, Align(4));

  Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
  LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, Align(4));

  MDNode *MD = MDNode::get(Mod->getContext(), std::nullopt);
  LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
  LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
  ST.makeLIDRangeMetadata(LoadZU);

  // Extract the Y component; the upper half of LoadZU should already be zero.
  Value *Y = Builder.CreateLShr(LoadXY, 16);

  return std::pair(Y, LoadZU);
}
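// For reference: in the HSA kernel dispatch packet, the uint16_t fields
// workgroup_size_x and workgroup_size_y share the dword at offset 1 (x in
// the low half, y in the high half), and workgroup_size_z sits in the low
// half of dword 2, which is why dwords 1 and 2 are loaded and LoadXY is
// shifted right by 16.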
Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
                                              unsigned N) {
  Function *F = Builder.GetInsertBlock()->getParent();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *F);
  Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
  StringRef AttrName;

  switch (N) {
  case 0:
    IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
                      : (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
    AttrName = "amdgpu-no-workitem-id-x";
    break;
  case 1:
    IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
                      : (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
    AttrName = "amdgpu-no-workitem-id-y";
    break;
  case 2:
    IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
                      : (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
    AttrName = "amdgpu-no-workitem-id-z";
    break;
  default:
    llvm_unreachable("invalid dimension");
  }

  Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
  CallInst *CI = Builder.CreateCall(WorkitemIdFn);
  ST.makeLIDRangeMetadata(CI);
  F->removeFnAttr(AttrName);

  return CI;
}
static bool isCallPromotable(CallInst *CI) {
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (!II)
    return false;

  switch (II->getIntrinsicID()) {
  case Intrinsic::memcpy:
  case Intrinsic::memmove:
  case Intrinsic::memset:
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
  case Intrinsic::invariant_end:
  case Intrinsic::launder_invariant_group:
  case Intrinsic::strip_invariant_group:
  case Intrinsic::objectsize:
    return true;
  default:
    return false;
  }
}
bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
    Value *BaseAlloca, Value *Val, Instruction *Inst, int OpIdx0,
    int OpIdx1) const {
  // Figure out which operand is the one we might not be promoting.
  Value *OtherOp = Inst->getOperand(OpIdx0);
  if (Val == OtherOp)
    OtherOp = Inst->getOperand(OpIdx1);

  if (isa<ConstantPointerNull>(OtherOp))
    return true;

  Value *OtherObj = getUnderlyingObject(OtherOp);
  if (!isa<AllocaInst>(OtherObj))
    return false;

  // Both objects must end up in the same address space, so only a binary op
  // against the alloca being promoted is safe.
  if (OtherObj != BaseAlloca) {
    LLVM_DEBUG(
        dbgs() << "Found a binary instruction with another alloca object\n");
    return false;
  }

  return true;
}
bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
    Value *BaseAlloca, Value *Val, std::vector<Value *> &WorkList) const {
  for (User *User : Val->users()) {
    if (is_contained(WorkList, User))
      continue;

    if (CallInst *CI = dyn_cast<CallInst>(User)) {
      if (!isCallPromotable(CI))
        return false;

      WorkList.push_back(User);
      continue;
    }

    Instruction *UseInst = cast<Instruction>(User);
    if (UseInst->getOpcode() == Instruction::PtrToInt)
      return false;

    if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
      if (LI->isVolatile())
        return false;
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(UseInst)) {
      if (SI->isVolatile())
        return false;

      // Reject if the stored value is not the pointer operand.
      if (SI->getPointerOperand() != Val)
        return false;
    } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
      if (RMW->isVolatile())
        return false;
    } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
      if (CAS->isVolatile())
        return false;
    }

    // Only promote a comparison if we know the other operand is derived from
    // the same alloca.
    if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
        return false;

      // May need to rewrite constant operands.
      WorkList.push_back(ICmp);
    }

    if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
      // Give up if the pointer may be captured.
      if (PointerMayBeCaptured(UseInst, true, true))
        return false;
      // Don't collect the users of this.
      WorkList.push_back(User);
      continue;
    }

    // Do not promote vector/aggregate type instructions. It is hard to track
    // their users.
    if (isa<InsertValueInst>(User) || isa<InsertElementInst>(User))
      return false;

    if (!User->getType()->isPointerTy())
      continue;

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(UseInst)) {
      // Be conservative if an address could be computed outside the bounds
      // of the alloca.
      if (!GEP->isInBounds())
        return false;
    }

    // Only promote a select if we know that the other select operand is from
    // another pointer that will also be promoted.
    if (SelectInst *SI = dyn_cast<SelectInst>(UseInst)) {
      if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, SI, 1, 2))
        return false;
    }

    // Repeat for phis.
    if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
      // TODO: Handle more complex cases. We should be able to replace loops
      // over arrays.
      switch (Phi->getNumIncomingValues()) {
      case 1:
        break;
      case 2:
        if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
          return false;
        break;
      default:
        return false;
      }
    }

    WorkList.push_back(User);
    if (!collectUsesWithPtrTypes(BaseAlloca, User, WorkList))
      return false;
  }

  return true;
}
bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
  FunctionType *FTy = F.getFunctionType();
  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);

  // If the function has any arguments in the local address space, it's
  // possible these arguments require the entire local memory space, so we
  // cannot use local memory in the pass.
  for (Type *ParamTy : FTy->params()) {
    PointerType *PtrTy = dyn_cast<PointerType>(ParamTy);
    if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
      LocalMemLimit = 0;
      LLVM_DEBUG(dbgs() << "Function has local memory argument. Promoting to "
                           "local memory disabled.\n");
      return false;
    }
  }

  LocalMemLimit = ST.getAddressableLocalMemorySize();
  if (LocalMemLimit == 0)
    return false;

  SmallVector<const Constant *, 16> Stack;
  SmallPtrSet<const Constant *, 8> VisitedConstants;
  SmallPtrSet<const GlobalVariable *, 8> UsedLDS;

  auto visitUsers = [&](const GlobalVariable *GV, const Constant *Val) -> bool {
    for (const User *U : Val->users()) {
      if (const Instruction *Use = dyn_cast<Instruction>(U)) {
        if (Use->getParent()->getParent() == &F)
          return true;
      } else {
        const Constant *C = cast<Constant>(U);
        if (VisitedConstants.insert(C).second)
          Stack.push_back(C);
      }
    }

    return false;
  };

  for (GlobalVariable &GV : Mod->globals()) {
    if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
      continue;

    if (visitUsers(&GV, &GV)) {
      UsedLDS.insert(&GV);
      Stack.clear();
      continue;
    }

    // For any ConstantExpr uses, we need to recursively search the users
    // until we see a function.
    while (!Stack.empty()) {
      const Constant *C = Stack.pop_back_val();
      if (visitUsers(&GV, C)) {
        UsedLDS.insert(&GV);
        Stack.clear();
        break;
      }
    }
  }

  const DataLayout &DL = Mod->getDataLayout();
  SmallVector<std::pair<uint64_t, Align>, 16> AllocatedSizes;
  AllocatedSizes.reserve(UsedLDS.size());

  for (const GlobalVariable *GV : UsedLDS) {
    Align Alignment =
        DL.getValueOrABITypeAlignment(GV->getAlign(), GV->getValueType());
    uint64_t AllocSize = DL.getTypeAllocSize(GV->getValueType());

    // HIP uses an extern unsized array in local address space for dynamically
    // allocated shared memory. In that case, promotion must be disabled.
    if (GV->hasExternalLinkage() && AllocSize == 0) {
      LocalMemLimit = 0;
      LLVM_DEBUG(dbgs() << "Function has a reference to externally allocated "
                           "local memory. Promoting to local memory "
                           "disabled.\n");
      return false;
    }

    AllocatedSizes.emplace_back(AllocSize, Alignment);
  }

  // Sort to try to estimate the worst case alignment padding.
  //
  // FIXME: We should really do something to fix the addresses to a more
  // optimal value instead.
  llvm::sort(AllocatedSizes, llvm::less_second());

  // Check how much local memory is being used by global objects.
  CurrentLocalMemUsage = 0;
  for (auto Alloc : AllocatedSizes) {
    CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Alloc.second);
    CurrentLocalMemUsage += Alloc.first;
  }

  unsigned MaxOccupancy =
      ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage, F);

  // Restrict local memory usage so that we don't drastically reduce
  // occupancy, unless it is already significantly reduced.
  unsigned OccupancyHint = ST.getWavesPerEU(F).second;
  if (OccupancyHint == 0)
    OccupancyHint = 7;

  // Clamp to the maximum wave count.
  OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());

  // Check the hint, but ignore it if it's obviously wrong from the existing
  // LDS usage.
  MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);

  // Round up to the next tier of usage.
  unsigned MaxSizeWithWaveCount =
      ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);

  // The program is possibly broken by using more local mem than available.
  if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
    return false;

  LocalMemLimit = MaxSizeWithWaveCount;

  LLVM_DEBUG(dbgs() << F.getName() << " uses " << CurrentLocalMemUsage
                    << " bytes of LDS\n"
                    << "  Rounding size to " << MaxSizeWithWaveCount
                    << " with a maximum occupancy of " << MaxOccupancy << '\n'
                    << " and " << (LocalMemLimit - CurrentLocalMemUsage)
                    << " available for promotion\n");

  return true;
}
bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
                                                    bool SufficientLDS) {
  LLVM_DEBUG(dbgs() << "Trying to promote to LDS: " << I << '\n');

  if (DisablePromoteAllocaToLDS) {
    LLVM_DEBUG(dbgs() << "  Promote alloca to LDS is disabled\n");
    return false;
  }

  const DataLayout &DL = Mod->getDataLayout();
  IRBuilder<> Builder(&I);

  const Function &ContainingFunction = *I.getParent()->getParent();
  CallingConv::ID CC = ContainingFunction.getCallingConv();

  // Don't promote the alloca to LDS for shader calling conventions: the
  // workitem ID intrinsics are not supported there, and not all of LDS is
  // available for some stages.
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    break;
  default:
    LLVM_DEBUG(
        dbgs()
        << " promote alloca to LDS not supported with calling convention.\n");
    return false;
  }

  // Not likely to have sufficient local memory for promotion.
  if (!SufficientLDS)
    return false;

  const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, ContainingFunction);
  unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;

  Align Alignment =
      DL.getValueOrABITypeAlignment(I.getAlign(), I.getAllocatedType());

  // Every workitem needs its own copy, so reserve WorkGroupSize * AllocSize
  // bytes on top of the current usage (plus alignment padding).
  uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
  uint32_t AllocSize =
      WorkGroupSize * DL.getTypeAllocSize(I.getAllocatedType());
  NewSize += AllocSize;

  if (NewSize > LocalMemLimit) {
    LLVM_DEBUG(dbgs() << "  " << AllocSize
                      << " bytes of local memory not available to promote\n");
    return false;
  }

  CurrentLocalMemUsage = NewSize;
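  // Worked numbers (illustrative): with a maximum flat workgroup size of 256
  // and a 64-byte alloca, promotion reserves 256 * 64 = 16384 bytes of LDS,
  // plus alignment padding on top of CurrentLocalMemUsage.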
  std::vector<Value *> WorkList;

  if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
    LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
    return false;
  }

  LLVM_DEBUG(dbgs() << "Promoting alloca to local memory\n");

  Function *F = I.getParent()->getParent();

  Type *GVTy = ArrayType::get(I.getAllocatedType(), WorkGroupSize);
  GlobalVariable *GV = new GlobalVariable(
      *Mod, GVTy, false, GlobalValue::InternalLinkage, PoisonValue::get(GVTy),
      Twine(F->getName()) + Twine('.') + I.getName(), nullptr,
      GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
  GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  GV->setAlignment(I.getAlign());
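  // Illustrative result (names assumed): for an alloca %stack of type
  // [4 x i32] in kernel @foo with a workgroup size of 256, this creates
  // roughly
  //   @foo.stack = internal unnamed_addr addrspace(3)
  //                global [256 x [4 x i32]] poison, align 4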
  Value *TCntY, *TCntZ;
  std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
  Value *TIdX = getWorkitemID(Builder, 0);
  Value *TIdY = getWorkitemID(Builder, 1);
  Value *TIdZ = getWorkitemID(Builder, 2);

  // Linearize the workitem ID:
  //   TID = TIdX * (TCntY * TCntZ) + TIdY * TCntZ + TIdZ.
  Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ, "", /*HasNUW=*/true,
                                  /*HasNSW=*/true);
  Tmp0 = Builder.CreateMul(Tmp0, TIdX);
  Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ, "", /*HasNUW=*/true,
                                  /*HasNSW=*/true);
  Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
  TID = Builder.CreateAdd(TID, TIdZ);

  LLVMContext &Context = Mod->getContext();
  Value *Indices[] = {Constant::getNullValue(Type::getInt32Ty(Context)), TID};

  Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
  I.mutateType(Offset->getType());
  I.replaceAllUsesWith(Offset);
  I.eraseFromParent();
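  // Illustrative result (names assumed): each workitem now addresses its own
  // slice of the LDS array, e.g.
  //   %ptr = getelementptr inbounds [256 x [4 x i32]],
  //          ptr addrspace(3) @foo.stack, i32 0, i32 %tid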
  SmallVector<IntrinsicInst *> DeferredIntrs;

  for (Value *V : WorkList) {
    CallInst *Call = dyn_cast<CallInst>(V);
    if (!Call) {
      if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
        PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);

        if (isa<ConstantPointerNull>(CI->getOperand(0)))
          CI->setOperand(0, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(CI->getOperand(1)))
          CI->setOperand(1, ConstantPointerNull::get(NewTy));

        continue;
      }

      // The operand's value is corrected on its own; don't touch the users.
      if (isa<AddrSpaceCastInst>(V))
        continue;

      PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);

      // FIXME: It doesn't really make sense to try to do this for all
      // instructions.
      V->mutateType(NewTy);

      // Adjust the types of any constant operands.
      if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
        if (isa<ConstantPointerNull>(SI->getOperand(1)))
          SI->setOperand(1, ConstantPointerNull::get(NewTy));

        if (isa<ConstantPointerNull>(SI->getOperand(2)))
          SI->setOperand(2, ConstantPointerNull::get(NewTy));
      } else if (PHINode *Phi = dyn_cast<PHINode>(V)) {
        for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          if (isa<ConstantPointerNull>(Phi->getIncomingValue(I)))
            Phi->setIncomingValue(I, ConstantPointerNull::get(NewTy));
        }
      }

      continue;
    }
    IntrinsicInst *Intr = cast<IntrinsicInst>(Call);
    Builder.SetInsertPoint(Intr);
    switch (Intr->getIntrinsicID()) {
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      // These intrinsics are for address space 0 only.
      Intr->eraseFromParent();
      continue;
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
      // These have two pointer operands. If the second pointer also needs to
      // be replaced, defer processing until all other values are done.
      DeferredIntrs.push_back(Intr);
      continue;
    case Intrinsic::memset: {
      MemSetInst *MemSet = cast<MemSetInst>(Intr);
      Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
                           MemSet->getLength(), MemSet->getDestAlign(),
                           MemSet->isVolatile());
      Intr->eraseFromParent();
      continue;
    }
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
      Intr->eraseFromParent();
      continue;
    case Intrinsic::objectsize: {
      Value *Src = Intr->getOperand(0);
      Function *ObjectSize = Intrinsic::getDeclaration(
          Mod, Intrinsic::objectsize,
          {Intr->getType(),
           PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS)});

      CallInst *NewCall = Builder.CreateCall(
          ObjectSize,
          {Src, Intr->getOperand(1), Intr->getOperand(2), Intr->getOperand(3)});
      Intr->replaceAllUsesWith(NewCall);
      Intr->eraseFromParent();
      continue;
    }
    default:
      Intr->print(errs());
      llvm_unreachable("Don't know how to promote alloca intrinsic use.");
    }
  }
  for (IntrinsicInst *Intr : DeferredIntrs) {
    Builder.SetInsertPoint(Intr);
    Intrinsic::ID ID = Intr->getIntrinsicID();
    assert(ID == Intrinsic::memcpy || ID == Intrinsic::memmove);

    MemTransferInst *MI = cast<MemTransferInst>(Intr);
    auto *B = Builder.CreateMemTransferInst(
        ID, MI->getRawDest(), MI->getDestAlign(), MI->getRawSource(),
        MI->getSourceAlign(), MI->getLength(), MI->isVolatile());

    for (unsigned I = 0; I != 2; ++I) {
      if (uint64_t Bytes = Intr->getParamDereferenceableBytes(I))
        B->addDereferenceableParamAttr(I, Bytes);
    }

    Intr->eraseFromParent();
  }

  return true;
}
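// Illustrative only (assumed pass registration name): the pass can be
// exercised in isolation with the new pass manager, e.g.
//   opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca -S in.ll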