40#include "llvm/IR/IntrinsicsAMDGPU.h"
41#include "llvm/IR/IntrinsicsR600.h"
48#define DEBUG_TYPE "amdgpu-promote-alloca"
55 DisablePromoteAllocaToVector(
"disable-promote-alloca-to-vector",
56 cl::desc(
"Disable promote alloca to vector"),
60 DisablePromoteAllocaToLDS(
"disable-promote-alloca-to-lds",
61 cl::desc(
"Disable promote alloca to LDS"),
65 "amdgpu-promote-alloca-to-vector-limit",
66 cl::desc(
"Maximum byte size to consider promote alloca to vector"),
70 "amdgpu-promote-alloca-to-vector-max-regs",
72 "Maximum vector size (in 32b registers) to use when promoting alloca"),
78 "amdgpu-promote-alloca-to-vector-vgpr-ratio",
79 cl::desc(
"Ratio of VGPRs to budget for promoting alloca to vectors"),
83 LoopUserWeight(
"promote-alloca-vector-loop-user-weight",
84 cl::desc(
"The bonus weight of users of allocas within loop "
85 "when sorting profitable allocas"),
89class AMDGPUPromoteAllocaImpl {
100 unsigned VGPRBudgetRatio;
101 unsigned MaxVectorRegs;
103 bool IsAMDGCN =
false;
104 bool IsAMDHSA =
false;
106 std::pair<Value *, Value *> getLocalSizeYZ(
IRBuilder<> &Builder);
111 bool collectUsesWithPtrTypes(
Value *BaseAlloca,
Value *Val,
112 std::vector<Value *> &WorkList)
const;
118 bool binaryOpIsDerivedFromSameAlloca(
Value *Alloca,
Value *Val,
123 bool hasSufficientLocalMem(
const Function &
F);
127 bool tryPromoteAllocaToLDS(
AllocaInst &
I,
bool SufficientLDS);
131 void setFunctionLimits(
const Function &
F);
137 IsAMDGCN = TT.isAMDGCN();
141 bool run(
Function &
F,
bool PromoteToLDS);
154 if (
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
155 return AMDGPUPromoteAllocaImpl(
157 getAnalysis<LoopInfoWrapperPass>().getLoopInfo())
162 StringRef getPassName()
const override {
return "AMDGPU Promote Alloca"; }
171static unsigned getMaxVGPRs(
unsigned LDSBytes,
const TargetMachine &TM,
181 if (DynamicVGPRBlockSize == 0 && ST.isDynamicVGPREnabled())
182 DynamicVGPRBlockSize = ST.getDynamicVGPRBlockSize();
184 unsigned MaxVGPRs = ST.getMaxNumVGPRs(
185 ST.getWavesPerEU(ST.getFlatWorkGroupSizes(
F), LDSBytes,
F).first,
186 DynamicVGPRBlockSize);
191 if (!
F.hasFnAttribute(Attribute::AlwaysInline) &&
193 MaxVGPRs = std::min(MaxVGPRs, 32u);
199char AMDGPUPromoteAlloca::ID = 0;
202 "AMDGPU promote alloca to vector or LDS",
false,
false)
215 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(
F,
true);
227 bool Changed = AMDGPUPromoteAllocaImpl(TM, LI).run(
F,
false);
237 return new AMDGPUPromoteAlloca();
243 while (!WorkList.empty()) {
244 auto *Cur = WorkList.pop_back_val();
245 for (
auto &U : Cur->uses()) {
254void AMDGPUPromoteAllocaImpl::sortAllocasToPromote(
258 for (
auto *Alloca : Allocas) {
260 unsigned &Score = Scores[Alloca];
264 for (
auto *U :
Uses) {
269 1 + (LoopUserWeight * LI.getLoopDepth(Inst->
getParent()));
270 LLVM_DEBUG(
dbgs() <<
" [+" << UserScore <<
"]:\t" << *Inst <<
"\n");
277 return Scores.
at(
A) > Scores.
at(
B);
282 dbgs() <<
"Sorted Worklist:\n";
283 for (
auto *
A: Allocas)
284 dbgs() <<
" " << *
A <<
"\n";
289void AMDGPUPromoteAllocaImpl::setFunctionLimits(
const Function &
F) {
293 const int R600MaxVectorRegs = 16;
294 MaxVectorRegs =
F.getFnAttributeAsParsedInteger(
295 "amdgpu-promote-alloca-to-vector-max-regs",
296 IsAMDGCN ? PromoteAllocaToVectorMaxRegs : R600MaxVectorRegs);
297 if (PromoteAllocaToVectorMaxRegs.getNumOccurrences())
298 MaxVectorRegs = PromoteAllocaToVectorMaxRegs;
299 VGPRBudgetRatio =
F.getFnAttributeAsParsedInteger(
300 "amdgpu-promote-alloca-to-vector-vgpr-ratio",
301 PromoteAllocaToVectorVGPRRatio);
302 if (PromoteAllocaToVectorVGPRRatio.getNumOccurrences())
303 VGPRBudgetRatio = PromoteAllocaToVectorVGPRRatio;
306bool AMDGPUPromoteAllocaImpl::run(
Function &
F,
bool PromoteToLDS) {
308 DL = &
Mod->getDataLayout();
311 if (!
ST.isPromoteAllocaEnabled())
314 bool SufficientLDS = PromoteToLDS && hasSufficientLocalMem(
F);
315 MaxVGPRs = getMaxVGPRs(CurrentLocalMemUsage, TM,
F);
316 setFunctionLimits(
F);
318 unsigned VectorizationBudget =
319 (PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
328 if (!AI->isStaticAlloca() || AI->isArrayAllocation())
334 sortAllocasToPromote(Allocas);
338 const unsigned AllocaCost =
DL->getTypeSizeInBits(AI->getAllocatedType());
340 if (AllocaCost <= VectorizationBudget) {
343 if (tryPromoteAllocaToVector(*AI)) {
345 assert((VectorizationBudget - AllocaCost) < VectorizationBudget &&
347 VectorizationBudget -= AllocaCost;
349 << VectorizationBudget <<
"\n");
354 << AllocaCost <<
", budget:" << VectorizationBudget
355 <<
"): " << *AI <<
"\n");
358 if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
387 return I->getOperand(0) == AI &&
392 Value *Ptr,
const std::map<GetElementPtrInst *, WeakTrackingVH> &GEPIdx) {
397 auto I = GEPIdx.find(
GEP);
398 assert(
I != GEPIdx.end() &&
"Must have entry for GEP!");
400 Value *IndexValue =
I->second;
401 assert(IndexValue &&
"index value missing from GEP index map");
411 unsigned BW =
DL.getIndexTypeSizeInBits(
GEP->getType());
413 APInt ConstOffset(BW, 0);
434 if (!CurGEP->collectOffset(
DL, BW, VarOffsets, ConstOffset))
438 CurPtr = CurGEP->getPointerOperand();
441 assert(CurPtr == Alloca &&
"GEP not based on alloca");
443 int64_t VecElemSize =
DL.getTypeAllocSize(VecElemTy);
444 if (VarOffsets.
size() > 1)
452 if (VarOffsets.
size() == 0)
453 return ConstantInt::get(Ctx, IndexQuot);
457 const auto &VarOffset = VarOffsets.
front();
460 if (Rem != 0 || OffsetQuot.
isZero())
467 Offset = Builder.CreateSExtOrTrunc(
Offset, Builder.getIntNTy(BW));
468 if (
Offset != VarOffset.first)
471 if (!OffsetQuot.
isOne()) {
481 Value *IndexAdd = Builder.CreateAdd(
Offset, ConstIndex);
504 unsigned VecStoreSize,
unsigned ElementSize,
506 std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx,
512 Builder.SetInsertPoint(Inst);
514 const auto CreateTempPtrIntCast = [&Builder,
DL](
Value *Val,
516 assert(
DL.getTypeStoreSize(Val->getType()) ==
DL.getTypeStoreSize(PtrTy));
517 const unsigned Size =
DL.getTypeStoreSizeInBits(PtrTy);
518 if (!PtrTy->isVectorTy())
519 return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(
Size));
523 assert((
Size % NumPtrElts == 0) &&
"Vector size not divisble");
525 return Builder.CreateBitOrPointerCast(
532 case Instruction::Load: {
533 Value *CurVal = GetCurVal();
539 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
541 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
543 CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
545 CurVal = CreateTempPtrIntCast(CurVal, CurVal->
getType());
546 Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
555 const unsigned NumLoadedElts = AccessSize /
DL.getTypeStoreSize(VecEltTy);
557 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
560 for (
unsigned K = 0; K < NumLoadedElts; ++K) {
562 Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
563 SubVec = Builder.CreateInsertElement(
564 SubVec, Builder.CreateExtractElement(CurVal, CurIdx), K);
568 SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
569 else if (SubVecTy->isPtrOrPtrVectorTy())
570 SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);
572 SubVec = Builder.CreateBitOrPointerCast(SubVec, AccessTy);
578 Value *ExtractElement = Builder.CreateExtractElement(CurVal, Index);
579 if (AccessTy != VecEltTy)
580 ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
585 case Instruction::Store: {
592 Value *Val =
SI->getValueOperand();
596 TypeSize AccessSize =
DL.getTypeStoreSize(AccessTy);
598 if (CI->isZeroValue() && AccessSize == VecStoreSize) {
600 Val = CreateTempPtrIntCast(Val, AccessTy);
602 Val = CreateTempPtrIntCast(Val, VectorTy);
603 return Builder.CreateBitOrPointerCast(Val, VectorTy);
610 const unsigned NumWrittenElts =
611 AccessSize /
DL.getTypeStoreSize(VecEltTy);
614 assert(
DL.getTypeStoreSize(SubVecTy) ==
DL.getTypeStoreSize(AccessTy));
616 if (SubVecTy->isPtrOrPtrVectorTy())
617 Val = CreateTempPtrIntCast(Val, SubVecTy);
619 Val = CreateTempPtrIntCast(Val, AccessTy);
621 Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
623 Value *CurVec = GetCurVal();
624 for (
unsigned K = 0, NumElts = std::min(NumWrittenElts, NumVecElts);
627 Builder.CreateAdd(Index, ConstantInt::get(Index->getType(), K));
628 CurVec = Builder.CreateInsertElement(
629 CurVec, Builder.CreateExtractElement(Val, K), CurIdx);
634 if (Val->
getType() != VecEltTy)
635 Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
636 return Builder.CreateInsertElement(GetCurVal(), Val, Index);
638 case Instruction::Call: {
642 unsigned NumCopied =
Length->getZExtValue() / ElementSize;
649 if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
658 return Builder.CreateShuffleVector(GetCurVal(), Mask);
664 Value *Elt = MSI->getOperand(1);
665 const unsigned BytesPerElt =
DL.getTypeStoreSize(VecEltTy);
666 if (BytesPerElt > 1) {
667 Value *EltBytes = Builder.CreateVectorSplat(BytesPerElt, Elt);
673 Elt = Builder.CreateBitCast(EltBytes, PtrInt);
674 Elt = Builder.CreateIntToPtr(Elt, VecEltTy);
676 Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
683 if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
684 Intr->replaceAllUsesWith(
685 Builder.getIntN(Intr->getType()->getIntegerBitWidth(),
686 DL.getTypeAllocSize(VectorTy)));
715 TypeSize AccTS =
DL.getTypeStoreSize(AccessTy);
719 if (AccTS * 8 !=
DL.getTypeSizeInBits(AccessTy))
731template <
typename InstContainer>
743 auto &BlockUses = UsesByBlock[BB];
746 if (BlockUses.empty())
750 if (BlockUses.size() == 1) {
757 if (!BlockUses.contains(&Inst))
778AMDGPUPromoteAllocaImpl::getVectorTypeForAlloca(
Type *AllocaTy)
const {
779 if (DisablePromoteAllocaToVector) {
786 uint64_t NumElems = 1;
789 NumElems *= ArrayTy->getNumElements();
790 ElemTy = ArrayTy->getElementType();
796 NumElems *= InnerVectorTy->getNumElements();
797 ElemTy = InnerVectorTy->getElementType();
801 unsigned ElementSize =
DL->getTypeSizeInBits(ElemTy) / 8;
802 if (ElementSize > 0) {
803 unsigned AllocaSize =
DL->getTypeStoreSize(AllocaTy);
808 if (NumElems * ElementSize != AllocaSize)
809 NumElems = AllocaSize / ElementSize;
810 if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
820 const unsigned MaxElements =
826 <<
" has an unsupported number of elements\n");
831 unsigned ElementSizeInBits =
DL->getTypeSizeInBits(VecEltTy);
832 if (ElementSizeInBits !=
DL->getTypeAllocSizeInBits(VecEltTy)) {
833 LLVM_DEBUG(
dbgs() <<
" Cannot convert to vector if the allocation size "
834 "does not match the type's size\n");
842bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(
AllocaInst &Alloca) {
843 LLVM_DEBUG(
dbgs() <<
"Trying to promote to vectors: " << Alloca <<
'\n');
850 std::map<GetElementPtrInst *, WeakTrackingVH> GEPVectorIdx;
858 LLVM_DEBUG(
dbgs() <<
" Cannot promote alloca to vector: " << Msg <<
"\n"
859 <<
" " << *Inst <<
"\n");
860 for (
auto *Inst :
reverse(NewGEPInsts))
868 LLVM_DEBUG(
dbgs() <<
" Attempting promotion to: " << *VectorTy <<
"\n");
871 unsigned ElementSize =
DL->getTypeSizeInBits(VecEltTy) / 8;
873 for (
auto *U :
Uses) {
880 return RejectUser(Inst,
"pointer is being stored");
884 return RejectUser(Inst,
"unsupported load/store as aggregate");
891 return RejectUser(Inst,
"not a simple load or store");
893 Ptr = Ptr->stripPointerCasts();
897 DL->getTypeStoreSize(AccessTy)) {
903 return RejectUser(Inst,
"not a supported access type");
914 return RejectUser(Inst,
"cannot compute vector index for GEP");
928 if (TransferInst->isVolatile())
929 return RejectUser(Inst,
"mem transfer inst is volatile");
932 if (!Len || (
Len->getZExtValue() % ElementSize))
933 return RejectUser(Inst,
"mem transfer inst length is non-constant or "
934 "not a multiple of the vector element size");
936 if (TransferInfo.
try_emplace(TransferInst).second) {
943 if (Ptr != &Alloca && !GEPVectorIdx.count(
GEP))
949 unsigned OpNum =
U->getOperandNo();
950 MemTransferInfo *TI = &TransferInfo[TransferInst];
952 Value *Dest = TransferInst->getDest();
955 return RejectUser(Inst,
"could not calculate constant dest index");
959 Value *Src = TransferInst->getSource();
962 return RejectUser(Inst,
"could not calculate constant src index");
969 if (Intr->getIntrinsicID() == Intrinsic::objectsize) {
978 return RejectUser(Inst,
"assume-like intrinsic cannot have any users");
984 return isAssumeLikeIntrinsic(cast<Instruction>(U));
990 return RejectUser(Inst,
"unhandled alloca user");
993 while (!DeferredInsts.
empty()) {
998 MemTransferInfo &
Info = TransferInfo[TransferInst];
999 if (!
Info.SrcIndex || !
Info.DestIndex)
1001 Inst,
"mem transfer inst is missing constant src and/or dst index");
1004 LLVM_DEBUG(
dbgs() <<
" Converting alloca to vector " << *AllocaTy <<
" -> "
1005 << *VectorTy <<
'\n');
1006 const unsigned VecStoreSize =
DL->getTypeStoreSize(VectorTy);
1011 Updater.
Initialize(VectorTy,
"promotealloca");
1017 Value *AllocaInitValue =
1019 AllocaInitValue->
takeName(&Alloca);
1030 auto GetCurVal = [&]() ->
Value * {
1034 if (!Placeholders.
empty() && Placeholders.
back()->getParent() == BB)
1035 return Placeholders.
back();
1043 return Placeholders.
back();
1048 TransferInfo, GEPVectorIdx, GetCurVal);
1055 Placeholder->replaceAllUsesWith(
1057 Placeholder->eraseFromParent();
1065 I->eraseFromParent();
1070 I->dropDroppableUses();
1072 I->eraseFromParent();
1081std::pair<Value *, Value *>
1082AMDGPUPromoteAllocaImpl::getLocalSizeYZ(
IRBuilder<> &Builder) {
1092 ST.makeLIDRangeMetadata(LocalSizeY);
1093 ST.makeLIDRangeMetadata(LocalSizeZ);
1095 return std::pair(LocalSizeY, LocalSizeZ);
1136 F.removeFnAttr(
"amdgpu-no-dispatch-ptr");
1153 LoadXY->
setMetadata(LLVMContext::MD_invariant_load, MD);
1154 LoadZU->
setMetadata(LLVMContext::MD_invariant_load, MD);
1155 ST.makeLIDRangeMetadata(LoadZU);
1160 return std::pair(
Y, LoadZU);
1172 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
1174 AttrName =
"amdgpu-no-workitem-id-x";
1177 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
1179 AttrName =
"amdgpu-no-workitem-id-y";
1183 IntrID = IsAMDGCN ? (
Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
1185 AttrName =
"amdgpu-no-workitem-id-z";
1193 ST.makeLIDRangeMetadata(CI);
1194 F->removeFnAttr(AttrName);
1204 switch (
II->getIntrinsicID()) {
1205 case Intrinsic::memcpy:
1206 case Intrinsic::memmove:
1207 case Intrinsic::memset:
1208 case Intrinsic::lifetime_start:
1209 case Intrinsic::lifetime_end:
1210 case Intrinsic::invariant_start:
1211 case Intrinsic::invariant_end:
1212 case Intrinsic::launder_invariant_group:
1213 case Intrinsic::strip_invariant_group:
1214 case Intrinsic::objectsize:
1221bool AMDGPUPromoteAllocaImpl::binaryOpIsDerivedFromSameAlloca(
1243 if (OtherObj != BaseAlloca) {
1245 dbgs() <<
"Found a binary instruction with another alloca object\n");
1252bool AMDGPUPromoteAllocaImpl::collectUsesWithPtrTypes(
1253 Value *BaseAlloca,
Value *Val, std::vector<Value *> &WorkList)
const {
1263 WorkList.push_back(
User);
1268 if (UseInst->
getOpcode() == Instruction::PtrToInt)
1272 if (LI->isVolatile())
1278 if (
SI->isVolatile())
1282 if (
SI->getPointerOperand() != Val)
1288 if (RMW->isVolatile())
1294 if (CAS->isVolatile())
1302 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, ICmp, 0, 1))
1306 WorkList.push_back(ICmp);
1313 if (!
GEP->isInBounds())
1318 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val,
SI, 1, 2))
1325 switch (
Phi->getNumIncomingValues()) {
1329 if (!binaryOpIsDerivedFromSameAlloca(BaseAlloca, Val, Phi, 0, 1))
1346 WorkList.push_back(
User);
1347 if (!collectUsesWithPtrTypes(BaseAlloca,
User, WorkList))
1354bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(
const Function &
F) {
1362 for (
Type *ParamTy : FTy->params()) {
1366 LLVM_DEBUG(
dbgs() <<
"Function has local memory argument. Promoting to "
1367 "local memory disabled.\n");
1372 LocalMemLimit =
ST.getAddressableLocalMemorySize();
1373 if (LocalMemLimit == 0)
1383 if (
Use->getFunction() == &
F)
1387 if (VisitedConstants.
insert(
C).second)
1399 if (visitUsers(&GV, &GV)) {
1407 while (!
Stack.empty()) {
1409 if (visitUsers(&GV,
C)) {
1430 LLVM_DEBUG(
dbgs() <<
"Function has a reference to externally allocated "
1431 "local memory. Promoting to local memory "
1446 CurrentLocalMemUsage = 0;
1452 for (
auto Alloc : AllocatedSizes) {
1453 CurrentLocalMemUsage =
alignTo(CurrentLocalMemUsage,
Alloc.second);
1454 CurrentLocalMemUsage +=
Alloc.first;
1457 unsigned MaxOccupancy =
1458 ST.getWavesPerEU(
ST.getFlatWorkGroupSizes(
F), CurrentLocalMemUsage,
F)
1462 unsigned MaxSizeWithWaveCount =
1463 ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy,
F);
1466 if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
1469 LocalMemLimit = MaxSizeWithWaveCount;
1472 <<
" bytes of LDS\n"
1473 <<
" Rounding size to " << MaxSizeWithWaveCount
1474 <<
" with a maximum occupancy of " << MaxOccupancy <<
'\n'
1475 <<
" and " << (LocalMemLimit - CurrentLocalMemUsage)
1476 <<
" available for promotion\n");
1482bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(
AllocaInst &
I,
1483 bool SufficientLDS) {
1486 if (DisablePromoteAllocaToLDS) {
1507 <<
" promote alloca to LDS not supported with calling convention.\n");
1516 unsigned WorkGroupSize =
ST.getFlatWorkGroupSizes(ContainingFunction).second;
1519 DL.getValueOrABITypeAlignment(
I.getAlign(),
I.getAllocatedType());
1527 uint32_t NewSize =
alignTo(CurrentLocalMemUsage, Alignment);
1528 uint32_t AllocSize =
1529 WorkGroupSize *
DL.getTypeAllocSize(
I.getAllocatedType());
1530 NewSize += AllocSize;
1532 if (NewSize > LocalMemLimit) {
1534 <<
" bytes of local memory not available to promote\n");
1538 CurrentLocalMemUsage = NewSize;
1540 std::vector<Value *> WorkList;
1542 if (!collectUsesWithPtrTypes(&
I, &
I, WorkList)) {
1554 Twine(
F->getName()) +
Twine(
'.') +
I.getName(),
nullptr,
1559 Value *TCntY, *TCntZ;
1561 std::tie(TCntY, TCntZ) = getLocalSizeYZ(Builder);
1562 Value *TIdX = getWorkitemID(Builder, 0);
1563 Value *TIdY = getWorkitemID(Builder, 1);
1564 Value *TIdZ = getWorkitemID(Builder, 2);
1578 I.eraseFromParent();
1584 for (
Value *V : WorkList) {
1606 assert(
V->getType()->isPtrOrPtrVectorTy());
1608 Type *NewTy =
V->getType()->getWithNewType(NewPtrTy);
1609 V->mutateType(NewTy);
1619 for (
unsigned I = 0,
E =
Phi->getNumIncomingValues();
I !=
E; ++
I) {
1621 Phi->getIncomingValue(
I)))
1632 case Intrinsic::lifetime_start:
1633 case Intrinsic::lifetime_end:
1637 case Intrinsic::memcpy:
1638 case Intrinsic::memmove:
1644 case Intrinsic::memset: {
1652 case Intrinsic::invariant_start:
1653 case Intrinsic::invariant_end:
1654 case Intrinsic::launder_invariant_group:
1655 case Intrinsic::strip_invariant_group: {
1673 case Intrinsic::objectsize: {
1677 Intrinsic::objectsize,
1693 assert(
ID == Intrinsic::memcpy ||
ID == Intrinsic::memmove);
1697 ID,
MI->getRawDest(),
MI->getDestAlign(),
MI->getRawSource(),
1698 MI->getSourceAlign(),
MI->getLength(),
MI->isVolatile());
1700 for (
unsigned I = 0;
I != 2; ++
I) {
1702 B->addDereferenceableParamAttr(
I, Bytes);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
static bool runOnFunction(Function &F, bool PostInlining)
AMD GCN specific subclass of TargetSubtarget.
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Target-Independent Code Generator Pass Configuration Options pass.
static const AMDGPUSubtarget & get(const MachineFunction &MF)
Class for arbitrary precision integers.
static LLVM_ABI void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, APInt &Remainder)
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
bool isOne() const
Determine if this is a value of 1.
an instruction to allocate memory on the stack
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
Represents analyses that only rely on functions' control flow.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
void addDereferenceableRetAttr(uint64_t Bytes)
adds the dereferenceable attribute to the list of attributes.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This is an important base class in LLVM.
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
ValueT & at(const_arg_type_t< KeyT > Val)
at - Return the entry for the specified key, or abort if no such entry exists.
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Implements a dense probed hash-table based set.
Class to represent fixed width SIMD vectors.
unsigned getNumElements() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Class to represent function types.
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
const Function & getFunction() const
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool hasExternalLinkage() const
void setUnnamedAddr(UnnamedAddr Val)
unsigned getAddressSpace() const
@ InternalLinkage
Rename collisions when linking (static functions).
Type * getValueType() const
MaybeAlign getAlign() const
Returns the alignment of the given variable.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
BasicBlock * GetInsertBlock() const
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
CallInst * CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, MaybeAlign Align, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Create and insert a memset to the specified pointer and the specified value.
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Value * CreateConstInBoundsGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
LLVM_ABI CallInst * CreateMemTransferInst(Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size, bool isVolatile=false, const AAMDNodes &AAInfo=AAMDNodes())
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Analysis pass that exposes the LoopInfo for a function.
The legacy pass manager's analysis pass to compute loop information.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
std::pair< KeyT, ValueT > & front()
Value * getLength() const
Value * getRawDest() const
MaybeAlign getDestAlign() const
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
This class wraps the llvm.memcpy/memmove intrinsics.
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Class to represent pointers.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Helper class for SSA formation on a set of values defined in multiple blocks.
Value * FindValueForBlock(BasicBlock *BB) const
Return the value for the specified block if the SSAUpdater has one, otherwise return nullptr.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
Value * GetValueInMiddleOfBlock(BasicBlock *BB)
Construct SSA form, materializing a value that is live in the middle of the specified block.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
This class represents the LLVM 'select' instruction.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
StringRef - Represent a constant reference to a string, i.e.
Primary interface to the complete machine description for the target machine.
const Triple & getTargetTriple() const
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Triple - Helper class for working with autoconf configuration names.
bool isAMDGCN() const
Tests whether the target is AMDGCN.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isArrayTy() const
True if this is an instance of ArrayType.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
bool isAggregateType() const
Return true if the type is an aggregate type.
LLVM_ABI Type * getWithNewType(Type *EltTy) const
Given vector type, change the element type, whilst keeping the old number of elements.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Type * getElementType() const
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
LLVM_READNONE constexpr bool isEntryFunctionCC(CallingConv::ID CC)
unsigned getDynamicVGPRBlockSize(const Function &F)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ C
The default llvm calling convention, compatible with C.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
auto reverse(ContainerTy &&C)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
FunctionPass * createAMDGPUPromoteAlloca()
@ Mod
The access may modify the value stored in memory.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
char & AMDGPUPromoteAllocaID
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
This struct is a compact representation of a valid (non-zero power of two) alignment.
A MapVector that performs no allocations if smaller than a certain size.
Function object to check whether the second component of a container supported by std::get (like std:...