198#include "llvm/IR/IntrinsicsAMDGPU.h"
215#define DEBUG_TYPE "amdgpu-lower-module-lds"
223 "amdgpu-super-align-lds-globals",
224 cl::desc(
"Increase alignment of LDS if it is not on align boundary"),
// Lowering strategy selectable through the "amdgpu-lower-module-lds-strategy"
// command-line option. Meanings, as given by the option's value descriptions:
//   module - lower via module struct
//   table  - lower via table lookup
//   kernel - lower variables reachable from one kernel, otherwise abort
//   hybrid - presumably "lower via mixture of above strategies"; the
//            clEnumValN line naming this value is not visible here - TODO confirm
227enum class LoweringKind { module, table, kernel, hybrid };
229 "amdgpu-lower-module-lds-strategy",
233 clEnumValN(LoweringKind::table,
"table",
"Lower via table lookup"),
234 clEnumValN(LoweringKind::module,
"module",
"Lower via module struct"),
236 LoweringKind::kernel,
"kernel",
237 "Lower variables reachable from one kernel, otherwise abort"),
239 "Lower via mixture of above strategies")));
241template <
typename T> std::vector<T> sortByName(std::vector<T> &&V) {
242 llvm::sort(V, [](
const auto *L,
const auto *R) {
243 return L->getName() < R->getName();
245 return {std::move(V)};
248class AMDGPULowerModuleLDS {
252 removeLocalVarsFromUsedLists(
Module &M,
264 LocalVar->removeDeadConstantUsers();
289 IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());
292 Func->getParent(), Intrinsic::donothing, {});
294 Value *UseInstance[1] = {
295 Builder.CreateConstInBoundsGEP1_32(SGV->
getValueType(), SGV, 0)};
304 struct LDSVariableReplacement {
314 static Constant *getAddressesOfVariablesInKernel(
326 auto ConstantGepIt = LDSVarsToConstantGEP.
find(GV);
327 if (ConstantGepIt != LDSVarsToConstantGEP.
end()) {
328 Elements.push_back(ConstantGepIt->second);
340 if (Variables.
empty()) {
345 const size_t NumberVariables = Variables.
size();
346 const size_t NumberKernels = kernels.
size();
355 std::vector<Constant *> overallConstantExprElts(NumberKernels);
356 for (
size_t i = 0; i < NumberKernels; i++) {
357 auto Replacement = KernelToReplacement.
find(kernels[i]);
358 overallConstantExprElts[i] =
359 (Replacement == KernelToReplacement.
end())
361 : getAddressesOfVariablesInKernel(
362 Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
377 Value *OptionalIndex) {
383 Value *tableKernelIndex = getTableLookupKernelIndex(M,
I->getFunction());
389 Builder.SetInsertPoint(
I);
393 ConstantInt::get(I32, 0),
399 Value *Address = Builder.CreateInBoundsGEP(
400 LookupTable->getValueType(), LookupTable, GEPIdx, GV->
getName());
402 Value *Loaded = Builder.CreateLoad(GV->
getType(), Address);
406 void replaceUsesInInstructionsWithTableLookup(
414 for (
size_t Index = 0; Index < ModuleScopeVariables.
size(); Index++) {
415 auto *GV = ModuleScopeVariables[Index];
422 replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,
423 ConstantInt::get(I32, Index));
434 if (VariableSet.
empty())
437 for (
Function &Func : M.functions()) {
438 if (Func.isDeclaration() || !
isKernel(Func))
452 chooseBestVariableForModuleStrategy(
const DataLayout &
DL,
458 size_t UserCount = 0;
461 CandidateTy() =
default;
464 : GV(GV), UserCount(UserCount),
Size(AllocSize) {}
468 if (UserCount <
Other.UserCount) {
471 if (UserCount >
Other.UserCount) {
489 CandidateTy MostUsed;
491 for (
auto &K : LDSVars) {
493 if (K.second.size() <= 1) {
499 if (MostUsed < Candidate)
500 MostUsed = Candidate;
524 auto [It, Inserted] = tableKernelIndexCache.
try_emplace(
F);
526 auto InsertAt =
F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
529 It->second = Builder.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});
535 static std::vector<Function *> assignLDSKernelIDToEachKernel(
543 std::vector<Function *> OrderedKernels;
544 if (!KernelsThatAllocateTableLDS.
empty() ||
545 !KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
547 for (
Function &Func : M->functions()) {
548 if (Func.isDeclaration())
553 if (KernelsThatAllocateTableLDS.
contains(&Func) ||
554 KernelsThatIndirectlyAllocateDynamicLDS.
contains(&Func)) {
556 OrderedKernels.push_back(&Func);
561 OrderedKernels = sortByName(std::move(OrderedKernels));
567 if (OrderedKernels.size() > UINT32_MAX) {
572 for (
size_t i = 0; i < OrderedKernels.size(); i++) {
576 OrderedKernels[i]->setMetadata(
"llvm.amdgcn.lds.kernel.id",
580 return OrderedKernels;
583 static void partitionVariablesIntoIndirectStrategies(
592 LoweringKindLoc != LoweringKind::hybrid
594 : chooseBestVariableForModuleStrategy(
595 M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);
600 ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
603 for (
auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
609 assert(!K.second.empty());
612 DynamicVariables.
insert(GV);
616 switch (LoweringKindLoc) {
617 case LoweringKind::module:
618 ModuleScopeVariables.insert(GV);
621 case LoweringKind::table:
622 TableLookupVariables.
insert(GV);
625 case LoweringKind::kernel:
626 if (K.second.size() == 1) {
627 KernelAccessVariables.
insert(GV);
631 "cannot lower LDS '" + GV->
getName() +
632 "' to kernel access as it is reachable from multiple kernels");
636 case LoweringKind::hybrid: {
637 if (GV == HybridModuleRoot) {
638 assert(K.second.size() != 1);
639 ModuleScopeVariables.insert(GV);
640 }
else if (K.second.size() == 1) {
641 KernelAccessVariables.
insert(GV);
642 }
else if (K.second == HybridModuleRootKernels) {
643 ModuleScopeVariables.insert(GV);
645 TableLookupVariables.
insert(GV);
654 assert(ModuleScopeVariables.
size() + TableLookupVariables.
size() +
655 KernelAccessVariables.
size() + DynamicVariables.
size() ==
656 LDSToKernelsThatNeedToAccessItIndirectly.size());
669 if (ModuleScopeVariables.
empty()) {
675 LDSVariableReplacement ModuleScopeReplacement =
676 createLDSVariableReplacement(M,
"llvm.amdgcn.module.lds",
677 ModuleScopeVariables);
685 recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
688 removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
691 replaceLDSVariablesWithStruct(
692 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
705 for (
Function &Func : M.functions()) {
706 if (Func.isDeclaration() || !
isKernel(Func))
709 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
710 replaceLDSVariablesWithStruct(
711 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
720 markUsedByKernel(&Func, ModuleScopeReplacement.SGV);
724 return ModuleScopeReplacement.SGV;
728 lowerKernelScopeStructVariables(
737 for (
Function &Func : M.functions()) {
738 if (Func.isDeclaration() || !
isKernel(Func))
746 KernelUsedVariables.
insert(v);
754 KernelUsedVariables.
insert(v);
760 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
762 KernelUsedVariables.
erase(v);
766 if (KernelUsedVariables.
empty()) {
778 if (!Func.hasName()) {
782 std::string VarName =
783 (
Twine(
"llvm.amdgcn.kernel.") + Func.getName() +
".lds").str();
786 createLDSVariableReplacement(M, VarName, KernelUsedVariables);
794 markUsedByKernel(&Func, Replacement.SGV);
797 removeLocalVarsFromUsedLists(M, KernelUsedVariables);
798 KernelToReplacement[&Func] = Replacement;
801 replaceLDSVariablesWithStruct(
802 M, KernelUsedVariables, Replacement, [&Func](
Use &U) {
804 return I &&
I->getFunction() == &Func;
807 return KernelToReplacement;
827 Align MaxDynamicAlignment(1);
831 MaxDynamicAlignment =
837 UpdateMaxAlignment(GV);
841 UpdateMaxAlignment(GV);
848 Twine(
"llvm.amdgcn." + func->
getName() +
".dynlds"),
nullptr,
850 N->setAlignment(MaxDynamicAlignment);
860 std::vector<Function *>
const &OrderedKernels) {
862 if (!KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
867 std::vector<Constant *> newDynamicLDS;
870 for (
auto &func : OrderedKernels) {
872 if (KernelsThatIndirectlyAllocateDynamicLDS.
contains(func)) {
879 buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo, func);
881 KernelToCreatedDynamicLDS[func] =
N;
883 markUsedByKernel(func,
N);
885 newDynamicLDS.push_back(
N);
890 assert(OrderedKernels.size() == newDynamicLDS.size());
896 "llvm.amdgcn.dynlds.offset.table",
nullptr,
907 replaceUseWithTableLookup(M, Builder, table, GV, U,
nullptr);
911 return KernelToCreatedDynamicLDS;
917 bool runOnModuleLinkTime(
Module &M) {
918 bool Changed = superAlignLDSGlobals(M);
927 if (KernelLDSUses.empty() && FunctionLDSUses.empty())
931 assert(!ModuleId.empty() &&
932 "modules with LDS variables should have a unique ID");
935 for (
auto &[
F, Vars] : KernelLDSUses)
936 AllLDSUses[
F].insert(Vars.begin(), Vars.end());
937 for (
auto &[
F, Vars] : FunctionLDSUses)
938 AllLDSUses[
F].insert(Vars.begin(), Vars.end());
941 for (
auto &[
F, Vars] : AllLDSUses) {
952 for (
auto &[
F, Vars] : AllLDSUses) {
954 VarToFuncs[V].push_back(
F);
963 for (
auto &[V, Funcs] : VarToFuncs) {
964 if (!V->hasLocalLinkage() || Funcs.size() > 1) {
965 GlobalScopeVars.
insert(V);
966 if (V->hasLocalLinkage())
967 InternalMultiUserVars.
insert(V);
975 for (
auto &KV : AllLDSUses) {
979 if (!GlobalScopeVars.
count(V))
983 if (FuncScopeVars.
empty())
988 ? (
"__amdgpu_lds." +
F->getName() + ModuleId).str()
989 : (
"__amdgpu_lds." +
F->getName()).str();
990 LDSVariableReplacement Replacement =
991 createLDSVariableReplacement(M,
StructName, FuncScopeVars);
998 replaceLDSVariablesWithStruct(
999 M, FuncScopeVars, Replacement, [
F](
const Use &U) {
1001 return I &&
I->getFunction() ==
F;
1004 AllReplacedVars.
insert(FuncScopeVars.
begin(), FuncScopeVars.
end());
1011 if (!InternalMultiUserVars.
empty()) {
1012 std::string
StructName =
"__amdgpu_lds.__internal" + ModuleId;
1013 LDSVariableReplacement Replacement =
1014 createLDSVariableReplacement(M,
StructName, InternalMultiUserVars);
1020 replaceLDSVariablesWithStruct(
1021 M, InternalMultiUserVars, Replacement,
1027 FuncsUsingInternalVars.insert(
F);
1029 for (
Function *
F : FuncsUsingInternalVars)
1032 AllReplacedVars.
insert(InternalMultiUserVars.begin(),
1033 InternalMultiUserVars.end());
1039 V->setInitializer(
nullptr);
1046 NamedMDNode *LdsMD = M.getOrInsertNamedMetadata(
"amdgpu.lds.uses");
1048 for (
auto &[
F, SGV] : FuncToLdsStruct)
1052 for (
auto &[V, Funcs] : VarToFuncs) {
1053 if (GlobalScopeVars.
count(V) && !InternalMultiUserVars.
count(V)) {
1063 AllLDSVarsForCleanup.
insert(GlobalScopeVars.
begin(), GlobalScopeVars.
end());
1064 removeLocalVarsFromUsedLists(M, AllLDSVarsForCleanup);
1074 bool runOnModule(
Module &M) {
1076 return runOnModuleLinkTime(M);
1077 return runOnModuleNormal(M);
1080 bool runOnModuleNormal(
Module &M) {
1082 bool Changed = superAlignLDSGlobals(M);
1099 LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(
F);
1108 partitionVariablesIntoIndirectStrategies(
1109 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
1110 ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,
1117 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1118 ModuleScopeVariables);
1120 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1121 TableLookupVariables);
1124 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1127 GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
1128 M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);
1131 lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
1132 KernelsThatAllocateModuleLDS,
1133 MaybeModuleScopeStruct);
1136 for (
auto &GV : KernelAccessVariables) {
1137 auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];
1138 assert(funcs.size() == 1);
1139 LDSVariableReplacement Replacement =
1140 KernelToReplacement[*(funcs.begin())];
1145 replaceLDSVariablesWithStruct(M, Vec, Replacement, [](
Use &U) {
1151 std::vector<Function *> OrderedKernels =
1152 assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,
1153 KernelsThatIndirectlyAllocateDynamicLDS);
1155 if (!KernelsThatAllocateTableLDS.
empty()) {
1161 auto TableLookupVariablesOrdered =
1162 sortByName(std::vector<GlobalVariable *>(TableLookupVariables.
begin(),
1163 TableLookupVariables.
end()));
1166 M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
1167 replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
1172 lowerDynamicLDSVariables(M, LDSUsesInfo,
1173 KernelsThatIndirectlyAllocateDynamicLDS,
1174 DynamicVariables, OrderedKernels);
1179 for (
auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,
1180 &KernelsThatAllocateTableLDS})
1189 for (
Function &Func : M.functions()) {
1190 if (Func.isDeclaration() || !
isKernel(Func))
1204 const bool AllocateModuleScopeStruct =
1205 MaybeModuleScopeStruct &&
1206 KernelsThatAllocateModuleLDS.
contains(&Func);
1208 auto Replacement = KernelToReplacement.
find(&Func);
1209 const bool AllocateKernelScopeStruct =
1210 Replacement != KernelToReplacement.
end();
1212 const bool AllocateDynamicVariable =
1213 KernelToCreatedDynamicLDS.
contains(&Func);
1217 if (AllocateModuleScopeStruct) {
1223 if (AllocateKernelScopeStruct) {
1226 recordLDSAbsoluteAddress(&M, KernelStruct,
Offset);
1234 if (AllocateDynamicVariable) {
1235 GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];
1237 recordLDSAbsoluteAddress(&M, DynamicVariable,
Offset);
1252 if (AllocateDynamicVariable)
1255 Func.addFnAttr(
"amdgpu-lds-size", Buffer);
1274 static bool superAlignLDSGlobals(
Module &M) {
1277 if (!SuperAlignLDSGlobals) {
1281 for (
auto &GV : M.globals()) {
1301 Alignment = std::max(Alignment,
Align(16));
1302 }
else if (GVSize > 4) {
1304 Alignment = std::max(Alignment,
Align(8));
1305 }
else if (GVSize > 2) {
1307 Alignment = std::max(Alignment,
Align(4));
1308 }
else if (GVSize > 1) {
1310 Alignment = std::max(Alignment,
Align(2));
1321 static LDSVariableReplacement createLDSVariableReplacement(
1322 Module &M, std::string VarName,
1339 auto Sorted = sortByName(std::vector<GlobalVariable *>(
1340 LDSVarsToTransform.
begin(), LDSVarsToTransform.
end()));
1351 std::vector<GlobalVariable *> LocalVars;
1353 LocalVars.reserve(LDSVarsToTransform.
size());
1354 IsPaddingField.
reserve(LDSVarsToTransform.
size());
1357 for (
auto &
F : LayoutFields) {
1360 Align DataAlign =
F.Alignment;
1363 if (
uint64_t Rem = CurrentOffset % DataAlignV) {
1364 uint64_t Padding = DataAlignV - Rem;
1376 CurrentOffset += Padding;
1379 LocalVars.push_back(FGV);
1381 CurrentOffset +=
F.Size;
1385 std::vector<Type *> LocalVarTypes;
1386 LocalVarTypes.reserve(LocalVars.size());
1388 LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
1403 for (
size_t I = 0;
I < LocalVars.size();
I++) {
1405 Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32,
I)};
1407 if (IsPaddingField[
I]) {
1414 assert(Map.size() == LDSVarsToTransform.
size());
1415 return {SGV, std::move(Map)};
1418 template <
typename PredicateTy>
1419 static void replaceLDSVariablesWithStruct(
1421 const LDSVariableReplacement &Replacement, PredicateTy
Predicate) {
1428 auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(
1429 LDSVarsToTransformArg.
begin(), LDSVarsToTransformArg.
end()));
1435 const size_t NumberVars = LDSVarsToTransform.
size();
1436 if (NumberVars > 1) {
1438 AliasScopes.
reserve(NumberVars);
1440 for (
size_t I = 0;
I < NumberVars;
I++) {
1444 NoAliasList.
append(&AliasScopes[1], AliasScopes.
end());
1449 for (
size_t I = 0;
I < NumberVars;
I++) {
1451 Constant *
GEP = Replacement.LDSVarsToConstantGEP.at(GV);
1455 APInt APOff(
DL.getIndexTypeSizeInBits(
GEP->getType()), 0);
1456 GEP->stripAndAccumulateInBoundsConstantOffsets(
DL, APOff);
1463 NoAliasList[
I - 1] = AliasScopes[
I - 1];
1469 refineUsesAlignmentAndAA(
GEP,
A,
DL, AliasScope, NoAlias);
1473 static void refineUsesAlignmentAndAA(
Value *Ptr,
Align A,
1475 MDNode *NoAlias,
unsigned MaxDepth = 5) {
1476 if (!MaxDepth || (
A == 1 && !AliasScope))
1483 if (AliasScope &&
I->mayReadOrWriteMemory()) {
1484 MDNode *AS =
I->getMetadata(LLVMContext::MD_alias_scope);
1487 I->setMetadata(LLVMContext::MD_alias_scope, AS);
1489 MDNode *NA =
I->getMetadata(LLVMContext::MD_noalias);
1513 if (Intersection.empty()) {
1518 I->setMetadata(LLVMContext::MD_noalias, NA);
1523 LI->setAlignment(std::max(
A, LI->getAlign()));
1527 if (
SI->getPointerOperand() == Ptr)
1528 SI->setAlignment(std::max(
A,
SI->getAlign()));
1534 if (AI->getPointerOperand() == Ptr)
1535 AI->setAlignment(std::max(
A, AI->getAlign()));
1539 if (AI->getPointerOperand() == Ptr)
1540 AI->setAlignment(std::max(
A, AI->getAlign()));
1544 unsigned BitWidth =
DL.getIndexTypeSizeInBits(
GEP->getType());
1546 if (
GEP->getPointerOperand() == Ptr) {
1548 if (
GEP->accumulateConstantOffset(
DL, Off))
1550 refineUsesAlignmentAndAA(
GEP, GA,
DL, AliasScope, NoAlias,
1556 if (
I->getOpcode() == Instruction::BitCast ||
1557 I->getOpcode() == Instruction::AddrSpaceCast)
1558 refineUsesAlignmentAndAA(
I,
A,
DL, AliasScope, NoAlias, MaxDepth - 1);
1564class AMDGPULowerModuleLDSLegacy :
public ModulePass {
1577 bool runOnModule(
Module &M)
override {
1579 auto &TPC = getAnalysis<TargetPassConfig>();
// Out-of-class definition of the static ID member of the legacy pass,
// initialised to 0.
// NOTE(review): presumably the identifier the legacy pass manager uses for
// this pass - confirm against the class declaration.
1588char AMDGPULowerModuleLDSLegacy::ID = 0;
1593 "Lower uses of LDS variables from non-kernel functions",
1597 "Lower uses of LDS variables from non-kernel functions",
1602 return new AMDGPULowerModuleLDSLegacy(TM);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const std::string FatArchTraits< MachO::fat_arch >::StructName
This file provides an interface for laying out a sequence of fields as a struct in a way that attempt...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
This file defines the SmallString class.
Target-Independent Code Generator Pass Configuration Options pass.
static bool EnableObjectLinking
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
void reserve(unsigned N)
Reserve space for at least N bits in the bitvector.
The basic data container for the call graph of a Module of IR.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
This is an important base class in LLVM.
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
A parsed version of the target data layout string, and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
void setLinkage(LinkageTypes LT)
PointerType * getType() const
Global values are always pointers.
@ InternalLinkage
Rename collisions when linking (static functions).
@ ExternalLinkage
Externally visible function.
Type * getValueType() const
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
bool hasInitializer() const
Definitions have initializers, declarations don't.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool runOnModule(Module &) override
ImmutablePasses are never run.
This is an important class for using LLVM in a threaded context.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static LLVM_ABI MDNode * concatenate(MDNode *A, MDNode *B)
Methods for metadata merging.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
LLVM_ABI void addOperand(MDNode *M)
A container for an operand bundle being viewed as a set of values rather than a set of uses.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A simple AA result which uses scoped-noalias metadata to answer queries.
static LLVM_ABI void collectScopedDomains(const MDNode *NoAlias, SmallPtrSetImpl< const MDNode * > &Domains)
Collect the set of scoped domains relevant to the noalias scopes.
bool insert(const value_type &X)
Insert a new element into the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Class to represent struct types.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
iterator_range< user_iterator > users()
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
A raw_ostream that writes to an std::string.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
GVUsesInfoTy getTransitiveUsesOfLDSForLowering(const CallGraph &CG, Module &M)
Collects all uses of LDS Global Variables in M using getUsesOfGVByFunction, with isLDSVariableToLower...
bool isDynamicLDS(const GlobalVariable &GV)
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, ArrayRef< StringRef > FnAttrs)
Strip FnAttr attribute from any functions where we may have introduced its use.
bool eliminateGVConstantExprUsesFromAllInstructions(Module &M, function_ref< bool(const GlobalVariable &)> Filter)
Iterates over all GlobalVariables in M, and whenever Filter returns true, replace all constant users ...
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
void getUsesOfGVByFunction(const CallGraph &CG, Module &M, function_ref< bool(const GlobalVariable &)> Filter, FunctionVariableMap &Kernels, FunctionVariableMap &Functions)
Finds uses of Global Variables on a per-function basis.
DenseMap< Function *, DenseSet< GlobalVariable * > > FunctionVariableMap
TargetExtType * isNamedBarrier(const GlobalVariable &GV)
bool isLDSVariableToLower(const GlobalVariable &GV)
Align getAlign(const DataLayout &DL, const GlobalVariable *GV)
DenseMap< GlobalVariable *, DenseSet< Function * > > VariableFunctionMap
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI std::string getUniqueModuleId(Module *M)
Produce a unique identifier for this module by taking the MD5 sum of the names of the module's strong...
void sort(IteratorTy Start, IteratorTy End)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
char & AMDGPULowerModuleLDSLegacyPassID
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return the intersection of A and B (A ∩ B)
LLVM_ABI void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)
Removes global values from the llvm.used and llvm.compiler.used arrays.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
LLVM_ABI std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
const AMDGPUTargetMachine & TM
FunctionVariableMap DirectAccess
FunctionVariableMap IndirectAccess
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.