198#include "llvm/IR/IntrinsicsAMDGPU.h"
215#define DEBUG_TYPE "amdgpu-lower-module-lds"
223 "amdgpu-super-align-lds-globals",
224 cl::desc(
"Increase alignment of LDS if it is not on align boundary"),
227enum class LoweringKind { module, table, kernel, hybrid };
229 "amdgpu-lower-module-lds-strategy",
233 clEnumValN(LoweringKind::table,
"table",
"Lower via table lookup"),
234 clEnumValN(LoweringKind::module,
"module",
"Lower via module struct"),
236 LoweringKind::kernel,
"kernel",
237 "Lower variables reachable from one kernel, otherwise abort"),
239 "Lower via mixture of above strategies")));
241template <
typename T> std::vector<T> sortByName(std::vector<T> &&V) {
242 llvm::sort(V, [](
const auto *L,
const auto *R) {
243 return L->getName() < R->getName();
245 return {std::move(V)};
248class AMDGPULowerModuleLDS {
252 removeLocalVarsFromUsedLists(
Module &M,
264 LocalVar->removeDeadConstantUsers();
289 IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());
292 Func->getParent(), Intrinsic::donothing, {});
294 Value *UseInstance[1] = {
295 Builder.CreateConstInBoundsGEP1_32(SGV->
getValueType(), SGV, 0)};
304 struct LDSVariableReplacement {
314 static Constant *getAddressesOfVariablesInKernel(
326 auto ConstantGepIt = LDSVarsToConstantGEP.
find(GV);
327 if (ConstantGepIt != LDSVarsToConstantGEP.
end()) {
328 Elements.push_back(ConstantGepIt->second);
340 if (Variables.
empty()) {
345 const size_t NumberVariables = Variables.
size();
346 const size_t NumberKernels = kernels.
size();
355 std::vector<Constant *> overallConstantExprElts(NumberKernels);
356 for (
size_t i = 0; i < NumberKernels; i++) {
357 auto Replacement = KernelToReplacement.
find(kernels[i]);
358 overallConstantExprElts[i] =
359 (Replacement == KernelToReplacement.
end())
361 : getAddressesOfVariablesInKernel(
362 Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
377 Value *OptionalIndex) {
383 Value *tableKernelIndex = getTableLookupKernelIndex(M,
I->getFunction());
389 Builder.SetInsertPoint(
I);
393 ConstantInt::get(I32, 0),
399 Value *Address = Builder.CreateInBoundsGEP(
400 LookupTable->getValueType(), LookupTable, GEPIdx, GV->
getName());
402 Value *Loaded = Builder.CreateLoad(GV->
getType(), Address);
406 void replaceUsesInInstructionsWithTableLookup(
414 for (
size_t Index = 0; Index < ModuleScopeVariables.
size(); Index++) {
415 auto *GV = ModuleScopeVariables[Index];
422 replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,
423 ConstantInt::get(I32, Index));
434 if (VariableSet.
empty())
437 for (
Function &Func : M.functions()) {
438 if (Func.isDeclaration() || !
isKernel(Func))
452 chooseBestVariableForModuleStrategy(
const DataLayout &
DL,
458 size_t UserCount = 0;
461 CandidateTy() =
default;
464 : GV(GV), UserCount(UserCount),
Size(AllocSize) {}
468 if (UserCount <
Other.UserCount) {
471 if (UserCount >
Other.UserCount) {
489 CandidateTy MostUsed;
491 for (
auto &K : LDSVars) {
493 if (K.second.size() <= 1) {
499 if (MostUsed < Candidate)
500 MostUsed = Candidate;
524 auto [It, Inserted] = tableKernelIndexCache.
try_emplace(
F);
526 auto InsertAt =
F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
529 It->second = Builder.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});
535 static std::vector<Function *> assignLDSKernelIDToEachKernel(
543 std::vector<Function *> OrderedKernels;
544 if (!KernelsThatAllocateTableLDS.
empty() ||
545 !KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
547 for (
Function &Func : M->functions()) {
548 if (Func.isDeclaration())
553 if (KernelsThatAllocateTableLDS.
contains(&Func) ||
554 KernelsThatIndirectlyAllocateDynamicLDS.
contains(&Func)) {
556 OrderedKernels.push_back(&Func);
561 OrderedKernels = sortByName(std::move(OrderedKernels));
567 if (OrderedKernels.size() > UINT32_MAX) {
572 for (
size_t i = 0; i < OrderedKernels.size(); i++) {
576 OrderedKernels[i]->setMetadata(
"llvm.amdgcn.lds.kernel.id",
580 return OrderedKernels;
583 static void partitionVariablesIntoIndirectStrategies(
592 LoweringKindLoc != LoweringKind::hybrid
594 : chooseBestVariableForModuleStrategy(
595 M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);
600 ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
603 for (
auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
609 assert(!K.second.empty());
612 DynamicVariables.
insert(GV);
616 switch (LoweringKindLoc) {
617 case LoweringKind::module:
618 ModuleScopeVariables.insert(GV);
621 case LoweringKind::table:
622 TableLookupVariables.
insert(GV);
625 case LoweringKind::kernel:
626 if (K.second.size() == 1) {
627 KernelAccessVariables.
insert(GV);
631 "cannot lower LDS '" + GV->
getName() +
632 "' to kernel access as it is reachable from multiple kernels");
636 case LoweringKind::hybrid: {
637 if (GV == HybridModuleRoot) {
638 assert(K.second.size() != 1);
639 ModuleScopeVariables.insert(GV);
640 }
else if (K.second.size() == 1) {
641 KernelAccessVariables.
insert(GV);
642 }
else if (K.second == HybridModuleRootKernels) {
643 ModuleScopeVariables.insert(GV);
645 TableLookupVariables.
insert(GV);
654 assert(ModuleScopeVariables.
size() + TableLookupVariables.
size() +
655 KernelAccessVariables.
size() + DynamicVariables.
size() ==
656 LDSToKernelsThatNeedToAccessItIndirectly.size());
669 if (ModuleScopeVariables.
empty()) {
675 LDSVariableReplacement ModuleScopeReplacement =
676 createLDSVariableReplacement(M,
"llvm.amdgcn.module.lds",
677 ModuleScopeVariables);
685 recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
688 removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
691 replaceLDSVariablesWithStruct(
692 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
705 for (
Function &Func : M.functions()) {
706 if (Func.isDeclaration() || !
isKernel(Func))
709 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
710 replaceLDSVariablesWithStruct(
711 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
720 markUsedByKernel(&Func, ModuleScopeReplacement.SGV);
724 return ModuleScopeReplacement.SGV;
728 lowerKernelScopeStructVariables(
737 for (
Function &Func : M.functions()) {
738 if (Func.isDeclaration() || !
isKernel(Func))
746 KernelUsedVariables.
insert(v);
754 KernelUsedVariables.
insert(v);
760 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
762 KernelUsedVariables.
erase(v);
766 if (KernelUsedVariables.
empty()) {
778 if (!Func.hasName()) {
782 std::string VarName =
783 (
Twine(
"llvm.amdgcn.kernel.") + Func.getName() +
".lds").str();
786 createLDSVariableReplacement(M, VarName, KernelUsedVariables);
794 markUsedByKernel(&Func, Replacement.SGV);
797 removeLocalVarsFromUsedLists(M, KernelUsedVariables);
798 KernelToReplacement[&Func] = Replacement;
801 replaceLDSVariablesWithStruct(
802 M, KernelUsedVariables, Replacement, [&Func](
Use &U) {
804 return I &&
I->getFunction() == &Func;
807 return KernelToReplacement;
827 Align MaxDynamicAlignment(1);
831 MaxDynamicAlignment =
837 UpdateMaxAlignment(GV);
841 UpdateMaxAlignment(GV);
848 Twine(
"llvm.amdgcn." + func->
getName() +
".dynlds"),
nullptr,
850 N->setAlignment(MaxDynamicAlignment);
860 std::vector<Function *>
const &OrderedKernels) {
862 if (!KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
867 std::vector<Constant *> newDynamicLDS;
870 for (
auto &func : OrderedKernels) {
872 if (KernelsThatIndirectlyAllocateDynamicLDS.
contains(func)) {
879 buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo, func);
881 KernelToCreatedDynamicLDS[func] =
N;
883 markUsedByKernel(func,
N);
885 newDynamicLDS.push_back(
N);
890 assert(OrderedKernels.size() == newDynamicLDS.size());
896 "llvm.amdgcn.dynlds.offset.table",
nullptr,
907 replaceUseWithTableLookup(M, Builder, table, GV, U,
nullptr);
911 return KernelToCreatedDynamicLDS;
917 bool runOnModuleLinkTime(
Module &M) {
918 bool Changed = superAlignLDSGlobals(M);
925 if (KernelLDSUses.empty() && FunctionLDSUses.empty())
929 assert(!ModuleId.empty() &&
930 "modules with LDS variables should have a unique ID");
933 for (
auto &[
F, Vars] : KernelLDSUses)
934 AllLDSUses[
F].insert(Vars.begin(), Vars.end());
935 for (
auto &[
F, Vars] : FunctionLDSUses)
936 AllLDSUses[
F].insert(Vars.begin(), Vars.end());
939 for (
auto &[
F, Vars] : AllLDSUses) {
950 for (
auto &[
F, Vars] : AllLDSUses) {
952 VarToFuncs[V].push_back(
F);
961 for (
auto &[V, Funcs] : VarToFuncs) {
962 if (!V->hasLocalLinkage() || Funcs.size() > 1) {
963 GlobalScopeVars.
insert(V);
964 if (V->hasLocalLinkage())
965 InternalMultiUserVars.
insert(V);
973 for (
auto &KV : AllLDSUses) {
977 if (!GlobalScopeVars.
count(V))
981 if (FuncScopeVars.
empty())
986 ? (
"__amdgpu_lds." +
F->getName() + ModuleId).str()
987 : (
"__amdgpu_lds." +
F->getName()).str();
988 LDSVariableReplacement Replacement =
989 createLDSVariableReplacement(M,
StructName, FuncScopeVars);
996 replaceLDSVariablesWithStruct(
997 M, FuncScopeVars, Replacement, [
F](
const Use &U) {
999 return I &&
I->getFunction() ==
F;
1002 AllReplacedVars.
insert(FuncScopeVars.
begin(), FuncScopeVars.
end());
1009 if (!InternalMultiUserVars.
empty()) {
1010 std::string
StructName =
"__amdgpu_lds.__internal" + ModuleId;
1011 LDSVariableReplacement Replacement =
1012 createLDSVariableReplacement(M,
StructName, InternalMultiUserVars);
1018 replaceLDSVariablesWithStruct(
1019 M, InternalMultiUserVars, Replacement,
1025 FuncsUsingInternalVars.insert(
F);
1027 for (
Function *
F : FuncsUsingInternalVars)
1030 AllReplacedVars.
insert(InternalMultiUserVars.begin(),
1031 InternalMultiUserVars.end());
1037 V->setInitializer(
nullptr);
1044 NamedMDNode *LdsMD = M.getOrInsertNamedMetadata(
"amdgpu.lds.uses");
1046 for (
auto &[
F, SGV] : FuncToLdsStruct)
1050 for (
auto &[V, Funcs] : VarToFuncs) {
1051 if (GlobalScopeVars.
count(V) && !InternalMultiUserVars.
count(V)) {
1063 AllLDSVarsForCleanup.
insert(GlobalScopeVars.
begin(), GlobalScopeVars.
end());
1064 removeLocalVarsFromUsedLists(M, AllLDSVarsForCleanup);
1074 bool runOnModule(
Module &M) {
1076 return runOnModuleLinkTime(M);
1077 return runOnModuleNormal(M);
1080 bool runOnModuleNormal(
Module &M) {
1082 bool Changed = superAlignLDSGlobals(M);
1098 LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(
F);
1107 partitionVariablesIntoIndirectStrategies(
1108 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
1109 ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,
1116 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1117 ModuleScopeVariables);
1119 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1120 TableLookupVariables);
1123 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1126 GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
1127 M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);
1130 lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
1131 KernelsThatAllocateModuleLDS,
1132 MaybeModuleScopeStruct);
1135 for (
auto &GV : KernelAccessVariables) {
1136 auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];
1137 assert(funcs.size() == 1);
1138 LDSVariableReplacement Replacement =
1139 KernelToReplacement[*(funcs.begin())];
1144 replaceLDSVariablesWithStruct(M, Vec, Replacement, [](
Use &U) {
1150 std::vector<Function *> OrderedKernels =
1151 assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,
1152 KernelsThatIndirectlyAllocateDynamicLDS);
1154 if (!KernelsThatAllocateTableLDS.
empty()) {
1160 auto TableLookupVariablesOrdered =
1161 sortByName(std::vector<GlobalVariable *>(TableLookupVariables.
begin(),
1162 TableLookupVariables.
end()));
1165 M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
1166 replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
1171 lowerDynamicLDSVariables(M, LDSUsesInfo,
1172 KernelsThatIndirectlyAllocateDynamicLDS,
1173 DynamicVariables, OrderedKernels);
1178 for (
auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,
1179 &KernelsThatAllocateTableLDS})
1188 for (
Function &Func : M.functions()) {
1189 if (Func.isDeclaration() || !
isKernel(Func))
1203 const bool AllocateModuleScopeStruct =
1204 MaybeModuleScopeStruct &&
1205 KernelsThatAllocateModuleLDS.
contains(&Func);
1207 auto Replacement = KernelToReplacement.
find(&Func);
1208 const bool AllocateKernelScopeStruct =
1209 Replacement != KernelToReplacement.
end();
1211 const bool AllocateDynamicVariable =
1212 KernelToCreatedDynamicLDS.
contains(&Func);
1216 if (AllocateModuleScopeStruct) {
1222 if (AllocateKernelScopeStruct) {
1225 recordLDSAbsoluteAddress(&M, KernelStruct,
Offset);
1233 if (AllocateDynamicVariable) {
1234 GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];
1236 recordLDSAbsoluteAddress(&M, DynamicVariable,
Offset);
1251 if (AllocateDynamicVariable)
1254 Func.addFnAttr(
"amdgpu-lds-size", Buffer);
1273 static bool superAlignLDSGlobals(
Module &M) {
1276 if (!SuperAlignLDSGlobals) {
1280 for (
auto &GV : M.globals()) {
1300 Alignment = std::max(Alignment,
Align(16));
1301 }
else if (GVSize > 4) {
1303 Alignment = std::max(Alignment,
Align(8));
1304 }
else if (GVSize > 2) {
1306 Alignment = std::max(Alignment,
Align(4));
1307 }
else if (GVSize > 1) {
1309 Alignment = std::max(Alignment,
Align(2));
1320 static LDSVariableReplacement createLDSVariableReplacement(
1321 Module &M, std::string VarName,
1338 auto Sorted = sortByName(std::vector<GlobalVariable *>(
1339 LDSVarsToTransform.
begin(), LDSVarsToTransform.
end()));
1350 std::vector<GlobalVariable *> LocalVars;
1352 LocalVars.reserve(LDSVarsToTransform.
size());
1353 IsPaddingField.
reserve(LDSVarsToTransform.
size());
1356 for (
auto &
F : LayoutFields) {
1359 Align DataAlign =
F.Alignment;
1362 if (
uint64_t Rem = CurrentOffset % DataAlignV) {
1363 uint64_t Padding = DataAlignV - Rem;
1375 CurrentOffset += Padding;
1378 LocalVars.push_back(FGV);
1380 CurrentOffset +=
F.Size;
1384 std::vector<Type *> LocalVarTypes;
1385 LocalVarTypes.reserve(LocalVars.size());
1387 LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
1402 for (
size_t I = 0;
I < LocalVars.size();
I++) {
1404 Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32,
I)};
1406 if (IsPaddingField[
I]) {
1413 assert(Map.size() == LDSVarsToTransform.
size());
1414 return {SGV, std::move(Map)};
1417 template <
typename PredicateTy>
1418 static void replaceLDSVariablesWithStruct(
1420 const LDSVariableReplacement &Replacement, PredicateTy
Predicate) {
1427 auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(
1428 LDSVarsToTransformArg.
begin(), LDSVarsToTransformArg.
end()));
1434 const size_t NumberVars = LDSVarsToTransform.
size();
1435 if (NumberVars > 1) {
1437 AliasScopes.
reserve(NumberVars);
1439 for (
size_t I = 0;
I < NumberVars;
I++) {
1443 NoAliasList.
append(&AliasScopes[1], AliasScopes.
end());
1448 for (
size_t I = 0;
I < NumberVars;
I++) {
1450 Constant *
GEP = Replacement.LDSVarsToConstantGEP.at(GV);
1454 APInt APOff(
DL.getIndexTypeSizeInBits(
GEP->getType()), 0);
1455 GEP->stripAndAccumulateInBoundsConstantOffsets(
DL, APOff);
1462 NoAliasList[
I - 1] = AliasScopes[
I - 1];
1468 refineUsesAlignmentAndAA(
GEP,
A,
DL, AliasScope, NoAlias);
1472 static void refineUsesAlignmentAndAA(
Value *Ptr,
Align A,
1474 MDNode *NoAlias,
unsigned MaxDepth = 5) {
1475 if (!MaxDepth || (
A == 1 && !AliasScope))
1482 if (AliasScope &&
I->mayReadOrWriteMemory()) {
1483 MDNode *AS =
I->getMetadata(LLVMContext::MD_alias_scope);
1486 I->setMetadata(LLVMContext::MD_alias_scope, AS);
1488 MDNode *NA =
I->getMetadata(LLVMContext::MD_noalias);
1512 if (Intersection.empty()) {
1517 I->setMetadata(LLVMContext::MD_noalias, NA);
1522 LI->setAlignment(std::max(
A, LI->getAlign()));
1526 if (
SI->getPointerOperand() == Ptr)
1527 SI->setAlignment(std::max(
A,
SI->getAlign()));
1533 if (AI->getPointerOperand() == Ptr)
1534 AI->setAlignment(std::max(
A, AI->getAlign()));
1538 if (AI->getPointerOperand() == Ptr)
1539 AI->setAlignment(std::max(
A, AI->getAlign()));
1543 unsigned BitWidth =
DL.getIndexTypeSizeInBits(
GEP->getType());
1545 if (
GEP->getPointerOperand() == Ptr) {
1547 if (
GEP->accumulateConstantOffset(
DL, Off))
1549 refineUsesAlignmentAndAA(
GEP, GA,
DL, AliasScope, NoAlias,
1555 if (
I->getOpcode() == Instruction::BitCast ||
1556 I->getOpcode() == Instruction::AddrSpaceCast)
1557 refineUsesAlignmentAndAA(
I,
A,
DL, AliasScope, NoAlias, MaxDepth - 1);
1563class AMDGPULowerModuleLDSLegacy :
public ModulePass {
1576 bool runOnModule(
Module &M)
override {
1578 auto &TPC = getAnalysis<TargetPassConfig>();
1587char AMDGPULowerModuleLDSLegacy::ID = 0;
1592 "Lower uses of LDS variables from non-kernel functions",
1596 "Lower uses of LDS variables from non-kernel functions",
1601 return new AMDGPULowerModuleLDSLegacy(TM);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const std::string FatArchTraits< MachO::fat_arch >::StructName
This file provides an interface for laying out a sequence of fields as a struct in a way that attempt...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
This file defines the SmallString class.
Target-Independent Code Generator Pass Configuration Options pass.
static bool EnableObjectLinking
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
The basic data container for the call graph of a Module of IR.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
This is an important base class in LLVM.
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
A parsed version of the target data layout string and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
void setLinkage(LinkageTypes LT)
PointerType * getType() const
Global values are always pointers.
@ InternalLinkage
Rename collisions when linking (static functions).
@ ExternalLinkage
Externally visible function.
Type * getValueType() const
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
bool hasInitializer() const
Definitions have initializers, declarations don't.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool runOnModule(Module &) override
ImmutablePasses are never run.
This is an important class for using LLVM in a threaded context.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static LLVM_ABI MDNode * concatenate(MDNode *A, MDNode *B)
Methods for metadata merging.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
LLVM_ABI void addOperand(MDNode *M)
A container for an operand bundle being viewed as a set of values rather than a set of uses.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A simple AA result which uses scoped-noalias metadata to answer queries.
static LLVM_ABI void collectScopedDomains(const MDNode *NoAlias, SmallPtrSetImpl< const MDNode * > &Domains)
Collect the set of scoped domains relevant to the noalias scopes.
bool insert(const value_type &X)
Insert a new element into the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Class to represent struct types.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
iterator_range< user_iterator > users()
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
A raw_ostream that writes to an std::string.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
bool isDynamicLDS(const GlobalVariable &GV)
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, ArrayRef< StringRef > FnAttrs)
Strip FnAttr attribute from any functions where we may have introduced its use.
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
void getUsesOfLDSByFunction(const CallGraph &CG, Module &M, FunctionVariableMap &kernels, FunctionVariableMap &Functions)
LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)
DenseMap< Function *, DenseSet< GlobalVariable * > > FunctionVariableMap
TargetExtType * isNamedBarrier(const GlobalVariable &GV)
bool isLDSVariableToLower(const GlobalVariable &GV)
bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)
Align getAlign(const DataLayout &DL, const GlobalVariable *GV)
DenseMap< GlobalVariable *, DenseSet< Function * > > VariableFunctionMap
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI std::string getUniqueModuleId(Module *M)
Produce a unique identifier for this module by taking the MD5 sum of the names of the module's strong...
void sort(IteratorTy Start, IteratorTy End)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
char & AMDGPULowerModuleLDSLegacyPassID
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return the intersection of A and B (the elements present in both sets)
LLVM_ABI void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)
Removes global values from the llvm.used and llvm.compiler.used arrays.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
LLVM_ABI std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
const AMDGPUTargetMachine & TM
FunctionVariableMap direct_access
FunctionVariableMap indirect_access
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.