198#include "llvm/IR/IntrinsicsAMDGPU.h"
215#define DEBUG_TYPE "amdgpu-lower-module-lds"
223 "amdgpu-super-align-lds-globals",
224 cl::desc(
"Increase alignment of LDS if it is not on align boundary"),
227enum class LoweringKind { module, table, kernel, hybrid };
229 "amdgpu-lower-module-lds-strategy",
233 clEnumValN(LoweringKind::table,
"table",
"Lower via table lookup"),
234 clEnumValN(LoweringKind::module,
"module",
"Lower via module struct"),
236 LoweringKind::kernel,
"kernel",
237 "Lower variables reachable from one kernel, otherwise abort"),
239 "Lower via mixture of above strategies")));
241template <
typename T> std::vector<T> sortByName(std::vector<T> &&V) {
242 llvm::sort(V, [](
const auto *L,
const auto *R) {
243 return L->getName() < R->getName();
245 return {std::move(V)};
248class AMDGPULowerModuleLDS {
252 removeLocalVarsFromUsedLists(
Module &M,
264 LocalVar->removeDeadConstantUsers();
289 IRBuilder<> Builder(Entry, Entry->getFirstNonPHIIt());
292 Func->getParent(), Intrinsic::donothing, {});
294 Value *UseInstance[1] = {
295 Builder.CreateConstInBoundsGEP1_32(SGV->
getValueType(), SGV, 0)};
304 struct LDSVariableReplacement {
314 static Constant *getAddressesOfVariablesInKernel(
326 auto ConstantGepIt = LDSVarsToConstantGEP.
find(GV);
327 if (ConstantGepIt != LDSVarsToConstantGEP.
end()) {
328 Elements.push_back(ConstantGepIt->second);
340 if (Variables.
empty()) {
345 const size_t NumberVariables = Variables.
size();
346 const size_t NumberKernels = kernels.
size();
355 std::vector<Constant *> overallConstantExprElts(NumberKernels);
356 for (
size_t i = 0; i < NumberKernels; i++) {
357 auto Replacement = KernelToReplacement.
find(kernels[i]);
358 overallConstantExprElts[i] =
359 (Replacement == KernelToReplacement.
end())
361 : getAddressesOfVariablesInKernel(
362 Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
377 Value *OptionalIndex) {
383 Value *tableKernelIndex = getTableLookupKernelIndex(M,
I->getFunction());
389 Builder.SetInsertPoint(
I);
393 ConstantInt::get(I32, 0),
399 Value *Address = Builder.CreateInBoundsGEP(
400 LookupTable->getValueType(), LookupTable, GEPIdx, GV->
getName());
402 Value *Loaded = Builder.CreateLoad(GV->
getType(), Address);
406 void replaceUsesInInstructionsWithTableLookup(
414 for (
size_t Index = 0; Index < ModuleScopeVariables.
size(); Index++) {
415 auto *GV = ModuleScopeVariables[Index];
422 replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,
423 ConstantInt::get(I32, Index));
434 if (VariableSet.
empty())
437 for (
Function &Func : M.functions()) {
438 if (Func.isDeclaration() || !
isKernel(Func))
452 chooseBestVariableForModuleStrategy(
const DataLayout &
DL,
458 size_t UserCount = 0;
461 CandidateTy() =
default;
464 : GV(GV), UserCount(UserCount),
Size(AllocSize) {}
468 if (UserCount <
Other.UserCount) {
471 if (UserCount >
Other.UserCount) {
489 CandidateTy MostUsed;
491 for (
auto &K : LDSVars) {
493 if (K.second.size() <= 1) {
499 if (MostUsed < Candidate)
500 MostUsed = Candidate;
524 auto [It, Inserted] = tableKernelIndexCache.
try_emplace(
F);
526 auto InsertAt =
F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
529 It->second = Builder.CreateIntrinsic(Intrinsic::amdgcn_lds_kernel_id, {});
535 static std::vector<Function *> assignLDSKernelIDToEachKernel(
543 std::vector<Function *> OrderedKernels;
544 if (!KernelsThatAllocateTableLDS.
empty() ||
545 !KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
547 for (
Function &Func : M->functions()) {
548 if (Func.isDeclaration())
553 if (KernelsThatAllocateTableLDS.
contains(&Func) ||
554 KernelsThatIndirectlyAllocateDynamicLDS.
contains(&Func)) {
556 OrderedKernels.push_back(&Func);
561 OrderedKernels = sortByName(std::move(OrderedKernels));
567 if (OrderedKernels.size() > UINT32_MAX) {
572 for (
size_t i = 0; i < OrderedKernels.size(); i++) {
576 OrderedKernels[i]->setMetadata(
"llvm.amdgcn.lds.kernel.id",
580 return OrderedKernels;
583 static void partitionVariablesIntoIndirectStrategies(
592 LoweringKindLoc != LoweringKind::hybrid
594 : chooseBestVariableForModuleStrategy(
595 M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);
600 ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
603 for (
auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
609 assert(!K.second.empty());
612 DynamicVariables.
insert(GV);
616 switch (LoweringKindLoc) {
617 case LoweringKind::module:
618 ModuleScopeVariables.insert(GV);
621 case LoweringKind::table:
622 TableLookupVariables.
insert(GV);
625 case LoweringKind::kernel:
626 if (K.second.size() == 1) {
627 KernelAccessVariables.
insert(GV);
631 "cannot lower LDS '" + GV->
getName() +
632 "' to kernel access as it is reachable from multiple kernels");
636 case LoweringKind::hybrid: {
637 if (GV == HybridModuleRoot) {
638 assert(K.second.size() != 1);
639 ModuleScopeVariables.insert(GV);
640 }
else if (K.second.size() == 1) {
641 KernelAccessVariables.
insert(GV);
642 }
else if (K.second == HybridModuleRootKernels) {
643 ModuleScopeVariables.insert(GV);
645 TableLookupVariables.
insert(GV);
654 assert(ModuleScopeVariables.
size() + TableLookupVariables.
size() +
655 KernelAccessVariables.
size() + DynamicVariables.
size() ==
656 LDSToKernelsThatNeedToAccessItIndirectly.size());
669 if (ModuleScopeVariables.
empty()) {
675 LDSVariableReplacement ModuleScopeReplacement =
676 createLDSVariableReplacement(M,
"llvm.amdgcn.module.lds",
677 ModuleScopeVariables);
685 recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
688 removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
691 replaceLDSVariablesWithStruct(
692 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
705 for (
Function &Func : M.functions()) {
706 if (Func.isDeclaration() || !
isKernel(Func))
709 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
710 replaceLDSVariablesWithStruct(
711 M, ModuleScopeVariables, ModuleScopeReplacement, [&](
Use &U) {
720 markUsedByKernel(&Func, ModuleScopeReplacement.SGV);
724 return ModuleScopeReplacement.SGV;
728 lowerKernelScopeStructVariables(
737 for (
Function &Func : M.functions()) {
738 if (Func.isDeclaration() || !
isKernel(Func))
746 KernelUsedVariables.
insert(v);
754 KernelUsedVariables.
insert(v);
760 if (KernelsThatAllocateModuleLDS.
contains(&Func)) {
762 KernelUsedVariables.
erase(v);
766 if (KernelUsedVariables.
empty()) {
778 if (!Func.hasName()) {
782 std::string VarName =
783 (
Twine(
"llvm.amdgcn.kernel.") + Func.getName() +
".lds").str();
786 createLDSVariableReplacement(M, VarName, KernelUsedVariables);
794 markUsedByKernel(&Func, Replacement.SGV);
797 removeLocalVarsFromUsedLists(M, KernelUsedVariables);
798 KernelToReplacement[&Func] = Replacement;
801 replaceLDSVariablesWithStruct(
802 M, KernelUsedVariables, Replacement, [&Func](
Use &U) {
804 return I &&
I->getFunction() == &Func;
807 return KernelToReplacement;
827 Align MaxDynamicAlignment(1);
831 MaxDynamicAlignment =
837 UpdateMaxAlignment(GV);
841 UpdateMaxAlignment(GV);
848 Twine(
"llvm.amdgcn." + func->
getName() +
".dynlds"),
nullptr,
850 N->setAlignment(MaxDynamicAlignment);
860 std::vector<Function *>
const &OrderedKernels) {
862 if (!KernelsThatIndirectlyAllocateDynamicLDS.
empty()) {
867 std::vector<Constant *> newDynamicLDS;
870 for (
auto &func : OrderedKernels) {
872 if (KernelsThatIndirectlyAllocateDynamicLDS.
contains(func)) {
879 buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo, func);
881 KernelToCreatedDynamicLDS[func] =
N;
883 markUsedByKernel(func,
N);
885 newDynamicLDS.push_back(
N);
890 assert(OrderedKernels.size() == newDynamicLDS.size());
896 "llvm.amdgcn.dynlds.offset.table",
nullptr,
907 replaceUseWithTableLookup(M, Builder, table, GV, U,
nullptr);
911 return KernelToCreatedDynamicLDS;
917 bool runOnModuleLinkTime(
Module &M) {
918 bool Changed = superAlignLDSGlobals(M);
925 if (KernelLDSUses.empty() && FunctionLDSUses.empty())
929 assert(!ModuleId.empty() &&
930 "modules with LDS variables should have a unique ID");
933 for (
auto &[
F, Vars] : KernelLDSUses)
934 AllLDSUses[
F].insert(Vars.begin(), Vars.end());
935 for (
auto &[
F, Vars] : FunctionLDSUses)
936 AllLDSUses[
F].insert(Vars.begin(), Vars.end());
939 for (
auto &[
F, Vars] : AllLDSUses) {
950 for (
auto &[
F, Vars] : AllLDSUses) {
952 VarToFuncs[V].push_back(
F);
961 for (
auto &[V, Funcs] : VarToFuncs) {
962 if (!V->hasLocalLinkage() || Funcs.size() > 1) {
963 GlobalScopeVars.
insert(V);
964 if (V->hasLocalLinkage())
965 InternalMultiUserVars.
insert(V);
973 for (
auto &KV : AllLDSUses) {
977 if (!GlobalScopeVars.
count(V))
981 if (FuncScopeVars.
empty())
986 ? (
"__amdgpu_lds." +
F->getName() + ModuleId).str()
987 : (
"__amdgpu_lds." +
F->getName()).str();
988 LDSVariableReplacement Replacement =
989 createLDSVariableReplacement(M,
StructName, FuncScopeVars);
996 replaceLDSVariablesWithStruct(
997 M, FuncScopeVars, Replacement, [
F](
const Use &U) {
999 return I &&
I->getFunction() ==
F;
1002 AllReplacedVars.
insert(FuncScopeVars.
begin(), FuncScopeVars.
end());
1009 if (!InternalMultiUserVars.
empty()) {
1010 std::string
StructName =
"__amdgpu_lds.__internal" + ModuleId;
1011 LDSVariableReplacement Replacement =
1012 createLDSVariableReplacement(M,
StructName, InternalMultiUserVars);
1018 replaceLDSVariablesWithStruct(
1019 M, InternalMultiUserVars, Replacement,
1025 FuncsUsingInternalVars.insert(
F);
1027 for (
Function *
F : FuncsUsingInternalVars)
1030 AllReplacedVars.
insert(InternalMultiUserVars.begin(),
1031 InternalMultiUserVars.end());
1037 V->setInitializer(
nullptr);
1044 NamedMDNode *LdsMD = M.getOrInsertNamedMetadata(
"amdgpu.lds.uses");
1046 for (
auto &[
F, SGV] : FuncToLdsStruct)
1050 for (
auto &[V, Funcs] : VarToFuncs) {
1051 if (GlobalScopeVars.
count(V) && !InternalMultiUserVars.
count(V)) {
1061 AllLDSVarsForCleanup.
insert(GlobalScopeVars.
begin(), GlobalScopeVars.
end());
1062 removeLocalVarsFromUsedLists(M, AllLDSVarsForCleanup);
1072 bool runOnModule(
Module &M) {
1074 return runOnModuleLinkTime(M);
1075 return runOnModuleNormal(M);
1078 bool runOnModuleNormal(
Module &M) {
1080 bool Changed = superAlignLDSGlobals(M);
1096 LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(
F);
1105 partitionVariablesIntoIndirectStrategies(
1106 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
1107 ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,
1114 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1115 ModuleScopeVariables);
1117 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1118 TableLookupVariables);
1121 kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
1124 GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
1125 M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);
1128 lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
1129 KernelsThatAllocateModuleLDS,
1130 MaybeModuleScopeStruct);
1133 for (
auto &GV : KernelAccessVariables) {
1134 auto &funcs = LDSToKernelsThatNeedToAccessItIndirectly[GV];
1135 assert(funcs.size() == 1);
1136 LDSVariableReplacement Replacement =
1137 KernelToReplacement[*(funcs.begin())];
1142 replaceLDSVariablesWithStruct(M, Vec, Replacement, [](
Use &U) {
1148 std::vector<Function *> OrderedKernels =
1149 assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,
1150 KernelsThatIndirectlyAllocateDynamicLDS);
1152 if (!KernelsThatAllocateTableLDS.
empty()) {
1158 auto TableLookupVariablesOrdered =
1159 sortByName(std::vector<GlobalVariable *>(TableLookupVariables.
begin(),
1160 TableLookupVariables.
end()));
1163 M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
1164 replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
1169 lowerDynamicLDSVariables(M, LDSUsesInfo,
1170 KernelsThatIndirectlyAllocateDynamicLDS,
1171 DynamicVariables, OrderedKernels);
1176 for (
auto *KernelSet : {&KernelsThatIndirectlyAllocateDynamicLDS,
1177 &KernelsThatAllocateTableLDS})
1186 for (
Function &Func : M.functions()) {
1187 if (Func.isDeclaration() || !
isKernel(Func))
1201 const bool AllocateModuleScopeStruct =
1202 MaybeModuleScopeStruct &&
1203 KernelsThatAllocateModuleLDS.
contains(&Func);
1205 auto Replacement = KernelToReplacement.
find(&Func);
1206 const bool AllocateKernelScopeStruct =
1207 Replacement != KernelToReplacement.
end();
1209 const bool AllocateDynamicVariable =
1210 KernelToCreatedDynamicLDS.
contains(&Func);
1214 if (AllocateModuleScopeStruct) {
1220 if (AllocateKernelScopeStruct) {
1223 recordLDSAbsoluteAddress(&M, KernelStruct,
Offset);
1231 if (AllocateDynamicVariable) {
1232 GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];
1234 recordLDSAbsoluteAddress(&M, DynamicVariable,
Offset);
1249 if (AllocateDynamicVariable)
1252 Func.addFnAttr(
"amdgpu-lds-size", Buffer);
1271 static bool superAlignLDSGlobals(
Module &M) {
1274 if (!SuperAlignLDSGlobals) {
1278 for (
auto &GV : M.globals()) {
1298 Alignment = std::max(Alignment,
Align(16));
1299 }
else if (GVSize > 4) {
1301 Alignment = std::max(Alignment,
Align(8));
1302 }
else if (GVSize > 2) {
1304 Alignment = std::max(Alignment,
Align(4));
1305 }
else if (GVSize > 1) {
1307 Alignment = std::max(Alignment,
Align(2));
1318 static LDSVariableReplacement createLDSVariableReplacement(
1319 Module &M, std::string VarName,
1336 auto Sorted = sortByName(std::vector<GlobalVariable *>(
1337 LDSVarsToTransform.
begin(), LDSVarsToTransform.
end()));
1348 std::vector<GlobalVariable *> LocalVars;
1350 LocalVars.reserve(LDSVarsToTransform.
size());
1351 IsPaddingField.
reserve(LDSVarsToTransform.
size());
1354 for (
auto &
F : LayoutFields) {
1357 Align DataAlign =
F.Alignment;
1360 if (
uint64_t Rem = CurrentOffset % DataAlignV) {
1361 uint64_t Padding = DataAlignV - Rem;
1373 CurrentOffset += Padding;
1376 LocalVars.push_back(FGV);
1378 CurrentOffset +=
F.Size;
1382 std::vector<Type *> LocalVarTypes;
1383 LocalVarTypes.reserve(LocalVars.size());
1385 LocalVars.cbegin(), LocalVars.cend(), std::back_inserter(LocalVarTypes),
1400 for (
size_t I = 0;
I < LocalVars.size();
I++) {
1402 Constant *GEPIdx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32,
I)};
1404 if (IsPaddingField[
I]) {
1411 assert(Map.size() == LDSVarsToTransform.
size());
1412 return {SGV, std::move(Map)};
1415 template <
typename PredicateTy>
1416 static void replaceLDSVariablesWithStruct(
1418 const LDSVariableReplacement &Replacement, PredicateTy
Predicate) {
1425 auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(
1426 LDSVarsToTransformArg.
begin(), LDSVarsToTransformArg.
end()));
1432 const size_t NumberVars = LDSVarsToTransform.
size();
1433 if (NumberVars > 1) {
1435 AliasScopes.
reserve(NumberVars);
1437 for (
size_t I = 0;
I < NumberVars;
I++) {
1441 NoAliasList.
append(&AliasScopes[1], AliasScopes.
end());
1446 for (
size_t I = 0;
I < NumberVars;
I++) {
1448 Constant *
GEP = Replacement.LDSVarsToConstantGEP.at(GV);
1452 APInt APOff(
DL.getIndexTypeSizeInBits(
GEP->getType()), 0);
1453 GEP->stripAndAccumulateInBoundsConstantOffsets(
DL, APOff);
1460 NoAliasList[
I - 1] = AliasScopes[
I - 1];
1466 refineUsesAlignmentAndAA(
GEP,
A,
DL, AliasScope, NoAlias);
1470 static void refineUsesAlignmentAndAA(
Value *Ptr,
Align A,
1472 MDNode *NoAlias,
unsigned MaxDepth = 5) {
1473 if (!MaxDepth || (
A == 1 && !AliasScope))
1480 if (AliasScope &&
I->mayReadOrWriteMemory()) {
1481 MDNode *AS =
I->getMetadata(LLVMContext::MD_alias_scope);
1484 I->setMetadata(LLVMContext::MD_alias_scope, AS);
1486 MDNode *NA =
I->getMetadata(LLVMContext::MD_noalias);
1510 if (Intersection.empty()) {
1515 I->setMetadata(LLVMContext::MD_noalias, NA);
1520 LI->setAlignment(std::max(
A, LI->getAlign()));
1524 if (
SI->getPointerOperand() == Ptr)
1525 SI->setAlignment(std::max(
A,
SI->getAlign()));
1531 if (AI->getPointerOperand() == Ptr)
1532 AI->setAlignment(std::max(
A, AI->getAlign()));
1536 if (AI->getPointerOperand() == Ptr)
1537 AI->setAlignment(std::max(
A, AI->getAlign()));
1541 unsigned BitWidth =
DL.getIndexTypeSizeInBits(
GEP->getType());
1543 if (
GEP->getPointerOperand() == Ptr) {
1545 if (
GEP->accumulateConstantOffset(
DL, Off))
1547 refineUsesAlignmentAndAA(
GEP, GA,
DL, AliasScope, NoAlias,
1553 if (
I->getOpcode() == Instruction::BitCast ||
1554 I->getOpcode() == Instruction::AddrSpaceCast)
1555 refineUsesAlignmentAndAA(
I,
A,
DL, AliasScope, NoAlias, MaxDepth - 1);
1561class AMDGPULowerModuleLDSLegacy :
public ModulePass {
1574 bool runOnModule(
Module &M)
override {
1576 auto &TPC = getAnalysis<TargetPassConfig>();
1585char AMDGPULowerModuleLDSLegacy::ID = 0;
1590 "Lower uses of LDS variables from non-kernel functions",
1594 "Lower uses of LDS variables from non-kernel functions",
1599 return new AMDGPULowerModuleLDSLegacy(TM);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
const std::string FatArchTraits< MachO::fat_arch >::StructName
This file provides an interface for laying out a sequence of fields as a struct in a way that attempt...
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This is the interface for a metadata-based scoped no-alias analysis.
This file defines generic set operations that may be used on sets of different types,...
This file defines the SmallString class.
Target-Independent Code Generator Pass Configuration Options pass.
static bool EnableObjectLinking
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
LLVM Basic Block Representation.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
The basic data container for the call graph of a Module of IR.
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static Constant * getGetElementPtr(Type *Ty, Constant *C, ArrayRef< Constant * > IdxList, GEPNoWrapFlags NW=GEPNoWrapFlags::none(), std::optional< ConstantRange > InRange=std::nullopt, Type *OnlyIfReducedTy=nullptr)
Getelementptr form.
This is an important base class in LLVM.
LLVM_ABI void removeDeadConstantUsers() const
If there are any dead constant users dangling off of this constant, remove them.
A parsed version of the target data layout string, and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Implements a dense probed hash-table based set.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
void setLinkage(LinkageTypes LT)
PointerType * getType() const
Global values are always pointers.
@ InternalLinkage
Rename collisions when linking (static functions).
@ ExternalLinkage
Externally visible function.
Type * getValueType() const
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
bool hasInitializer() const
Definitions have initializers, declarations don't.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
LLVM_ABI void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool runOnModule(Module &) override
ImmutablePasses are never run.
This is an important class for using LLVM in a threaded context.
MDNode * createAnonymousAliasScope(MDNode *Domain, StringRef Name=StringRef())
Return metadata appropriate for an alias scope root node.
MDNode * createAnonymousAliasScopeDomain(StringRef Name=StringRef())
Return metadata appropriate for an alias scope domain node.
static LLVM_ABI MDNode * getMostGenericAliasScope(MDNode *A, MDNode *B)
static LLVM_ABI MDNode * concatenate(MDNode *A, MDNode *B)
Methods for metadata merging.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDNode * intersect(MDNode *A, MDNode *B)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
LLVM_ABI void addOperand(MDNode *M)
A container for an operand bundle being viewed as a set of values rather than a set of uses.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
A simple AA result which uses scoped-noalias metadata to answer queries.
static LLVM_ABI void collectScopedDomains(const MDNode *NoAlias, SmallPtrSetImpl< const MDNode * > &Domains)
Collect the set of scoped domains relevant to the noalias scopes.
bool insert(const value_type &X)
Insert a new element into the SetVector.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Class to represent struct types.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
A Use represents the edge between a Value definition and its users.
LLVM Value Representation.
iterator_range< user_iterator > users()
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
std::pair< iterator, bool > insert(const ValueT &V)
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
bool erase(const ValueT &V)
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
A raw_ostream that writes to an std::string.
@ LOCAL_ADDRESS
Address space for local memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
bool isDynamicLDS(const GlobalVariable &GV)
void removeFnAttrFromReachable(CallGraph &CG, Function *KernelRoot, ArrayRef< StringRef > FnAttrs)
Strip FnAttr attribute from any functions where we may have introduced its use.
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
void getUsesOfLDSByFunction(const CallGraph &CG, Module &M, FunctionVariableMap &kernels, FunctionVariableMap &Functions)
LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)
DenseMap< Function *, DenseSet< GlobalVariable * > > FunctionVariableMap
TargetExtType * isNamedBarrier(const GlobalVariable &GV)
bool isLDSVariableToLower(const GlobalVariable &GV)
bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)
Align getAlign(const DataLayout &DL, const GlobalVariable *GV)
DenseMap< GlobalVariable *, DenseSet< Function * > > VariableFunctionMap
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI std::string getUniqueModuleId(Module *M)
Produce a unique identifier for this module by taking the MD5 sum of the names of the module's strong...
void sort(IteratorTy Start, IteratorTy End)
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
char & AMDGPULowerModuleLDSLegacyPassID
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2)
set_intersection(A, B) - Return A ^ B, i.e. the intersection of A and B (the elements present in both sets)
LLVM_ABI void removeFromUsedLists(Module &M, function_ref< bool(Constant *)> ShouldRemove)
Removes global values from the llvm.used and llvm.compiler.used arrays.
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
ModulePass * createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM=nullptr)
LLVM_ABI void appendToCompilerUsed(Module &M, ArrayRef< GlobalValue * > Values)
Adds global values to the llvm.compiler.used list.
LLVM_ABI std::pair< uint64_t, Align > performOptimizedStructLayout(MutableArrayRef< OptimizedStructLayoutField > Fields)
Compute a layout for a struct containing the given fields, making a best-effort attempt to minimize t...
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
const AMDGPUTargetMachine & TM
FunctionVariableMap direct_access
FunctionVariableMap indirect_access
This struct is a compact representation of a valid (non-zero power of two) alignment.
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.