33#define DEBUG_TYPE "amdgpu-lower-exec-sync"
57unsigned allocateExecSyncID(
T &NextAvailableIDTracker,
59 unsigned MaxNumGroup,
unsigned IDCnt) {
60 constexpr unsigned InitialVal = 1;
61 unsigned NextID = InitialVal;
64 NextAvailableIDTracker.lookup(
F);
65 unsigned ID = InitialVal;
66 if (!NextAvailableID.
empty())
67 ID = NextAvailableID[GroupID];
75 auto Inserted = NextAvailableIDTracker.try_emplace(
F);
78 Inserted.first->second.assign(MaxNumGroup, InitialVal);
80 Inserted.first->second[GroupID] = NextID + IDCnt;
86static bool lowerExecSyncGlobalVariables(
Module &M,
91 constexpr unsigned NumBarScopes = 1;
96 for (
auto *GV : GVs) {
99 auto Iter = AllocationQ.
find(GV);
100 if (Iter == AllocationQ.
end())
103 Iter->second.push_back(
F);
108 for (
auto *GV : GVs) {
111 auto Iter = AllocationQ.
find(GV);
112 if (Iter == AllocationQ.
end())
115 Iter->second.push_back(
F);
122 if (
A.second.size() !=
B.second.size())
123 return A.second.size() >
B.second.size();
126 return A.first->getName() <
B.first->getName();
129 for (
auto &[GV, Kernels] : AllocationQ) {
132 unsigned BarrierScope = ExtTy->getIntParameter(0);
135 unsigned BarID = allocateExecSyncID(KernelBarrierIDs, Kernels,
136 BarrierScope, NumBarScopes, BarCnt);
139 dbgs() <<
" was assigned barrier id: " << BarID
140 <<
" id-count: " << BarCnt <<
"\n");
143 Offset = 0x802000u | BarrierScope << 9 | BarID << 4;
148 recordLDSAbsoluteAddress(&M, GV,
Offset);
162static bool handleNamedBarriersForObjectLinking(
Module &M) {
169 BarrierToFuncs[&GV].
insert(
I->getFunction());
172 if (BarrierToFuncs.
empty())
176 NamedMDNode *BarMD = M.getOrInsertNamedMetadata(
"amdgpu.named_barrier.uses");
178 std::string ModuleId;
180 assert(!ModuleId.empty() &&
181 "modules with named barriers should have a unique ID");
182 for (
auto &[V, Funcs] : BarrierToFuncs) {
183 if (V->hasLocalLinkage())
184 V->setName(
"__amdgpu_named_barrier." + V->getName() + ModuleId);
185 else if (!V->getName().starts_with(
"__amdgpu_named_barrier"))
186 V->setName(
"__amdgpu_named_barrier." + V->getName());
187 V->setInitializer(
nullptr);
199static bool runLowerExecSyncGlobals(
Module &M) {
201 return handleNamedBarriersForObjectLinking(M);
213 Changed |= lowerExecSyncGlobalVariables(M, LDSUsesInfo);
218class AMDGPULowerExecSyncLegacy :
public ModulePass {
222 bool runOnModule(
Module &M)
override;
227char AMDGPULowerExecSyncLegacy::ID = 0;
231 "AMDGPU lowering of execution synchronization",
false,
235 "AMDGPU lowering of execution synchronization",
false,
238bool AMDGPULowerExecSyncLegacy::runOnModule(
Module &M) {
239 return runLowerExecSyncGlobals(M);
243 return new AMDGPULowerExecSyncLegacy();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool for interprocedural optimization.
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values that live in LLVM.
This file defines the DenseMap class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
static bool EnableObjectLinking
Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
The basic data container for the call graph of a Module of IR.
A parsed version of the target data layout string, along with methods for querying it.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LLVM_ABI bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
@ ExternalLinkage
Externally visible function.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
This is an important class for using LLVM in a threaded context.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
This class implements a map that also provides access to all stored values in a deterministic order.
iterator find(const KeyT &Key)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and analyses.
A Module instance is used to store all the information related to an LLVM module.
LLVM_ABI void addOperand(MDNode *M)
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Class to represent target extension types, which are generally unintrospectable from target-independent optimizations.
Target-Independent Code Generator Pass Configuration Options.
iterator_range< user_iterator > users()
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)
TargetExtType * isNamedBarrier(const GlobalVariable &GV)
bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
char & AMDGPULowerExecSyncLegacyPassID
LLVM_ABI std::string getUniqueModuleId(Module *M)
Produce a unique identifier for this module by taking the MD5 sum of the names of the module's strong external symbols that are not comdat members.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
ModulePass * createAMDGPULowerExecSyncLegacyPass()
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
FunctionVariableMap direct_access
FunctionVariableMap indirect_access