33#define DEBUG_TYPE "amdgpu-lower-exec-sync"
44 bool NeedsReplacement =
false;
49 NeedsReplacement =
true;
54 if (!NeedsReplacement)
66 U.getUser()->replaceUsesOfWith(GV, NewGV);
86 sort(V, [](
const auto *L,
const auto *R) {
87 return L->getName() < R->getName();
89 return {std::move(V)};
93static bool lowerExecSyncGlobalVariables(
101 for (
auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
108 if (LDSToKernelsThatNeedToAccessItIndirectly[GV].
size() > 1) {
115 LDSToKernelsThatNeedToAccessItIndirectly.erase(GV);
117 OrderedGVs = sortByName(std::move(OrderedGVs));
120 unsigned BarId = NumAbsolutes + 1;
122 NumAbsolutes += BarCnt;
126 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
127 recordLDSAbsoluteAddress(&M, GV,
Offset);
140 OrderedKernels = sortByName(std::move(OrderedKernels));
155 OrderedGVs = sortByName(std::move(OrderedGVs));
159 auto NewGV = uniquifyGVPerKernel(M, GV,
F);
162 unsigned BarId = Kernel2BarId[
F];
163 BarId += NumAbsolutes + 1;
165 Kernel2BarId[
F] += BarCnt;
166 unsigned Offset = 0x802000u | BarrierScope << 9 | BarId << 4;
167 recordLDSAbsoluteAddress(&M, NewGV,
Offset);
185static bool handleNamedBarriersForObjectLinking(
Module &M) {
192 BarrierToFuncs[&GV].
insert(
I->getFunction());
195 if (BarrierToFuncs.
empty())
199 NamedMDNode *BarMD = M.getOrInsertNamedMetadata(
"amdgpu.named_barrier.uses");
201 std::string ModuleId;
203 assert(!ModuleId.empty() &&
204 "modules with named barriers should have a unique ID");
205 for (
auto &[V, Funcs] : BarrierToFuncs) {
206 if (V->hasLocalLinkage())
207 V->setName(
"__amdgpu_named_barrier." + V->getName() + ModuleId);
208 else if (!V->getName().starts_with(
"__amdgpu_named_barrier"))
209 V->setName(
"__amdgpu_named_barrier." + V->getName());
210 V->setInitializer(
nullptr);
222static bool runLowerExecSyncGlobals(
Module &M) {
224 return handleNamedBarriersForObjectLinking(M);
240 LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(
F);
246 Changed |= lowerExecSyncGlobalVariables(
247 M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly);
252class AMDGPULowerExecSyncLegacy :
public ModulePass {
256 bool runOnModule(
Module &M)
override;
261char AMDGPULowerExecSyncLegacy::ID = 0;
265 "AMDGPU lowering of execution synchronization",
false,
269 "AMDGPU lowering of execution synchronization",
false,
272bool AMDGPULowerExecSyncLegacy::runOnModule(
Module &M) {
273 return runLowerExecSyncGlobals(M);
277 return new AMDGPULowerExecSyncLegacy();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file provides interfaces used to build and manipulate a call graph, which is a very useful tool ...
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options pass.
static bool EnableObjectLinking
The basic data container for the call graph of a Module of IR.
A parsed version of the target data layout string, and methods for querying it.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set a particular kind of metadata attachment.
LinkageTypes getLinkage() const
LLVM_ABI bool isAbsoluteSymbolRef() const
Returns whether this is a reference to an absolute symbol.
ThreadLocalMode getThreadLocalMode() const
PointerType * getType() const
Global values are always pointers.
@ ExternalLinkage
Externally visible function.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void copyAttributesFrom(const GlobalVariable *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a GlobalVariable) fro...
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
This is an important class for using LLVM in a threaded context.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
A Module instance is used to store all the information related to an LLVM module.
LLVM_ABI void addOperand(MDNode *M)
unsigned getAddressSpace() const
Return the address space of the Pointer type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Target-Independent Code Generator Pass Configuration Options.
A Use represents the edge between a Value definition and its users.
iterator_range< user_iterator > users()
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
@ LOCAL_ADDRESS
Address space for local memory.
@ BARRIER_SCOPE_WORKGROUP
LLVM_READNONE constexpr bool isKernel(CallingConv::ID CC)
LDSUsesInfoTy getTransitiveUsesOfLDS(const CallGraph &CG, Module &M)
TargetExtType * isNamedBarrier(const GlobalVariable &GV)
bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M)
DenseMap< GlobalVariable *, DenseSet< Function * > > VariableFunctionMap
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
char & AMDGPULowerExecSyncLegacyPassID
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
LLVM_ABI std::string getUniqueModuleId(Module *M)
Produce a unique identifier for this module by taking the MD5 sum of the names of the module's strong...
void sort(IteratorTy Start, IteratorTy End)
ModulePass * createAMDGPULowerExecSyncLegacyPass()
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
FunctionVariableMap direct_access
FunctionVariableMap indirect_access