58#define DEBUG_TYPE "amdgpu-rewrite-out-arguments"
63 "amdgpu-any-address-space-out-arguments",
64 cl::desc(
"Replace pointer out arguments with "
65 "struct returns for non-private address space"),
70 "amdgpu-max-return-arg-num-regs",
71 cl::desc(
"Approximately limit number of return registers for replacing out arguments"),
76 "Number out arguments moved to struct return values");
78 "Number of functions with out arguments moved to struct return values");
107 "AMDGPU Rewrite Out Arguments",
false,
false)
112char AMDGPURewriteOutArguments::
ID = 0;
114Type *AMDGPURewriteOutArguments::getStoredType(
Value &Arg)
const {
115 const int MaxUses = 10;
119 for (
Use &U : Arg.uses())
122 Type *StoredType =
nullptr;
123 while (!Worklist.
empty()) {
126 if (
auto *BCI = dyn_cast<BitCastInst>(U->getUser())) {
127 for (
Use &U : BCI->uses())
132 if (
auto *SI = dyn_cast<StoreInst>(U->getUser())) {
133 if (UseCount++ > MaxUses)
136 if (!SI->isSimple() ||
140 if (StoredType && StoredType != SI->getValueOperand()->getType())
142 StoredType = SI->getValueOperand()->getType();
153Type *AMDGPURewriteOutArguments::getOutArgumentType(
Argument &Arg)
const {
158 if (!ArgTy || (ArgTy->getAddressSpace() !=
DL->getAllocaAddrSpace() &&
164 Type *StoredType = getStoredType(Arg);
165 if (!StoredType ||
DL->getTypeStoreSize(StoredType) > MaxOutArgSizeBytes)
171bool AMDGPURewriteOutArguments::doInitialization(
Module &M) {
172 DL = &
M.getDataLayout();
176bool AMDGPURewriteOutArguments::runOnFunction(
Function &
F) {
181 if (
F.isVarArg() ||
F.hasStructRetAttr() ||
185 MDA = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
187 unsigned ReturnNumRegs = 0;
191 if (!
RetTy->isVoidTy()) {
192 ReturnNumRegs =
DL->getTypeStoreSize(
RetTy) / 4;
202 if (
Type *Ty = getOutArgumentType(Arg)) {
204 <<
" in function " <<
F.getName() <<
'\n');
218 if (
ReturnInst *RI = dyn_cast<ReturnInst>(&BB.back()))
237 for (
const auto &Pair : OutArgs) {
238 bool ThisReplaceable =
true;
242 Type *ArgTy = Pair.second;
249 unsigned ArgNumRegs =
DL->getTypeStoreSize(ArgTy) / 4;
265 LLVM_DEBUG(
dbgs() <<
"Found out argument store: " << *SI <<
'\n');
268 ThisReplaceable =
false;
273 if (!ThisReplaceable)
276 for (std::pair<ReturnInst *, StoreInst *> Store : ReplaceableStores) {
277 Value *ReplVal =
Store.second->getValueOperand();
279 auto &ValVec = Replacements[
Store.first];
281 [OutArg](
const std::pair<Argument *, Value *> &Entry) {
282 return Entry.first == OutArg;
285 <<
"Saw multiple out arg stores" << *OutArg <<
'\n');
288 ThisReplaceable =
false;
292 ValVec.emplace_back(OutArg, ReplVal);
293 Store.second->eraseFromParent();
296 if (ThisReplaceable) {
299 ++NumOutArgumentsReplaced;
305 if (Replacements.
empty())
312 F.getFunctionType()->params(),
315 LLVM_DEBUG(
dbgs() <<
"Computed new return type: " << *NewRetTy <<
'\n');
318 F.getName() +
".body");
319 F.getParent()->getFunctionList().insert(
F.getIterator(), NewFunc);
338 for (std::pair<ReturnInst *, ReplacementVec> &Replacement : Replacements) {
348 NewRetVal =
B.CreateInsertValue(NewRetVal, RetVal, RetIdx++);
350 for (std::pair<Argument *, Value *> ReturnPoint : Replacement.second)
351 NewRetVal =
B.CreateInsertValue(NewRetVal, ReturnPoint.second, RetIdx++);
356 B.CreateRet(NewRetVal);
374 CallInst *StubCall =
B.CreateCall(NewFunc, StubCallArgs);
376 int RetIdx =
RetTy->isVoidTy() ? 0 : 1;
385 Value *Val =
B.CreateExtractValue(StubCall, RetIdx++);
386 B.CreateAlignedStore(Val, &Arg,
Align);
389 if (!
RetTy->isVoidTy()) {
390 B.CreateRet(
B.CreateExtractValue(StubCall, 0));
396 F.addFnAttr(Attribute::AlwaysInline);
398 ++NumOutArgumentFunctionsReplaced;
403 return new AMDGPURewriteOutArguments();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMDGPU Rewrite Out Arguments
static cl::opt< unsigned > MaxNumRetRegs("amdgpu-max-return-arg-num-regs", cl::desc("Approximately limit number of return registers for replacing out arguments"), cl::Hidden, cl::init(16))
static cl::opt< bool > AnyAddressSpace("amdgpu-any-address-space-out-arguments", cl::desc("Replace pointer out arguments with " "struct returns for non-private address space"), cl::Hidden, cl::init(false))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
This class represents an incoming formal argument to a Function.
bool hasByValAttr() const
Return true if this argument has the byval attribute.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
MaybeAlign getParamAlign() const
If this is a byval or inalloca argument, return its alignment.
bool hasStructRetAttr() const
Return true if this argument has the sret attribute.
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
LLVM Basic Block Representation.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
const Function * getParent() const
Return the enclosing method, or null if none.
This class represents a function call, abstracting a target machine's calling convention.
A parsed version of the target data layout string, and methods for querying it.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
void splice(Function::iterator ToIt, Function *FromF)
Transfer all blocks from FromF to this function at ToIt.
void stealArgumentListFrom(Function &Src)
Steal arguments from another function.
void removeRetAttrs(const AttributeMask &Attrs)
removes the attributes from the return value list of attributes.
void copyAttributesFrom(const Function *Src)
copyAttributesFrom - copy all additional attributes (those not needed to create a Function) from the ...
void setComdat(Comdat *C)
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
This is an important class for using LLVM in a threaded context.
A memory dependence query can return one of three different answers.
bool isDef() const
Tests if this MemDepResult represents a query that is an instruction definition dependency.
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
Provides a lazy, caching interface for making common memory aliasing information queries,...
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDependenceResults instance.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
A Module instance is used to store all the information related to an LLVM module.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
Class to represent struct types.
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
The instances of the Type class are immutable: once they are created, they are never changed.
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool isEntryFunctionCC(CallingConv::ID CC)
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAMDGPURewriteOutArgumentsPass()
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
This struct is a compact representation of a valid (non-zero power of two) alignment.