Go to the documentation of this file.
33 #define DEBUG_TYPE "printfToRuntime"
37 class AMDGPUPrintfRuntimeBinding final :
public ModulePass {
42 explicit AMDGPUPrintfRuntimeBinding();
45 bool runOnModule(
Module &
M)
override;
53 class AMDGPUPrintfRuntimeBindingImpl {
55 AMDGPUPrintfRuntimeBindingImpl(
58 : GetDT(GetDT), GetTLI(GetTLI) {}
65 bool shouldPrintAsStr(
char Specifier,
Type *OpType)
const;
66 bool lowerPrintfForGpu(
Module &
M);
83 "amdgpu-printf-runtime-binding",
"AMDGPU Printf lowering",
94 return new AMDGPUPrintfRuntimeBinding();
98 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() :
ModulePass(
ID) {
102 void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
104 size_t NumOps)
const {
110 static const char ConvSpecifiers[] =
"cdieEfgGaosuxXp";
111 size_t CurFmtSpecifierIdx = 0;
112 size_t PrevFmtSpecifierIdx = 0;
115 ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) {
116 bool ArgDump =
false;
118 CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
120 if (pTag != StringRef::npos) {
122 while (pTag && CurFmt[--pTag] ==
'%') {
128 OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]);
130 PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx;
134 bool AMDGPUPrintfRuntimeBindingImpl::shouldPrintAsStr(
char Specifier,
135 Type *OpType)
const {
136 if (Specifier !=
's')
138 const PointerType *PT = dyn_cast<PointerType>(OpType);
142 if (ElemType->
getTypeID() != Type::IntegerTyID)
144 IntegerType *ElemIType = cast<IntegerType>(ElemType);
148 bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(
Module &
M) {
151 Type *I32Ty = Type::getInt32Ty(Ctx);
154 const char NonLiteralStr[4] =
"???";
156 for (
auto CI : Printfs) {
157 unsigned NumOps = CI->arg_size();
160 Value *
Op = CI->getArgOperand(0);
162 if (
auto LI = dyn_cast<LoadInst>(
Op)) {
163 Op = LI->getPointerOperand();
164 for (
auto Use :
Op->users()) {
165 if (
auto SI = dyn_cast<StoreInst>(
Use)) {
166 Op =
SI->getValueOperand();
172 if (
auto I = dyn_cast<Instruction>(
Op)) {
173 Value *Op_simplified =
174 simplify(
I, &GetTLI(*
I->getFunction()), &GetDT(*
I->getFunction()));
187 if (
auto *CA = dyn_cast<ConstantDataArray>(
Init)) {
189 Str = CA->getAsCString();
190 }
else if (isa<ConstantAggregateZero>(
Init)) {
199 getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1);
202 std::string AStreamHolder;
205 Sizes << CI->arg_size() - 1;
207 for (
unsigned ArgCount = 1;
208 ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
210 Value *
Arg = CI->getArgOperand(ArgCount);
211 Type *ArgType =
Arg->getType();
212 unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType);
213 ArgSize = ArgSize / 8;
220 auto *LLVMVecType = llvm::dyn_cast<llvm::FixedVectorType>(ArgType);
221 int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1;
222 if (LLVMVecType && NumElem > 1)
225 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
226 if (OpConvSpecifiers[ArgCount - 1] ==
'x' ||
227 OpConvSpecifiers[ArgCount - 1] ==
'X' ||
228 OpConvSpecifiers[ArgCount - 1] ==
'u' ||
229 OpConvSpecifiers[ArgCount - 1] ==
'o')
233 ArgType =
Arg->getType();
234 ArgSize = TD->getTypeAllocSizeInBits(ArgType);
235 ArgSize = ArgSize / 8;
236 CI->setOperand(ArgCount,
Arg);
238 if (OpConvSpecifiers[ArgCount - 1] ==
'f') {
249 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
250 if (
auto *ConstExpr = dyn_cast<ConstantExpr>(
Arg)) {
251 auto *GV = dyn_cast<GlobalVariable>(ConstExpr->
getOperand(0));
252 if (GV && GV->hasInitializer()) {
254 bool IsZeroValue =
Init->isZeroValue();
255 auto *CA = dyn_cast<ConstantDataArray>(
Init);
256 if (IsZeroValue || (CA && CA->isString())) {
258 IsZeroValue ? 1 : (strlen(CA->getAsCString().data()) + 1);
271 ArgSize =
sizeof(NonLiteralStr);
274 ArgSize =
sizeof(NonLiteralStr);
278 <<
" for type: " << *ArgType <<
'\n');
279 Sizes << ArgSize <<
':';
282 LLVM_DEBUG(
dbgs() <<
"Printf format string in source = " << Str.str()
319 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
322 Attribute::NoUnwind);
324 Type *SizetTy = Type::getInt32Ty(Ctx);
326 Type *Tys_alloc[1] = {SizetTy};
327 Type *I8Ty = Type::getInt8Ty(Ctx);
331 M.getOrInsertFunction(
StringRef(
"__printf_alloc"), FTy_alloc, Attr);
333 LLVM_DEBUG(
dbgs() <<
"Printf metadata = " << Sizes.str() <<
'\n');
334 std::string fmtstr = itostr(++UniqID) +
":" + Sizes.str();
350 alloc_args.push_back(sumC);
352 CallInst::Create(PrintfAllocFn, alloc_args,
"printf_alloc_fn", CI);
361 auto *
cmp = cast<ICmpInst>(
Builder.CreateICmpNE(pcall, zeroIntPtr,
""));
362 if (!CI->use_empty()) {
365 CI->replaceAllUsesWith(
result);
381 new BitCastInst(BufferIdx, idPointer,
"PrintBuffIdCast", Brnch);
387 BufferIdx = GetElementPtrInst::Create(
392 Type *Int64Ty = Type::getInt64Ty(Ctx);
393 for (
unsigned ArgCount = 1;
394 ArgCount < CI->arg_size() && ArgCount <= OpConvSpecifiers.size();
396 Value *
Arg = CI->getArgOperand(ArgCount);
397 Type *ArgType =
Arg->getType();
401 if (OpConvSpecifiers[ArgCount - 1] ==
'f') {
402 if (
auto *FpCons = dyn_cast<ConstantFP>(
Arg)) {
405 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
409 }
else if (
auto *FpExt = dyn_cast<FPExtInst>(
Arg)) {
410 if (FpExt->getType()->isDoubleTy() &&
411 FpExt->getOperand(0)->getType()->isFloatTy()) {
412 Arg = FpExt->getOperand(0);
418 WhatToStore.push_back(
Arg);
419 }
else if (ArgType->
getTypeID() == Type::PointerTyID) {
420 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
421 const char *
S = NonLiteralStr;
422 if (
auto *ConstExpr = dyn_cast<ConstantExpr>(
Arg)) {
423 auto *GV = dyn_cast<GlobalVariable>(ConstExpr->
getOperand(0));
424 if (GV && GV->hasInitializer()) {
426 bool IsZeroValue =
Init->isZeroValue();
427 auto *CA = dyn_cast<ConstantDataArray>(
Init);
428 if (IsZeroValue || (CA && CA->isString())) {
429 S = IsZeroValue ?
"" : CA->getAsCString().data();
433 size_t SizeStr = strlen(
S) + 1;
442 char *MyNewStr =
new char[NSizeStr]();
444 int NumInts = NSizeStr / 4;
447 int ANum = *(
int *)(MyNewStr + CharC);
451 WhatToStore.push_back(ANumV);
457 WhatToStore.push_back(ANumV);
464 WhatToStore.push_back(
Arg);
466 }
else if (isa<FixedVectorType>(ArgType)) {
467 Type *IType =
nullptr;
468 uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements();
470 uint32_t TotalSize = EleCount * EleSize;
476 ArgType =
Arg->getType();
477 TotalSize += EleSize;
481 EleCount = TotalSize / 64;
482 IType = Type::getInt64Ty(ArgType->
getContext());
486 EleCount = TotalSize / 64;
487 IType = Type::getInt64Ty(ArgType->
getContext());
488 }
else if (EleCount >= 3) {
490 IType = Type::getInt32Ty(ArgType->
getContext());
493 IType = Type::getInt16Ty(ArgType->
getContext());
498 EleCount = TotalSize / 64;
499 IType = Type::getInt64Ty(ArgType->
getContext());
502 IType = Type::getInt32Ty(ArgType->
getContext());
510 WhatToStore.push_back(
Arg);
512 WhatToStore.push_back(
Arg);
514 for (
unsigned I = 0,
E = WhatToStore.size();
I !=
E; ++
I) {
515 Value *TheBtCast = WhatToStore[
I];
517 TD->getTypeAllocSizeInBits(TheBtCast->
getType()) / 8;
523 new BitCastInst(BufferIdx, ArgPointer,
"PrintBuffPtrCast", Brnch);
528 if (
I + 1 ==
E && ArgCount + 1 == CI->arg_size())
530 BufferIdx = GetElementPtrInst::Create(I8Ty, BufferIdx, BuffOffset,
531 "PrintBuffNextPtr", Brnch);
533 << *BufferIdx <<
'\n');
540 for (
auto CI : Printfs)
541 CI->eraseFromParent();
549 if (
TT.getArch() == Triple::r600)
552 auto PrintfFunction =
M.getFunction(
"printf");
556 for (
auto &U : PrintfFunction->uses()) {
557 if (
auto *CI = dyn_cast<CallInst>(U.getUser())) {
558 if (CI->isCallee(&U))
559 Printfs.push_back(CI);
566 TD = &
M.getDataLayout();
568 return lowerPrintfForGpu(
M);
571 bool AMDGPUPrintfRuntimeBinding::runOnModule(
Module &
M) {
573 return this->getAnalysis<DominatorTreeWrapperPass>(
F).getDomTree();
576 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
579 return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(
M);
592 bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).
run(
M);
A set of analyses that are preserved following a run of a transformation pass.
amdgpu printf runtime binding
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
A parsed version of the target data layout string in and methods for querying it.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A raw_ostream that writes to an std::string.
This class represents a no-op cast from one type to another.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
unsigned getAddressSpace() const
Return the address space of the Pointer type.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
TypeID getTypeID() const
Return the type id for the type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Triple - Helper class for working with autoconf configuration names.
FunctionAnalysisManager FAM
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
The instances of the Type class are immutable: once they are created, they are never changed.
const APFloat & getValueAPF() const
INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", false, false) INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding
static IntegerType * getInt32Ty(LLVMContext &C)
bool hasInitializer() const
Definitions have initializers, declarations don't.
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
A constant pointer value that points to null.
void addOperand(MDNode *M)
LLVM_NODISCARD size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
(vector float) vec_cmpeq(*A, *B) C
Represent the analysis usage information of a pass.
TargetLibraryInfo run(const Function &F, FunctionAnalysisManager &)
Class to represent integer types.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Legacy analysis pass which computes a DominatorTree.
ConstantFP - Floating Point Values [float, double].
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
ModulePass * createAMDGPUPrintfRuntimeBinding()
An efficient, type-erasing, non-owning reference to a callable.
An instruction for storing to memory.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
This is an important base class in LLVM.
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
This is an important class for using LLVM in a threaded context.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Class to represent pointers.
char & AMDGPUPrintfRuntimeBindingID
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
StandardInstrumentations SI(Debug, VerifyEach)
A Module instance is used to store all the information related to an LLVM module.
Class for arbitrary precision integers.
StringRef - Represent a constant reference to a string, i.e.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Type * getType() const
All values are typed, get the type of this value.
This class represents a cast from a pointer to an integer.
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
This class represents an extension of floating point types.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
A constant value that is initialized with an expression using other constant values.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Should compile to something r4 addze r3 instead we get
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Provides information about what library functions are available for the current target.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
constexpr char Printf[]
Key for HSA::Metadata::mPrintf.
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
Analysis pass which computes a DominatorTree.
This instruction constructs a fixed permutation of two input vectors.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
amdgpu printf runtime AMDGPU Printf lowering
A container for analyses that lazily runs them and caches their results.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
This class represents a function call, abstracting a target machine's calling convention.
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
AnalysisUsage & addRequired()
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Value * getOperand(unsigned i) const
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
LLVM Value Representation.
Analysis pass providing the TargetLibraryInfo.
Class to represent function types.
A Use represents the edge between a Value definition and its users.