LLVM  14.0.0git
AMDGPUPrintfRuntimeBinding.cpp
Go to the documentation of this file.
1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // \file
9 //
10 // This pass binds printfs to a kernel arg pointer that will be bound to a buffer
11 // later by the runtime.
12 //
13 // This pass traverses the functions in the module and converts
14 // each call to printf to a sequence of operations that
15 // store the following into the printf buffer:
16 // - format string (passed as a module's metadata unique ID)
17 // - bitwise copies of printf arguments
18 // The backend passes will need to store metadata in the kernel
19 //===----------------------------------------------------------------------===//
20 
21 #include "AMDGPU.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/InitializePasses.h"
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "printfToRuntime"
33 #define DWORD_ALIGN 4
34 
35 namespace {
36 class AMDGPUPrintfRuntimeBinding final : public ModulePass {
37 
38 public:
39  static char ID;
40 
41  explicit AMDGPUPrintfRuntimeBinding();
42 
43 private:
44  bool runOnModule(Module &M) override;
45 
46  void getAnalysisUsage(AnalysisUsage &AU) const override {
49  }
50 };
51 
52 class AMDGPUPrintfRuntimeBindingImpl {
53 public:
54  AMDGPUPrintfRuntimeBindingImpl(
55  function_ref<const DominatorTree &(Function &)> GetDT,
56  function_ref<const TargetLibraryInfo &(Function &)> GetTLI)
57  : GetDT(GetDT), GetTLI(GetTLI) {}
58  bool run(Module &M);
59 
60 private:
61  void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers,
62  StringRef fmt, size_t num_ops) const;
63 
64  bool shouldPrintAsStr(char Specifier, Type *OpType) const;
65  bool lowerPrintfForGpu(Module &M);
66 
68  const DominatorTree *DT) {
69  return SimplifyInstruction(I, {*TD, TLI, DT});
70  }
71 
72  const DataLayout *TD;
73  function_ref<const DominatorTree &(Function &)> GetDT;
74  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
76 };
77 } // namespace
78 
80 
81 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding,
82  "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering",
83  false, false)
86 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding",
87  "AMDGPU Printf lowering", false, false)
88 
89 char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID;
90 
91 namespace llvm {
93  return new AMDGPUPrintfRuntimeBinding();
94 }
95 } // namespace llvm
96 
97 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass(ID) {
99 }
100 
101 void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
102  SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt,
103  size_t NumOps) const {
104  // not all format characters are collected.
105  // At this time the format characters of interest
106  // are %p and %s, which use to know if we
107  // are either storing a literal string or a
108  // pointer to the printf buffer.
109  static const char ConvSpecifiers[] = "cdieEfgGaosuxXp";
110  size_t CurFmtSpecifierIdx = 0;
111  size_t PrevFmtSpecifierIdx = 0;
112 
113  while ((CurFmtSpecifierIdx = Fmt.find_first_of(
114  ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) {
115  bool ArgDump = false;
116  StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx,
117  CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
118  size_t pTag = CurFmt.find_last_of("%");
119  if (pTag != StringRef::npos) {
120  ArgDump = true;
121  while (pTag && CurFmt[--pTag] == '%') {
122  ArgDump = !ArgDump;
123  }
124  }
125 
126  if (ArgDump)
127  OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]);
128 
129  PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx;
130  }
131 }
132 
133 bool AMDGPUPrintfRuntimeBindingImpl::shouldPrintAsStr(char Specifier,
134  Type *OpType) const {
135  if (Specifier != 's')
136  return false;
137  const PointerType *PT = dyn_cast<PointerType>(OpType);
138  if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
139  return false;
140  Type *ElemType = PT->getContainedType(0);
141  if (ElemType->getTypeID() != Type::IntegerTyID)
142  return false;
143  IntegerType *ElemIType = cast<IntegerType>(ElemType);
144  return ElemIType->getBitWidth() == 8;
145 }
146 
147 bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
148  LLVMContext &Ctx = M.getContext();
149  IRBuilder<> Builder(Ctx);
150  Type *I32Ty = Type::getInt32Ty(Ctx);
151  unsigned UniqID = 0;
152 // NB: It is important for this string size to be divisible by 4
153  const char NonLiteralStr[4] = "???";
154 
155  for (auto CI : Printfs) {
156  unsigned NumOps = CI->getNumArgOperands();
157 
158  SmallString<16> OpConvSpecifiers;
159  Value *Op = CI->getArgOperand(0);
160 
161  if (auto LI = dyn_cast<LoadInst>(Op)) {
162  Op = LI->getPointerOperand();
163  for (auto Use : Op->users()) {
164  if (auto SI = dyn_cast<StoreInst>(Use)) {
165  Op = SI->getValueOperand();
166  break;
167  }
168  }
169  }
170 
171  if (auto I = dyn_cast<Instruction>(Op)) {
172  Value *Op_simplified =
173  simplify(I, &GetTLI(*I->getFunction()), &GetDT(*I->getFunction()));
174  if (Op_simplified)
175  Op = Op_simplified;
176  }
177 
178  ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op);
179 
180  if (ConstExpr) {
181  GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
182 
183  StringRef Str("unknown");
184  if (GVar && GVar->hasInitializer()) {
185  auto *Init = GVar->getInitializer();
186  if (auto *CA = dyn_cast<ConstantDataArray>(Init)) {
187  if (CA->isString())
188  Str = CA->getAsCString();
189  } else if (isa<ConstantAggregateZero>(Init)) {
190  Str = "";
191  }
192  //
193  // we need this call to ascertain
194  // that we are printing a string
195  // or a pointer. It takes out the
196  // specifiers and fills up the first
197  // arg
198  getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1);
199  }
200  // Add metadata for the string
201  std::string AStreamHolder;
202  raw_string_ostream Sizes(AStreamHolder);
203  int Sum = DWORD_ALIGN;
204  Sizes << CI->getNumArgOperands() - 1;
205  Sizes << ':';
206  for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
207  ArgCount <= OpConvSpecifiers.size();
208  ArgCount++) {
209  Value *Arg = CI->getArgOperand(ArgCount);
210  Type *ArgType = Arg->getType();
211  unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType);
212  ArgSize = ArgSize / 8;
213  //
214  // ArgSize by design should be a multiple of DWORD_ALIGN,
215  // expand the arguments that do not follow this rule.
216  //
217  if (ArgSize % DWORD_ALIGN != 0) {
218  llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx);
219  auto *LLVMVecType = llvm::dyn_cast<llvm::FixedVectorType>(ArgType);
220  int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1;
221  if (LLVMVecType && NumElem > 1)
222  ResType = llvm::FixedVectorType::get(ResType, NumElem);
223  Builder.SetInsertPoint(CI);
224  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
225  if (OpConvSpecifiers[ArgCount - 1] == 'x' ||
226  OpConvSpecifiers[ArgCount - 1] == 'X' ||
227  OpConvSpecifiers[ArgCount - 1] == 'u' ||
228  OpConvSpecifiers[ArgCount - 1] == 'o')
229  Arg = Builder.CreateZExt(Arg, ResType);
230  else
231  Arg = Builder.CreateSExt(Arg, ResType);
232  ArgType = Arg->getType();
233  ArgSize = TD->getTypeAllocSizeInBits(ArgType);
234  ArgSize = ArgSize / 8;
235  CI->setOperand(ArgCount, Arg);
236  }
237  if (OpConvSpecifiers[ArgCount - 1] == 'f') {
238  ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg);
239  if (FpCons)
240  ArgSize = 4;
241  else {
242  FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg);
243  if (FpExt && FpExt->getType()->isDoubleTy() &&
244  FpExt->getOperand(0)->getType()->isFloatTy())
245  ArgSize = 4;
246  }
247  }
248  if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
249  if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
250  auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
251  if (GV && GV->hasInitializer()) {
252  Constant *Init = GV->getInitializer();
253  bool IsZeroValue = Init->isZeroValue();
254  auto *CA = dyn_cast<ConstantDataArray>(Init);
255  if (IsZeroValue || (CA && CA->isString())) {
256  size_t SizeStr =
257  IsZeroValue ? 1 : (strlen(CA->getAsCString().data()) + 1);
258  size_t Rem = SizeStr % DWORD_ALIGN;
259  size_t NSizeStr = 0;
260  LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr
261  << '\n');
262  if (Rem) {
263  NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
264  } else {
265  NSizeStr = SizeStr;
266  }
267  ArgSize = NSizeStr;
268  }
269  } else {
270  ArgSize = sizeof(NonLiteralStr);
271  }
272  } else {
273  ArgSize = sizeof(NonLiteralStr);
274  }
275  }
276  LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize
277  << " for type: " << *ArgType << '\n');
278  Sizes << ArgSize << ':';
279  Sum += ArgSize;
280  }
281  LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str()
282  << '\n');
283  for (size_t I = 0; I < Str.size(); ++I) {
284  // Rest of the C escape sequences (e.g. \') are handled correctly
285  // by the MDParser
286  switch (Str[I]) {
287  case '\a':
288  Sizes << "\\a";
289  break;
290  case '\b':
291  Sizes << "\\b";
292  break;
293  case '\f':
294  Sizes << "\\f";
295  break;
296  case '\n':
297  Sizes << "\\n";
298  break;
299  case '\r':
300  Sizes << "\\r";
301  break;
302  case '\v':
303  Sizes << "\\v";
304  break;
305  case ':':
306  // ':' cannot be scanned by Flex, as it is defined as a delimiter
307 // Replace it with its octal representation \72
308  Sizes << "\\72";
309  break;
310  default:
311  Sizes << Str[I];
312  break;
313  }
314  }
315 
316  // Insert the printf_alloc call
317  Builder.SetInsertPoint(CI);
318  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
319 
320  AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex,
321  Attribute::NoUnwind);
322 
323  Type *SizetTy = Type::getInt32Ty(Ctx);
324 
325  Type *Tys_alloc[1] = {SizetTy};
326  Type *I8Ty = Type::getInt8Ty(Ctx);
327  Type *I8Ptr = PointerType::get(I8Ty, 1);
328  FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false);
329  FunctionCallee PrintfAllocFn =
330  M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
331 
332  LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n');
333  std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str();
334  MDString *fmtStrArray = MDString::get(Ctx, fmtstr);
335 
336  // Instead of creating global variables, the
337  // printf format strings are extracted
338  // and passed as metadata. This avoids
339  // polluting llvm's symbol tables in this module.
340  // Metadata is going to be extracted
341  // by the backend passes and inserted
342  // into the OpenCL binary as appropriate.
343  StringRef amd("llvm.printf.fmts");
344  NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd);
345  MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
346  metaD->addOperand(myMD);
347  Value *sumC = ConstantInt::get(SizetTy, Sum, false);
348  SmallVector<Value *, 1> alloc_args;
349  alloc_args.push_back(sumC);
350  CallInst *pcall =
351  CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI);
352 
353  //
354  // Insert code to split basicblock with a
355  // piece of hammock code.
356  // basicblock splits after buffer overflow check
357  //
358  ConstantPointerNull *zeroIntPtr =
360  auto *cmp = cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, ""));
361  if (!CI->use_empty()) {
362  Value *result =
363  Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res");
364  CI->replaceAllUsesWith(result);
365  }
366  SplitBlock(CI->getParent(), cmp);
367  Instruction *Brnch =
368  SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false);
369 
370  Builder.SetInsertPoint(Brnch);
371 
372  // store unique printf id in the buffer
373  //
374  GetElementPtrInst *BufferIdx = GetElementPtrInst::Create(
375  I8Ty, pcall, ConstantInt::get(Ctx, APInt(32, 0)), "PrintBuffID",
376  Brnch);
377 
378  Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS);
379  Value *id_gep_cast =
380  new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch);
381 
382  new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast, Brnch);
383 
384  // 1st 4 bytes hold the printf_id
385  // the following GEP is the buffer pointer
386  BufferIdx = GetElementPtrInst::Create(
387  I8Ty, pcall, ConstantInt::get(Ctx, APInt(32, 4)), "PrintBuffGep",
388  Brnch);
389 
390  Type *Int32Ty = Type::getInt32Ty(Ctx);
391  Type *Int64Ty = Type::getInt64Ty(Ctx);
392  for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
393  ArgCount <= OpConvSpecifiers.size();
394  ArgCount++) {
395  Value *Arg = CI->getArgOperand(ArgCount);
396  Type *ArgType = Arg->getType();
397  SmallVector<Value *, 32> WhatToStore;
398  if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) {
399  Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty;
400  if (OpConvSpecifiers[ArgCount - 1] == 'f') {
401  if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) {
402  APFloat Val(FpCons->getValueAPF());
403  bool Lost = false;
404  Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
405  &Lost);
406  Arg = ConstantFP::get(Ctx, Val);
407  IType = Int32Ty;
408  } else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) {
409  if (FpExt->getType()->isDoubleTy() &&
410  FpExt->getOperand(0)->getType()->isFloatTy()) {
411  Arg = FpExt->getOperand(0);
412  IType = Int32Ty;
413  }
414  }
415  }
416  Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch);
417  WhatToStore.push_back(Arg);
418  } else if (ArgType->getTypeID() == Type::PointerTyID) {
419  if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
420  const char *S = NonLiteralStr;
421  if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
422  auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
423  if (GV && GV->hasInitializer()) {
424  Constant *Init = GV->getInitializer();
425  bool IsZeroValue = Init->isZeroValue();
426  auto *CA = dyn_cast<ConstantDataArray>(Init);
427  if (IsZeroValue || (CA && CA->isString())) {
428  S = IsZeroValue ? "" : CA->getAsCString().data();
429  }
430  }
431  }
432  size_t SizeStr = strlen(S) + 1;
433  size_t Rem = SizeStr % DWORD_ALIGN;
434  size_t NSizeStr = 0;
435  if (Rem) {
436  NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
437  } else {
438  NSizeStr = SizeStr;
439  }
440  if (S[0]) {
441  char *MyNewStr = new char[NSizeStr]();
442  strcpy(MyNewStr, S);
443  int NumInts = NSizeStr / 4;
444  int CharC = 0;
445  while (NumInts) {
446  int ANum = *(int *)(MyNewStr + CharC);
447  CharC += 4;
448  NumInts--;
449  Value *ANumV = ConstantInt::get(Int32Ty, ANum, false);
450  WhatToStore.push_back(ANumV);
451  }
452  delete[] MyNewStr;
453  } else {
454 // Empty string, give a hint to the runtime that it is not NULL
455  Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false);
456  WhatToStore.push_back(ANumV);
457  }
458  } else {
459  uint64_t Size = TD->getTypeAllocSizeInBits(ArgType);
460  assert((Size == 32 || Size == 64) && "unsupported size");
461  Type *DstType = (Size == 32) ? Int32Ty : Int64Ty;
462  Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch);
463  WhatToStore.push_back(Arg);
464  }
465  } else if (isa<FixedVectorType>(ArgType)) {
466  Type *IType = NULL;
467  uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements();
468  uint32_t EleSize = ArgType->getScalarSizeInBits();
469  uint32_t TotalSize = EleCount * EleSize;
470  if (EleCount == 3) {
471  ShuffleVectorInst *Shuffle =
472  new ShuffleVectorInst(Arg, Arg, ArrayRef<int>{0, 1, 2, 2});
473  Shuffle->insertBefore(Brnch);
474  Arg = Shuffle;
475  ArgType = Arg->getType();
476  TotalSize += EleSize;
477  }
478  switch (EleSize) {
479  default:
480  EleCount = TotalSize / 64;
481  IType = Type::getInt64Ty(ArgType->getContext());
482  break;
483  case 8:
484  if (EleCount >= 8) {
485  EleCount = TotalSize / 64;
486  IType = Type::getInt64Ty(ArgType->getContext());
487  } else if (EleCount >= 3) {
488  EleCount = 1;
489  IType = Type::getInt32Ty(ArgType->getContext());
490  } else {
491  EleCount = 1;
492  IType = Type::getInt16Ty(ArgType->getContext());
493  }
494  break;
495  case 16:
496  if (EleCount >= 3) {
497  EleCount = TotalSize / 64;
498  IType = Type::getInt64Ty(ArgType->getContext());
499  } else {
500  EleCount = 1;
501  IType = Type::getInt32Ty(ArgType->getContext());
502  }
503  break;
504  }
505  if (EleCount > 1) {
506  IType = FixedVectorType::get(IType, EleCount);
507  }
508  Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch);
509  WhatToStore.push_back(Arg);
510  } else {
511  WhatToStore.push_back(Arg);
512  }
513  for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) {
514  Value *TheBtCast = WhatToStore[I];
515  unsigned ArgSize =
516  TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8;
517  SmallVector<Value *, 1> BuffOffset;
518  BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize));
519 
520  Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1);
521  Value *CastedGEP =
522  new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch);
523  StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch);
524  LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
525  << *StBuff << '\n');
526  (void)StBuff;
527  if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands())
528  break;
529  BufferIdx = GetElementPtrInst::Create(I8Ty, BufferIdx, BuffOffset,
530  "PrintBuffNextPtr", Brnch);
531  LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
532  << *BufferIdx << '\n');
533  }
534  }
535  }
536  }
537 
538  // erase the printf calls
539  for (auto CI : Printfs)
540  CI->eraseFromParent();
541 
542  Printfs.clear();
543  return true;
544 }
545 
546 bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
547  Triple TT(M.getTargetTriple());
548  if (TT.getArch() == Triple::r600)
549  return false;
550 
551  auto PrintfFunction = M.getFunction("printf");
552  if (!PrintfFunction)
553  return false;
554 
555  for (auto &U : PrintfFunction->uses()) {
556  if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
557  if (CI->isCallee(&U))
558  Printfs.push_back(CI);
559  }
560  }
561 
562  if (Printfs.empty())
563  return false;
564 
565  if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) {
566  for (auto &U : HostcallFunction->uses()) {
567  if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
568  M.getContext().emitError(
569  CI, "Cannot use both printf and hostcall in the same module");
570  }
571  }
572  }
573 
574  TD = &M.getDataLayout();
575 
576  return lowerPrintfForGpu(M);
577 }
578 
579 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
580  auto GetDT = [this](Function &F) -> DominatorTree & {
581  return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
582  };
583  auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
584  return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
585  };
586 
587  return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
588 }
589 
591 AMDGPUPrintfRuntimeBindingPass::run(Module &M, ModuleAnalysisManager &AM) {
594  auto GetDT = [&FAM](Function &F) -> DominatorTree & {
596  };
597  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
599  };
600  bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
601  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
602 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
binding
amdgpu printf runtime binding
Definition: AMDGPUPrintfRuntimeBinding.cpp:86
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::NamedMDNode
A tuple of MDNodes.
Definition: Metadata.h:1386
llvm::ARM::PredBlockMask::TT
@ TT
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
simplify
hexagon bit simplify
Definition: HexagonBitSimplify.cpp:261
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:769
llvm::Function
Definition: Function.h:61
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:623
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5164
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:686
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:84
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:135
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
FAM
FunctionAnalysisManager FAM
Definition: PassBuilderBindings.cpp:59
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:189
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AttributeList
Definition: Attributes.h:398
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:297
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", false, false) INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:92
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::StringRef::substr
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:611
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::ConstantPointerNull
A constant pointer value that points to null.
Definition: Constants.h:534
llvm::NamedMDNode::addOperand
void addOperand(MDNode *M)
Definition: Metadata.cpp:1130
llvm::StringRef::find_last_of
LLVM_NODISCARD size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition: StringRef.h:436
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TargetLibraryAnalysis::run
TargetLibraryInfo run(const Function &F, FunctionAnalysisManager &)
Definition: TargetLibraryInfo.cpp:1713
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
llvm::SimplifyInstruction
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6246
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:650
llvm::SmallString< 16 >
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition: AMDGPUPrintfRuntimeBinding.cpp:92
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::APFloat
Definition: APFloat.h:701
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:136
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:463
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:631
llvm::AMDGPUPrintfRuntimeBindingID
char & AMDGPUPrintfRuntimeBindingID
Definition: AMDGPUPrintfRuntimeBinding.cpp:89
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::ArrayRef< int >
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
AMDGPU.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
uint32_t
llvm::PtrToIntInst
This class represents a cast from a pointer to an integer.
Definition: Instructions.h:5113
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::FPExtInst
This class represents an extension of floating point types.
Definition: Instructions.h:4875
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:127
llvm::Init
Definition: Record.h:271
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:936
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:147
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:385
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
llvm::AMDGPU::HSAMD::Key::Printf
constexpr char Printf[]
Key for HSA::Metadata::mPrintf.
Definition: AMDGPUMetadata.h:425
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:2011
Instructions.h
Dominators.h
InstructionSimplify.h
llvm::IntegerType::getBitWidth
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:71
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
llvm::SmallVectorImpl< char >
lowering
amdgpu printf runtime AMDGPU Printf lowering
Definition: AMDGPUPrintfRuntimeBinding.cpp:87
DWORD_ALIGN
#define DWORD_ALIGN
Definition: AMDGPUPrintfRuntimeBinding.cpp:33
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:926
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
llvm::StringRef::find_first_of
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:410
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1418
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
BasicBlockUtils.h
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:814
InitializePasses.h
llvm::Type::getContainedType
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:348
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:438
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:382
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:102
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38