LLVM  13.0.0git
AMDGPUPrintfRuntimeBinding.cpp
Go to the documentation of this file.
1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // \file
9 //
10 // This pass binds printfs to a kernel arg pointer that will be bound to a
11 // buffer later by the runtime.
12 //
13 // This pass traverses the functions in the module and converts
14 // each call to printf to a sequence of operations that
15 // store the following into the printf buffer:
16 // - format string (passed as a module's metadata unique ID)
17 // - bitwise copies of printf arguments
18 // The backend passes will need to store metadata in the kernel
19 //===----------------------------------------------------------------------===//
20 
21 #include "AMDGPU.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/InitializePasses.h"
29 
30 using namespace llvm;
31 
32 #define DEBUG_TYPE "printfToRuntime"
33 #define DWORD_ALIGN 4
34 
35 namespace {
36 class AMDGPUPrintfRuntimeBinding final : public ModulePass {
37 
38 public:
39  static char ID;
40 
41  explicit AMDGPUPrintfRuntimeBinding();
42 
43 private:
44  bool runOnModule(Module &M) override;
45 
46  void getAnalysisUsage(AnalysisUsage &AU) const override {
49  }
50 };
51 
52 class AMDGPUPrintfRuntimeBindingImpl {
53 public:
54  AMDGPUPrintfRuntimeBindingImpl(
55  function_ref<const DominatorTree &(Function &)> GetDT,
56  function_ref<const TargetLibraryInfo &(Function &)> GetTLI)
57  : GetDT(GetDT), GetTLI(GetTLI) {}
58  bool run(Module &M);
59 
60 private:
61  void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers,
62  StringRef fmt, size_t num_ops) const;
63 
64  bool shouldPrintAsStr(char Specifier, Type *OpType) const;
65  bool lowerPrintfForGpu(Module &M);
66 
68  const DominatorTree *DT) {
69  return SimplifyInstruction(I, {*TD, TLI, DT});
70  }
71 
72  const DataLayout *TD;
73  function_ref<const DominatorTree &(Function &)> GetDT;
74  function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
76 };
77 } // namespace
78 
80 
81 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding,
82  "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering",
83  false, false)
86 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding",
87  "AMDGPU Printf lowering", false, false)
88 
89 char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID;
90 
91 namespace llvm {
93  return new AMDGPUPrintfRuntimeBinding();
94 }
95 } // namespace llvm
96 
97 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass(ID) {
99 }
100 
101 void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
102  SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt,
103  size_t NumOps) const {
104  // not all format characters are collected.
105  // At this time the format characters of interest
106  // are %p and %s, which use to know if we
107  // are either storing a literal string or a
108  // pointer to the printf buffer.
109  static const char ConvSpecifiers[] = "cdieEfgGaosuxXp";
110  size_t CurFmtSpecifierIdx = 0;
111  size_t PrevFmtSpecifierIdx = 0;
112 
113  while ((CurFmtSpecifierIdx = Fmt.find_first_of(
114  ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) {
115  bool ArgDump = false;
116  StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx,
117  CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
118  size_t pTag = CurFmt.find_last_of("%");
119  if (pTag != StringRef::npos) {
120  ArgDump = true;
121  while (pTag && CurFmt[--pTag] == '%') {
122  ArgDump = !ArgDump;
123  }
124  }
125 
126  if (ArgDump)
127  OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]);
128 
129  PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx;
130  }
131 }
132 
133 bool AMDGPUPrintfRuntimeBindingImpl::shouldPrintAsStr(char Specifier,
134  Type *OpType) const {
135  if (Specifier != 's')
136  return false;
137  const PointerType *PT = dyn_cast<PointerType>(OpType);
138  if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
139  return false;
140  Type *ElemType = PT->getContainedType(0);
141  if (ElemType->getTypeID() != Type::IntegerTyID)
142  return false;
143  IntegerType *ElemIType = cast<IntegerType>(ElemType);
144  return ElemIType->getBitWidth() == 8;
145 }
146 
// Lowers every call collected in Printfs.
//
// For each call whose format operand resolves to a ConstantExpr this:
//  1. extracts the format string from the referenced global's initializer
//     (falling back to "unknown") and collects its conversion specifiers;
//  2. computes the size of each argument as it will be laid out in the printf
//     buffer, widening arguments whose size is not a multiple of DWORD_ALIGN;
//  3. records "<id>:<num args>:<size>:...:<escaped format string>" as an
//     MDString operand of the named metadata "llvm.printf.fmts";
//  4. inserts a call to "__printf_alloc" with the total byte count and
//     compares the returned pointer against null;
//  5. inserts, before the instruction returned by SplitBlockAndInsertIfThen
//     for that comparison, stores of the unique id followed by each argument.
// If the printf result is used, its uses are replaced by the sign-extended
// negation of the non-null comparison.
//
// All calls in Printfs are erased afterwards and the list is cleared.
// Always returns true.
147 bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
148  LLVMContext &Ctx = M.getContext();
149  IRBuilder<> Builder(Ctx);
150  Type *I32Ty = Type::getInt32Ty(Ctx);
151  unsigned UniqID = 0;
152  // NB: It is important for this string size to be divisible by 4
153  const char NonLiteralStr[4] = "???";
154 
155  for (auto CI : Printfs) {
156  unsigned NumOps = CI->getNumArgOperands();
157 
158  SmallString<16> OpConvSpecifiers;
159  Value *Op = CI->getArgOperand(0);
160 
    // If the format operand is a load, look through it: take the value
    // operand of the first StoreInst found among the users of the loaded
    // pointer.
161  if (auto LI = dyn_cast<LoadInst>(Op)) {
162  Op = LI->getPointerOperand();
163  for (auto Use : Op->users()) {
164  if (auto SI = dyn_cast<StoreInst>(Use)) {
165  Op = SI->getValueOperand();
166  break;
167  }
168  }
169  }
170 
171  if (auto I = dyn_cast<Instruction>(Op)) {
172  Value *Op_simplified =
173  simplify(I, &GetTLI(*I->getFunction()), &GetDT(*I->getFunction()));
174  if (Op_simplified)
175  Op = Op_simplified;
176  }
177 
178  ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op);
179 
    // Only calls whose format operand is a ConstantExpr are lowered here;
    // other calls are left untouched by this loop but are still erased by the
    // loop at the end of this function.
180  if (ConstExpr) {
181  GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
182 
183  StringRef Str("unknown");
184  if (GVar && GVar->hasInitializer()) {
185  auto *Init = GVar->getInitializer();
186  if (auto *CA = dyn_cast<ConstantDataArray>(Init)) {
187  if (CA->isString())
188  Str = CA->getAsCString();
189  } else if (isa<ConstantAggregateZero>(Init)) {
190  Str = "";
191  }
192  //
193  // we need this call to ascertain
194  // that we are printing a string
195  // or a pointer. It takes out the
196  // specifiers and fills up the first
197  // arg
198  getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1);
199  }
200  // Add metadata for the string
201  std::string AStreamHolder;
202  raw_string_ostream Sizes(AStreamHolder);
    // Sum is the byte count later passed to __printf_alloc. It starts at
    // DWORD_ALIGN to account for the 4-byte printf id stored first.
203  int Sum = DWORD_ALIGN;
204  Sizes << CI->getNumArgOperands() - 1;
205  Sizes << ':';
206  for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
207  ArgCount <= OpConvSpecifiers.size();
208  ArgCount++) {
209  Value *Arg = CI->getArgOperand(ArgCount);
210  Type *ArgType = Arg->getType();
211  unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType);
212  ArgSize = ArgSize / 8;
213  //
214  // ArgSize by design should be a multiple of DWORD_ALIGN,
215  // expand the arguments that do not follow this rule.
216  //
217  if (ArgSize % DWORD_ALIGN != 0) {
218  llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx);
219  auto *LLVMVecType = llvm::dyn_cast<llvm::FixedVectorType>(ArgType);
220  int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1;
221  if (LLVMVecType && NumElem > 1)
222  ResType = llvm::FixedVectorType::get(ResType, NumElem);
223  Builder.SetInsertPoint(CI);
224  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
    // Specifiers x, X, u and o zero-extend; every other specifier
    // sign-extends.
225  if (OpConvSpecifiers[ArgCount - 1] == 'x' ||
226  OpConvSpecifiers[ArgCount - 1] == 'X' ||
227  OpConvSpecifiers[ArgCount - 1] == 'u' ||
228  OpConvSpecifiers[ArgCount - 1] == 'o')
229  Arg = Builder.CreateZExt(Arg, ResType);
230  else
231  Arg = Builder.CreateSExt(Arg, ResType);
232  ArgType = Arg->getType();
233  ArgSize = TD->getTypeAllocSizeInBits(ArgType);
234  ArgSize = ArgSize / 8;
235  CI->setOperand(ArgCount, Arg);
236  }
    // For %f, a floating-point constant or a value that is an fpext from
    // float to double occupies 4 bytes; the store loop further below emits
    // the matching 32-bit value.
237  if (OpConvSpecifiers[ArgCount - 1] == 'f') {
238  ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg);
239  if (FpCons)
240  ArgSize = 4;
241  else {
242  FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg);
243  if (FpExt && FpExt->getType()->isDoubleTy() &&
244  FpExt->getOperand(0)->getType()->isFloatTy())
245  ArgSize = 4;
246  }
247  }
248  if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
249  if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
250  auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
251  if (GV && GV->hasInitializer()) {
252  Constant *Init = GV->getInitializer();
253  bool IsZeroValue = Init->isZeroValue();
254  auto *CA = dyn_cast<ConstantDataArray>(Init);
255  if (IsZeroValue || (CA && CA->isString())) {
256  size_t SizeStr =
257  IsZeroValue ? 1 : (strlen(CA->getAsCString().data()) + 1);
258  size_t Rem = SizeStr % DWORD_ALIGN;
259  size_t NSizeStr = 0;
260  LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr
261  << '\n');
    // Round the string size (including the terminator) up to a multiple of
    // DWORD_ALIGN.
262  if (Rem) {
263  NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
264  } else {
265  NSizeStr = SizeStr;
266  }
267  ArgSize = NSizeStr;
268  }
269  } else {
270  ArgSize = sizeof(NonLiteralStr);
271  }
272  } else {
273  ArgSize = sizeof(NonLiteralStr);
274  }
275  }
276  LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize
277  << " for type: " << *ArgType << '\n');
278  Sizes << ArgSize << ':';
279  Sum += ArgSize;
280  }
281  LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str()
282  << '\n');
283  for (size_t I = 0; I < Str.size(); ++I) {
284  // Rest of the C escape sequences (e.g. \') are handled correctly
285  // by the MDParser
286  switch (Str[I]) {
287  case '\a':
288  Sizes << "\\a";
289  break;
290  case '\b':
291  Sizes << "\\b";
292  break;
293  case '\f':
294  Sizes << "\\f";
295  break;
296  case '\n':
297  Sizes << "\\n";
298  break;
299  case '\r':
300  Sizes << "\\r";
301  break;
302  case '\v':
303  Sizes << "\\v";
304  break;
305  case ':':
306  // ':' cannot be scanned by Flex, as it is defined as a delimiter
307  // Replace it with its octal representation \72
308  Sizes << "\\72";
309  break;
310  default:
311  Sizes << Str[I];
312  break;
313  }
314  }
315 
316  // Insert the printf_alloc call
317  Builder.SetInsertPoint(CI);
318  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
319 
320  AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex,
321  Attribute::NoUnwind);
322 
323  Type *SizetTy = Type::getInt32Ty(Ctx);
324 
325  Type *Tys_alloc[1] = {SizetTy};
326  Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1);
327  FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false);
328  FunctionCallee PrintfAllocFn =
329  M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
330 
331  LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n');
332  std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str();
333  MDString *fmtStrArray = MDString::get(Ctx, fmtstr);
334 
335  // Instead of creating global variables, the
336  // printf format strings are extracted
337  // and passed as metadata. This avoids
338  // polluting llvm's symbol tables in this module.
339  // Metadata is going to be extracted
340  // by the backend passes and inserted
341  // into the OpenCL binary as appropriate.
342  StringRef amd("llvm.printf.fmts");
343  NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd);
344  MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
345  metaD->addOperand(myMD);
346  Value *sumC = ConstantInt::get(SizetTy, Sum, false);
347  SmallVector<Value *, 1> alloc_args;
348  alloc_args.push_back(sumC);
349  CallInst *pcall =
350  CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI);
351 
352  //
353  // Insert code to split basicblock with a
354  // piece of hammock code.
355  // basicblock splits after buffer overflow check
356  //
357  ConstantPointerNull *zeroIntPtr =
358  ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1));
359  auto *cmp = cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, ""));
    // Uses of the printf result are replaced by sext(!cmp) to i32.
360  if (!CI->use_empty()) {
361  Value *result =
362  Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res");
363  CI->replaceAllUsesWith(result);
364  }
365  SplitBlock(CI->getParent(), cmp);
366  Instruction *Brnch =
367  SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false);
368 
369  Builder.SetInsertPoint(Brnch);
370 
371  // store unique printf id in the buffer
372  //
373  SmallVector<Value *, 1> ZeroIdxList;
374  ConstantInt *zeroInt =
375  ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10));
376  ZeroIdxList.push_back(zeroInt);
377 
378  GetElementPtrInst *BufferIdx = GetElementPtrInst::Create(
379  nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch);
380 
381  Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS);
382  Value *id_gep_cast =
383  new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch);
384 
385  new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast, Brnch);
386 
387  SmallVector<Value *, 2> FourthIdxList;
388  ConstantInt *fourInt =
389  ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10));
390 
391  FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id
392  // the following GEP is the buffer pointer
393  BufferIdx = GetElementPtrInst::Create(nullptr, pcall, FourthIdxList,
394  "PrintBuffGep", Brnch);
395 
396  Type *Int32Ty = Type::getInt32Ty(Ctx);
397  Type *Int64Ty = Type::getInt64Ty(Ctx);
    // Second pass over the arguments: convert each one into the value(s) to
    // be written (WhatToStore) and emit the stores, advancing BufferIdx after
    // each store.
398  for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
399  ArgCount <= OpConvSpecifiers.size();
400  ArgCount++) {
401  Value *Arg = CI->getArgOperand(ArgCount);
402  Type *ArgType = Arg->getType();
403  SmallVector<Value *, 32> WhatToStore;
404  if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) {
405  Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty;
406  if (OpConvSpecifiers[ArgCount - 1] == 'f') {
407  if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) {
408  APFloat Val(FpCons->getValueAPF());
409  bool Lost = false;
410  Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
411  &Lost);
412  Arg = ConstantFP::get(Ctx, Val);
413  IType = Int32Ty;
414  } else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) {
415  if (FpExt->getType()->isDoubleTy() &&
416  FpExt->getOperand(0)->getType()->isFloatTy()) {
417  Arg = FpExt->getOperand(0);
418  IType = Int32Ty;
419  }
420  }
421  }
422  Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch);
423  WhatToStore.push_back(Arg);
424  } else if (ArgType->getTypeID() == Type::PointerTyID) {
425  if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
    // Strings that cannot be resolved to a constant initializer are written
    // as the NonLiteralStr placeholder.
426  const char *S = NonLiteralStr;
427  if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
428  auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
429  if (GV && GV->hasInitializer()) {
430  Constant *Init = GV->getInitializer();
431  bool IsZeroValue = Init->isZeroValue();
432  auto *CA = dyn_cast<ConstantDataArray>(Init);
433  if (IsZeroValue || (CA && CA->isString())) {
434  S = IsZeroValue ? "" : CA->getAsCString().data();
435  }
436  }
437  }
438  size_t SizeStr = strlen(S) + 1;
439  size_t Rem = SizeStr % DWORD_ALIGN;
440  size_t NSizeStr = 0;
441  if (Rem) {
442  NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
443  } else {
444  NSizeStr = SizeStr;
445  }
446  if (S[0]) {
    // Copy the string into a zero-initialized buffer of the padded size and
    // emit it as a sequence of 32-bit integer constants read from that
    // buffer.
447  char *MyNewStr = new char[NSizeStr]();
448  strcpy(MyNewStr, S);
449  int NumInts = NSizeStr / 4;
450  int CharC = 0;
451  while (NumInts) {
452  int ANum = *(int *)(MyNewStr + CharC);
453  CharC += 4;
454  NumInts--;
455  Value *ANumV = ConstantInt::get(Int32Ty, ANum, false);
456  WhatToStore.push_back(ANumV);
457  }
458  delete[] MyNewStr;
459  } else {
460  // Empty string, give a hint to RT it is not NULL
461  Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false);
462  WhatToStore.push_back(ANumV);
463  }
464  } else {
465  uint64_t Size = TD->getTypeAllocSizeInBits(ArgType);
466  assert((Size == 32 || Size == 64) && "unsupported size");
467  Type *DstType = (Size == 32) ? Int32Ty : Int64Ty;
468  Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch);
469  WhatToStore.push_back(Arg);
470  }
471  } else if (isa<FixedVectorType>(ArgType)) {
472  Type *IType = NULL;
473  uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements();
474  uint32_t EleSize = ArgType->getScalarSizeInBits();
475  uint32_t TotalSize = EleCount * EleSize;
    // 3-element vectors are widened to 4 elements by repeating element 2.
476  if (EleCount == 3) {
477  ShuffleVectorInst *Shuffle =
478  new ShuffleVectorInst(Arg, Arg, ArrayRef<int>{0, 1, 2, 2});
479  Shuffle->insertBefore(Brnch);
480  Arg = Shuffle;
481  ArgType = Arg->getType();
482  TotalSize += EleSize;
483  }
    // Choose the integer (or integer vector) type the vector is bitcast to
    // before being stored, based on element size and element count.
484  switch (EleSize) {
485  default:
486  EleCount = TotalSize / 64;
487  IType = Type::getInt64Ty(ArgType->getContext());
488  break;
489  case 8:
490  if (EleCount >= 8) {
491  EleCount = TotalSize / 64;
492  IType = Type::getInt64Ty(ArgType->getContext());
493  } else if (EleCount >= 3) {
494  EleCount = 1;
495  IType = Type::getInt32Ty(ArgType->getContext());
496  } else {
497  EleCount = 1;
498  IType = Type::getInt16Ty(ArgType->getContext());
499  }
500  break;
501  case 16:
502  if (EleCount >= 3) {
503  EleCount = TotalSize / 64;
504  IType = Type::getInt64Ty(ArgType->getContext());
505  } else {
506  EleCount = 1;
507  IType = Type::getInt32Ty(ArgType->getContext());
508  }
509  break;
510  }
511  if (EleCount > 1) {
512  IType = FixedVectorType::get(IType, EleCount);
513  }
514  Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch);
515  WhatToStore.push_back(Arg);
516  } else {
517  WhatToStore.push_back(Arg);
518  }
519  for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) {
520  Value *TheBtCast = WhatToStore[I];
521  unsigned ArgSize =
522  TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8;
523  SmallVector<Value *, 1> BuffOffset;
524  BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize));
525 
526  Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1);
527  Value *CastedGEP =
528  new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch);
529  StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch);
530  LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
531  << *StBuff << '\n');
532  (void)StBuff;
    // No pointer advance is emitted after the last value of the last call
    // operand.
533  if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands())
534  break;
535  BufferIdx = GetElementPtrInst::Create(nullptr, BufferIdx, BuffOffset,
536  "PrintBuffNextPtr", Brnch);
537  LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
538  << *BufferIdx << '\n');
539  }
540  }
541  }
542  }
543 
544  // erase the printf calls
545  for (auto CI : Printfs)
546  CI->eraseFromParent();
547 
548  Printfs.clear();
549  return true;
550 }
551 
552 bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
553  Triple TT(M.getTargetTriple());
554  if (TT.getArch() == Triple::r600)
555  return false;
556 
557  auto PrintfFunction = M.getFunction("printf");
558  if (!PrintfFunction)
559  return false;
560 
561  for (auto &U : PrintfFunction->uses()) {
562  if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
563  if (CI->isCallee(&U))
564  Printfs.push_back(CI);
565  }
566  }
567 
568  if (Printfs.empty())
569  return false;
570 
571  if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) {
572  for (auto &U : HostcallFunction->uses()) {
573  if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
574  M.getContext().emitError(
575  CI, "Cannot use both printf and hostcall in the same module");
576  }
577  }
578  }
579 
580  TD = &M.getDataLayout();
581 
582  return lowerPrintfForGpu(M);
583 }
584 
585 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
586  auto GetDT = [this](Function &F) -> DominatorTree & {
587  return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
588  };
589  auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
590  return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
591  };
592 
593  return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
594 }
595 
597 AMDGPUPrintfRuntimeBindingPass::run(Module &M, ModuleAnalysisManager &AM) {
600  auto GetDT = [&FAM](Function &F) -> DominatorTree & {
601  return FAM.getResult<DominatorTreeAnalysis>(F);
602  };
603  auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
604  return FAM.getResult<TargetLibraryAnalysis>(F);
605  };
606  bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
607  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
608 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
binding
amdgpu printf runtime binding
Definition: AMDGPUPrintfRuntimeBinding.cpp:86
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::NamedMDNode
A tuple of MDNodes.
Definition: Metadata.h:1386
llvm::ARM::PredBlockMask::TT
@ TT
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::ModulePass
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:238
simplify
hexagon bit simplify
Definition: HexagonBitSimplify.cpp:261
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:785
llvm::Function
Definition: Function.h:61
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:614
llvm::BitCastInst
This class represents a no-op cast from one type to another.
Definition: Instructions.h:5138
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:662
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:84
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:190
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::AttributeList
Definition: Attributes.h:385
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:295
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering", false, false) INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:197
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:92
result
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s result
Definition: README_P9.txt:256
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
llvm::StringRef::substr
LLVM_NODISCARD StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition: StringRef.h:612
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
llvm::ConstantPointerNull
A constant pointer value that points to null.
Definition: Constants.h:533
llvm::NamedMDNode::addOperand
void addOperand(MDNode *M)
Definition: Metadata.cpp:1124
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:77
llvm::StringRef::find_last_of
LLVM_NODISCARD size_t find_last_of(char C, size_t From=npos) const
Find the last character in the string that is C, or npos if not found.
Definition: StringRef.h:439
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
cmp
< i32 >< i32 > cmp
Definition: README.txt:1447
SI
@ SI
Definition: SIInstrInfo.cpp:7411
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::TargetLibraryAnalysis::run
TargetLibraryInfo run(const Function &F, FunctionAnalysisManager &)
Definition: TargetLibraryInfo.cpp:1680
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:360
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:142
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:147
llvm::SimplifyInstruction
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:5788
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:255
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:644
llvm::SmallString< 16 >
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::createAMDGPUPrintfRuntimeBinding
ModulePass * createAMDGPUPrintfRuntimeBinding()
Definition: AMDGPUPrintfRuntimeBinding.cpp:92
llvm::function_ref
An efficient, type-erasing, non-owning reference to a callable.
Definition: STLExtras.h:168
llvm::APFloat
Definition: APFloat.h:701
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:136
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:446
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:905
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:634
llvm::AMDGPUPrintfRuntimeBindingID
char & AMDGPUPrintfRuntimeBindingID
Definition: AMDGPUPrintfRuntimeBinding.cpp:89
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MDNode
Metadata node.
Definition: Metadata.h:897
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:649
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::ArrayRef< int >
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
AMDGPU.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
uint32_t
llvm::PtrToIntInst
This class represents a cast from a pointer to an integer.
Definition: Instructions.h:5087
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::FPExtInst
This class represents an extension of floating point types.
Definition: Instructions.h:4849
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::Init
Definition: Record.h:271
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:931
llvm::Type::isFloatTy
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:148
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
llvm::Type::isDoubleTy
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:151
llvm::AMDGPU::HSAMD::Key::Printf
constexpr char Printf[]
Key for HSA::Metadata::mPrintf.
Definition: AMDGPUMetadata.h:425
llvm::initializeAMDGPUPrintfRuntimeBindingPass
void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry &)
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::ShuffleVectorInst
This instruction constructs a fixed permutation of two input vectors.
Definition: Instructions.h:1986
Instructions.h
Dominators.h
InstructionSimplify.h
llvm::IntegerType::getBitWidth
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:71
llvm::FunctionCallee
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:164
llvm::SmallVectorImpl< char >
lowering
amdgpu printf runtime AMDGPU Printf lowering
Definition: AMDGPUPrintfRuntimeBinding.cpp:87
DWORD_ALIGN
#define DWORD_ALIGN
Definition: AMDGPUPrintfRuntimeBinding.cpp:33
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::InnerAnalysisManagerProxy
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
Definition: PassManager.h:945
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1450
llvm::StringRef::find_first_of
LLVM_NODISCARD size_t find_first_of(char C, size_t From=0) const
Find the first character in the string that is C, or npos if not found.
Definition: StringRef.h:413
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:363
llvm::SplitBlockAndInsertIfThen
Instruction * SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore, bool Unreachable, MDNode *BranchWeights, DominatorTree *DT, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
Definition: BasicBlockUtils.cpp:1424
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
BasicBlockUtils.h
llvm::MDString
A single uniqued string.
Definition: Metadata.h:611
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:820
InitializePasses.h
llvm::Type::getContainedType
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:346
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:421
llvm::FunctionType
Class to represent function types.
Definition: DerivedTypes.h:102
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38