LLVM  15.0.0git
AMDGPUEmitPrintf.cpp
Go to the documentation of this file.
1 //===- AMDGPUEmitPrintf.cpp -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Utility function to lower a printf call into a series of device
10 // library calls on the AMDGPU target.
11 //
12 // WARNING: This file knows about certain library functions. It recognizes them
13 // by name, and hardwires knowledge of their semantics.
14 //
15 //===----------------------------------------------------------------------===//
16 
20 
21 using namespace llvm;
22 
23 #define DEBUG_TYPE "amdgpu-emit-printf"
24 
26  auto Int64Ty = Builder.getInt64Ty();
27  auto Ty = Arg->getType();
28 
29  if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
30  switch (IntTy->getBitWidth()) {
31  case 32:
32  return Builder.CreateZExt(Arg, Int64Ty);
33  case 64:
34  return Arg;
35  }
36  }
37 
38  if (Ty->getTypeID() == Type::DoubleTyID) {
39  return Builder.CreateBitCast(Arg, Int64Ty);
40  }
41 
42  if (isa<PointerType>(Ty)) {
43  return Builder.CreatePtrToInt(Arg, Int64Ty);
44  }
45 
46  llvm_unreachable("unexpected type");
47 }
48 
50  auto Int64Ty = Builder.getInt64Ty();
51  auto M = Builder.GetInsertBlock()->getModule();
52  auto Fn = M->getOrInsertFunction("__ockl_printf_begin", Int64Ty, Int64Ty);
53  return Builder.CreateCall(Fn, Version);
54 }
55 
56 static Value *callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs,
57  Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3,
58  Value *Arg4, Value *Arg5, Value *Arg6,
59  bool IsLast) {
60  auto Int64Ty = Builder.getInt64Ty();
61  auto Int32Ty = Builder.getInt32Ty();
62  auto M = Builder.GetInsertBlock()->getModule();
63  auto Fn = M->getOrInsertFunction("__ockl_printf_append_args", Int64Ty,
64  Int64Ty, Int32Ty, Int64Ty, Int64Ty, Int64Ty,
65  Int64Ty, Int64Ty, Int64Ty, Int64Ty, Int32Ty);
66  auto IsLastValue = Builder.getInt32(IsLast);
67  auto NumArgsValue = Builder.getInt32(NumArgs);
68  return Builder.CreateCall(Fn, {Desc, NumArgsValue, Arg0, Arg1, Arg2, Arg3,
69  Arg4, Arg5, Arg6, IsLastValue});
70 }
71 
73  bool IsLast) {
74  auto Arg0 = fitArgInto64Bits(Builder, Arg);
75  auto Zero = Builder.getInt64(0);
76  return callAppendArgs(Builder, Desc, 1, Arg0, Zero, Zero, Zero, Zero, Zero,
77  Zero, IsLast);
78 }
79 
80 // The device library does not provide strlen, so we build our own loop
81 // here. While we are at it, we also include the terminating null in the length.
83  auto *Prev = Builder.GetInsertBlock();
84  Module *M = Prev->getModule();
85 
86  auto CharZero = Builder.getInt8(0);
87  auto One = Builder.getInt64(1);
88  auto Zero = Builder.getInt64(0);
89  auto Int64Ty = Builder.getInt64Ty();
90 
91  // The length is either zero for a null pointer, or the computed value for an
92  // actual string. We need a join block for a phi that represents the final
93  // value.
94  //
95  // Strictly speaking, the zero does not matter since
96  // __ockl_printf_append_string_n ignores the length if the pointer is null.
97  BasicBlock *Join = nullptr;
98  if (Prev->getTerminator()) {
99  Join = Prev->splitBasicBlock(Builder.GetInsertPoint(),
100  "strlen.join");
101  Prev->getTerminator()->eraseFromParent();
102  } else {
103  Join = BasicBlock::Create(M->getContext(), "strlen.join",
104  Prev->getParent());
105  }
106  BasicBlock *While =
107  BasicBlock::Create(M->getContext(), "strlen.while",
108  Prev->getParent(), Join);
109  BasicBlock *WhileDone = BasicBlock::Create(
110  M->getContext(), "strlen.while.done",
111  Prev->getParent(), Join);
112 
113  // Emit an early return for when the pointer is null.
114  Builder.SetInsertPoint(Prev);
115  auto CmpNull =
116  Builder.CreateICmpEQ(Str, Constant::getNullValue(Str->getType()));
117  BranchInst::Create(Join, While, CmpNull, Prev);
118 
119  // Entry to the while loop.
120  Builder.SetInsertPoint(While);
121 
122  auto PtrPhi = Builder.CreatePHI(Str->getType(), 2);
123  PtrPhi->addIncoming(Str, Prev);
124  auto PtrNext = Builder.CreateGEP(Builder.getInt8Ty(), PtrPhi, One);
125  PtrPhi->addIncoming(PtrNext, While);
126 
127  // Condition for the while loop.
128  auto Data = Builder.CreateLoad(Builder.getInt8Ty(), PtrPhi);
129  auto Cmp = Builder.CreateICmpEQ(Data, CharZero);
130  Builder.CreateCondBr(Cmp, WhileDone, While);
131 
132  // Add one to the computed length.
133  Builder.SetInsertPoint(WhileDone, WhileDone->begin());
134  auto Begin = Builder.CreatePtrToInt(Str, Int64Ty);
135  auto End = Builder.CreatePtrToInt(PtrPhi, Int64Ty);
136  auto Len = Builder.CreateSub(End, Begin);
137  Len = Builder.CreateAdd(Len, One);
138 
139  // Final join.
140  BranchInst::Create(Join, WhileDone);
141  Builder.SetInsertPoint(Join, Join->begin());
142  auto LenPhi = Builder.CreatePHI(Len->getType(), 2);
143  LenPhi->addIncoming(Len, WhileDone);
144  LenPhi->addIncoming(Zero, Prev);
145 
146  return LenPhi;
147 }
148 
150  Value *Length, bool isLast) {
151  auto Int64Ty = Builder.getInt64Ty();
152  auto CharPtrTy = Builder.getInt8PtrTy();
153  auto Int32Ty = Builder.getInt32Ty();
154  auto M = Builder.GetInsertBlock()->getModule();
155  auto Fn = M->getOrInsertFunction("__ockl_printf_append_string_n", Int64Ty,
156  Int64Ty, CharPtrTy, Int64Ty, Int32Ty);
157  auto IsLastInt32 = Builder.getInt32(isLast);
158  return Builder.CreateCall(Fn, {Desc, Str, Length, IsLastInt32});
159 }
160 
162  bool IsLast) {
163  Arg = Builder.CreateBitCast(
164  Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace()));
165  auto Length = getStrlenWithNull(Builder, Arg);
166  return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
167 }
168 
170  bool SpecIsCString, bool IsLast) {
171  if (SpecIsCString && isa<PointerType>(Arg->getType())) {
172  return appendString(Builder, Desc, Arg, IsLast);
173  }
174  // If the format specifies a string but the argument is not, the frontend will
175  // have printed a warning. We just rely on undefined behaviour and send the
176  // argument anyway.
177  return appendArg(Builder, Desc, Arg, IsLast);
178 }
179 
180 // Scan the format string to locate all specifiers, and mark the ones that
181 // specify a string, i.e, the "%s" specifier with optional '*' characters.
182 static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
183  StringRef Str;
184  if (!getConstantStringInfo(Fmt, Str) || Str.empty())
185  return;
186 
187  static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
188  size_t SpecPos = 0;
189  // Skip the first argument, the format string.
190  unsigned ArgIdx = 1;
191 
192  while ((SpecPos = Str.find_first_of('%', SpecPos)) != StringRef::npos) {
193  if (Str[SpecPos + 1] == '%') {
194  SpecPos += 2;
195  continue;
196  }
197  auto SpecEnd = Str.find_first_of(ConvSpecifiers, SpecPos);
198  if (SpecEnd == StringRef::npos)
199  return;
200  auto Spec = Str.slice(SpecPos, SpecEnd + 1);
201  ArgIdx += Spec.count('*');
202  if (Str[SpecEnd] == 's') {
203  BV.set(ArgIdx);
204  }
205  SpecPos = SpecEnd + 1;
206  ++ArgIdx;
207  }
208 }
209 
212  auto NumOps = Args.size();
213  assert(NumOps >= 1);
214 
215  auto Fmt = Args[0];
216  SparseBitVector<8> SpecIsCString;
217  locateCStrings(SpecIsCString, Fmt);
218 
219  auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
220  Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
221 
222  // FIXME: This invokes hostcall once for each argument. We can pack up to
223  // seven scalar printf arguments in a single hostcall. See the signature of
224  // callAppendArgs().
225  for (unsigned int i = 1; i != NumOps; ++i) {
226  bool IsLast = i == NumOps - 1;
227  bool IsCString = SpecIsCString.test(i);
228  Desc = processArg(Builder, Desc, Args[i], IsCString, IsLast);
229  }
230 
231  return Builder.CreateTrunc(Desc, Builder.getInt32Ty());
232 }
i
i
Definition: README.txt:29
Int32Ty
IntegerType * Int32Ty
Definition: NVVMIntrRange.cpp:67
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
callPrintfBegin
static Value * callPrintfBegin(IRBuilder<> &Builder, Value *Version)
Definition: AMDGPUEmitPrintf.cpp:49
llvm::StringRef::npos
static constexpr size_t npos
Definition: StringRef.h:60
processArg
static Value * processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, bool SpecIsCString, bool IsLast)
Definition: AMDGPUEmitPrintf.cpp:169
llvm::IRBuilder<>
ValueTracking.h
AMDGPUEmitPrintf.h
llvm::BasicBlock::splitBasicBlock
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:378
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
SparseBitVector.h
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:186
appendString
static Value * appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg, bool IsLast)
Definition: AMDGPUEmitPrintf.cpp:161
llvm::SparseBitVector
Definition: SparseBitVector.h:256
locateCStrings
static void locateCStrings(SparseBitVector< 8 > &BV, Value *Fmt)
Definition: AMDGPUEmitPrintf.cpp:182
llvm::BasicBlock::begin
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:297
llvm::IndexedInstrProf::Version
const uint64_t Version
Definition: InstrProf.h:1027
appendArg
static Value * appendArg(IRBuilder<> &Builder, Value *Desc, Value *Arg, bool IsLast)
Definition: AMDGPUEmitPrintf.cpp:72
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
llvm::SparseBitVector::set
void set(unsigned Idx)
Definition: SparseBitVector.h:508
llvm::getConstantStringInfo
bool getConstantStringInfo(const Value *V, StringRef &Str, uint64_t Offset=0, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
Definition: ValueTracking.cpp:4287
llvm::BranchInst::Create
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
Definition: Instructions.h:3142
llvm::SparseBitVector::test
bool test(unsigned Idx) const
Definition: SparseBitVector.h:472
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
getStrlenWithNull
static Value * getStrlenWithNull(IRBuilder<> &Builder, Value *Str)
Definition: AMDGPUEmitPrintf.cpp:82
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::BasicBlock::Create
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:97
fitArgInto64Bits
static Value * fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg)
Definition: AMDGPUEmitPrintf.cpp:25
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
callAppendArgs
static Value * callAppendArgs(IRBuilder<> &Builder, Value *Desc, int NumArgs, Value *Arg0, Value *Arg1, Value *Arg2, Value *Arg3, Value *Arg4, Value *Arg5, Value *Arg6, bool IsLast)
Definition: AMDGPUEmitPrintf.cpp:56
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:119
callAppendStringN
static Value * callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str, Value *Length, bool isLast)
Definition: AMDGPUEmitPrintf.cpp:149
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::emitAMDGPUPrintfCall
Value * emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef< Value * > Args)
Definition: AMDGPUEmitPrintf.cpp:210