LLVM  13.0.0git
NVPTXAsmPrinter.h
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to NVPTX assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
15 #define LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
16 
17 #include "NVPTX.h"
18 #include "NVPTXSubtarget.h"
19 #include "NVPTXTargetMachine.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringRef.h"
26 #include "llvm/IR/Constants.h"
27 #include "llvm/IR/DebugLoc.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/GlobalValue.h"
31 #include "llvm/IR/Value.h"
32 #include "llvm/MC/MCExpr.h"
33 #include "llvm/MC/MCStreamer.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/Casting.h"
37 #include "llvm/Support/Compiler.h"
41 #include <algorithm>
42 #include <cassert>
43 #include <map>
44 #include <memory>
45 #include <string>
46 #include <vector>
47 
48 // The ptx syntax and format is very different from that usually seen in a .s
49 // file,
50 // therefore we are not able to use the MCAsmStreamer interface here.
51 //
52 // We are handcrafting the output method here.
53 //
54 // A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer
55 // (subclass of MCStreamer).
56 
57 namespace llvm {
58 
59 class MCOperand;
60 
62 
63  class AggBuffer {
64  // Used to buffer the emitted string for initializing global
65  // aggregates.
66  //
67  // Normally an aggregate (array, vector or structure) is emitted
68  // as a u8[]. However, if one element/field of the aggregate
69  // is a non-NULL address, then the aggregate is emitted as u32[]
70  // or u64[].
71  //
72  // We first layout the aggregate in 'buffer' in bytes, except for
73  // those symbol addresses. For the i-th symbol address in the
74  // aggregate, its corresponding 4-byte or 8-byte elements in 'buffer'
75  // are filled with 0s. symbolPosInBuffer[i-1] records its position
76  // in 'buffer', and Symbols[i-1] records the Value*.
77  //
78  // Once we have this AggBuffer setup, we can choose how to print
79  // it out.
80  public:
81  unsigned numSymbols; // number of symbol addresses
82 
83  private:
84  const unsigned size; // size of the buffer in bytes
85  std::vector<unsigned char> buffer; // the buffer
86  SmallVector<unsigned, 4> symbolPosInBuffer;
88  // SymbolsBeforeStripping[i] is the original form of Symbols[i] before
89  // stripping pointer casts, i.e.,
90  // Symbols[i] == SymbolsBeforeStripping[i]->stripPointerCasts().
91  //
92  // We need to keep these values because AggBuffer::print decides whether to
93  // emit a "generic()" cast for Symbols[i] depending on the address space of
94  // SymbolsBeforeStripping[i].
95  SmallVector<const Value *, 4> SymbolsBeforeStripping;
96  unsigned curpos;
97  raw_ostream &O;
98  NVPTXAsmPrinter &AP;
99  bool EmitGeneric;
100 
101  public:
102  AggBuffer(unsigned size, raw_ostream &O, NVPTXAsmPrinter &AP)
103  : size(size), buffer(size), O(O), AP(AP) {
104  curpos = 0;
105  numSymbols = 0;
106  EmitGeneric = AP.EmitGeneric;
107  }
108 
109  unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
110  assert((curpos + Num) <= size);
111  assert((curpos + Bytes) <= size);
112  for (int i = 0; i < Num; ++i) {
113  buffer[curpos] = Ptr[i];
114  curpos++;
115  }
116  for (int i = Num; i < Bytes; ++i) {
117  buffer[curpos] = 0;
118  curpos++;
119  }
120  return curpos;
121  }
122 
123  unsigned addZeros(int Num) {
124  assert((curpos + Num) <= size);
125  for (int i = 0; i < Num; ++i) {
126  buffer[curpos] = 0;
127  curpos++;
128  }
129  return curpos;
130  }
131 
132  void addSymbol(const Value *GVar, const Value *GVarBeforeStripping) {
133  symbolPosInBuffer.push_back(curpos);
134  Symbols.push_back(GVar);
135  SymbolsBeforeStripping.push_back(GVarBeforeStripping);
136  numSymbols++;
137  }
138 
139  void print() {
140  if (numSymbols == 0) {
141  // print out in bytes
142  for (unsigned i = 0; i < size; i++) {
143  if (i)
144  O << ", ";
145  O << (unsigned int) buffer[i];
146  }
147  } else {
148  // print out in 4-bytes or 8-bytes
149  unsigned int pos = 0;
150  unsigned int nSym = 0;
151  unsigned int nextSymbolPos = symbolPosInBuffer[nSym];
152  unsigned int nBytes = 4;
153  if (static_cast<const NVPTXTargetMachine &>(AP.TM).is64Bit())
154  nBytes = 8;
155  for (pos = 0; pos < size; pos += nBytes) {
156  if (pos)
157  O << ", ";
158  if (pos == nextSymbolPos) {
159  const Value *v = Symbols[nSym];
160  const Value *v0 = SymbolsBeforeStripping[nSym];
161  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
162  MCSymbol *Name = AP.getSymbol(GVar);
163  PointerType *PTy = dyn_cast<PointerType>(v0->getType());
164  bool IsNonGenericPointer = false; // Is v0 a non-generic pointer?
165  if (PTy && PTy->getAddressSpace() != 0) {
166  IsNonGenericPointer = true;
167  }
168  if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
169  O << "generic(";
170  Name->print(O, AP.MAI);
171  O << ")";
172  } else {
173  Name->print(O, AP.MAI);
174  }
175  } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
176  const MCExpr *Expr =
177  AP.lowerConstantForGV(cast<Constant>(CExpr), false);
178  AP.printMCExpr(*Expr, O);
179  } else
180  llvm_unreachable("symbol type unknown");
181  nSym++;
182  if (nSym >= numSymbols)
183  nextSymbolPos = size + 1;
184  else
185  nextSymbolPos = symbolPosInBuffer[nSym];
186  } else if (nBytes == 4)
187  O << *(unsigned int *)(&buffer[pos]);
188  else
189  O << *(unsigned long long *)(&buffer[pos]);
190  }
191  }
192  }
193  };
194 
195  friend class AggBuffer;
196 
197 private:
198  StringRef getPassName() const override { return "NVPTX Assembly Printer"; }
199 
200  const Function *F;
201  std::string CurrentFnName;
202 
203  void emitStartOfAsmFile(Module &M) override;
204  void emitBasicBlockStart(const MachineBasicBlock &MBB) override;
205  void emitFunctionEntryLabel() override;
206  void emitFunctionBodyStart() override;
207  void emitFunctionBodyEnd() override;
208  void emitImplicitDef(const MachineInstr *MI) const override;
209 
210  void emitInstruction(const MachineInstr *) override;
211  void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
212  bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
214  unsigned encodeVirtualRegister(unsigned Reg);
215 
216  void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
217  const char *Modifier = nullptr);
218  void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
219  bool = false);
220  void printParamName(Function::const_arg_iterator I, int paramIndex,
221  raw_ostream &O);
222  void emitGlobals(const Module &M);
223  void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
224  void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
225  void emitVirtualRegister(unsigned int vr, raw_ostream &);
226  void emitFunctionParamList(const Function *, raw_ostream &O);
227  void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
228  void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
229  void printReturnValStr(const Function *, raw_ostream &O);
230  void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
231  bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
232  const char *ExtraCode, raw_ostream &) override;
233  void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
234  bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
235  const char *ExtraCode, raw_ostream &) override;
236 
237  const MCExpr *lowerConstantForGV(const Constant *CV, bool ProcessingGeneric);
238  void printMCExpr(const MCExpr &Expr, raw_ostream &OS);
239 
240 protected:
241  bool doInitialization(Module &M) override;
242  bool doFinalization(Module &M) override;
243 
244 private:
245  bool GlobalsEmitted;
246 
247  // This is specific per MachineFunction.
248  const MachineRegisterInfo *MRI;
249  // The contents are specific for each
250  // MachineFunction. But the size of the
251  // array is not.
252  typedef DenseMap<unsigned, unsigned> VRegMap;
254  VRegRCMap VRegMapping;
255 
256  // List of variables demoted to a function scope.
257  std::map<const Function *, std::vector<const GlobalVariable *>> localDecls;
258 
259  void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
260  void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
261  std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const;
262  void printScalarConstant(const Constant *CPV, raw_ostream &O);
263  void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
264  void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer);
265  void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer);
266 
267  void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
268  void emitDeclarations(const Module &, raw_ostream &O);
269  void emitDeclaration(const Function *, raw_ostream &O);
270  void emitDemotedVars(const Function *, raw_ostream &);
271 
272  bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo,
273  MCOperand &MCOp);
274  void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
275 
276  bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const;
277 
278  // Used to control the need to emit .generic() in the initializer of
279  // module scope variables.
280  // Although ptx supports the hybrid mode like the following,
281  // .global .u32 a;
282  // .global .u32 b;
283  // .global .u32 addr[] = {a, generic(b)}
284  // we have difficulty representing the difference in the NVVM IR.
285  //
286  // Since the address value should always be generic in CUDA C and always
287  // be specific in OpenCL, we use this simple control here.
288  //
289  bool EmitGeneric;
290 
291 public:
292  NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
293  : AsmPrinter(TM, std::move(Streamer)),
294  EmitGeneric(static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() ==
295  NVPTX::CUDA) {}
296 
297  bool runOnMachineFunction(MachineFunction &F) override;
298 
299  void getAnalysisUsage(AnalysisUsage &AU) const override {
302  }
303 
304  std::string getVirtualRegisterName(unsigned) const;
305 
306  const MCSymbol *getFunctionFrameSymbol() const override;
307 };
308 
309 } // end namespace llvm
310 
311 #endif // LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H
i
i
Definition: README.txt:29
AsmPrinter.h
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:102
llvm
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::NVPTXAsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
Definition: NVPTXAsmPrinter.h:299
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::Function
Definition: Function.h:61
StringRef.h
Pass.h
llvm::NVPTXTargetMachine::is64Bit
bool is64Bit() const
Definition: NVPTXTargetMachine.h:46
llvm::AsmPrinter::MAI
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:88
llvm::SmallVector< unsigned, 4 >
ErrorHandling.h
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:689
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
DenseMap.h
printOperand
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
Definition: SelectionDAGDumper.cpp:944
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:24
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
NVPTX.h
printMCExpr
static void printMCExpr(const MCExpr *E, raw_ostream &OS)
Definition: SystemZAsmParser.cpp:691
GlobalValue.h
MachineLoopInfo.h
TargetMachine.h
Constants.h
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
MCSymbol.h
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:255
DebugLoc.h
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:179
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::DenseMap< unsigned, unsigned >
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:634
printMemOperand
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, const MachineFunction *MF, const Module *M, const MachineFrameInfo *MFI, const TargetInstrInfo *TII, LLVMContext &Ctx)
Definition: SelectionDAGDumper.cpp:511
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:478
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1540
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1463
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AsmPrinter::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
Record analysis usage.
Definition: AsmPrinter.cpp:259
GetSymbolRef
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
Definition: HexagonMCInstLower.cpp:41
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
Compiler.h
llvm::NVPTXAsmPrinter
Definition: NVPTXAsmPrinter.h:61
LLVM_LIBRARY_VISIBILITY
#define LLVM_LIBRARY_VISIBILITY
LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked into a shared library,...
Definition: Compiler.h:131
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::NVPTXAsmPrinter::NVPTXAsmPrinter
NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr< MCStreamer > Streamer)
Definition: NVPTXAsmPrinter.h:292
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:931
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
std
Definition: BitVector.h:838
llvm::NVPTX::CUDA
@ CUDA
Definition: NVPTX.h:72
Casting.h
NVPTXTargetMachine.h
Function.h
llvm::AsmPrinter
This class is intended to be used as a driving class for all asm writers.
Definition: AsmPrinter.h:82
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:79
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:85
SmallVector.h
MCStreamer.h
NVPTXSubtarget.h
DerivedTypes.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
raw_ostream.h
MachineFunction.h
Value.h
MCExpr.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35