LLVM  9.0.0svn
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to NVPTX assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXAsmPrinter.h"
19 #include "NVPTX.h"
20 #include "NVPTXMCExpr.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXSubtarget.h"
24 #include "NVPTXTargetMachine.h"
25 #include "NVPTXUtilities.h"
26 #include "cl_common_defines.h"
27 #include "llvm/ADT/APFloat.h"
28 #include "llvm/ADT/APInt.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/DenseSet.h"
31 #include "llvm/ADT/SmallString.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/StringExtras.h"
34 #include "llvm/ADT/StringRef.h"
35 #include "llvm/ADT/Triple.h"
36 #include "llvm/ADT/Twine.h"
38 #include "llvm/CodeGen/Analysis.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/Constant.h"
53 #include "llvm/IR/Constants.h"
54 #include "llvm/IR/DataLayout.h"
55 #include "llvm/IR/DebugInfo.h"
57 #include "llvm/IR/DebugLoc.h"
58 #include "llvm/IR/DerivedTypes.h"
59 #include "llvm/IR/Function.h"
60 #include "llvm/IR/GlobalValue.h"
61 #include "llvm/IR/GlobalVariable.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/Module.h"
65 #include "llvm/IR/Operator.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/MC/MCExpr.h"
69 #include "llvm/MC/MCInst.h"
70 #include "llvm/MC/MCInstrDesc.h"
71 #include "llvm/MC/MCStreamer.h"
72 #include "llvm/MC/MCSymbol.h"
73 #include "llvm/Support/Casting.h"
77 #include "llvm/Support/Path.h"
83 #include <cassert>
84 #include <cstdint>
85 #include <cstring>
86 #include <new>
87 #include <string>
88 #include <utility>
89 #include <vector>
90 
91 using namespace llvm;
92 
93 #define DEPOTNAME "__local_depot"
94 
95 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
96 /// depends.
97 static void
100  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
101  Globals.insert(GV);
102  else {
103  if (const User *U = dyn_cast<User>(V)) {
104  for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
105  DiscoverDependentGlobals(U->getOperand(i), Globals);
106  }
107  }
108  }
109 }
110 
111 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
112 /// instances to be emitted, but only after any dependents have been added
113 /// first.s
114 static void
119  // Have we already visited this one?
120  if (Visited.count(GV))
121  return;
122 
123  // Do we have a circular dependency?
124  if (!Visiting.insert(GV).second)
125  report_fatal_error("Circular dependency found in global variable set");
126 
127  // Make sure we visit all dependents first
129  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
130  DiscoverDependentGlobals(GV->getOperand(i), Others);
131 
133  E = Others.end();
134  I != E; ++I)
135  VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
136 
137  // Now we can visit ourself
138  Order.push_back(GV);
139  Visited.insert(GV);
140  Visiting.erase(GV);
141 }
142 
143 void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
144  MCInst Inst;
145  lowerToMCInst(MI, Inst);
146  EmitToStreamer(*OutStreamer, Inst);
147 }
148 
149 // Handle symbol backtracking for targets that do not support image handles
150 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
151  unsigned OpNo, MCOperand &MCOp) {
152  const MachineOperand &MO = MI->getOperand(OpNo);
153  const MCInstrDesc &MCID = MI->getDesc();
154 
155  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
156  // This is a texture fetch, so operand 4 is a texref and operand 5 is
157  // a samplerref
158  if (OpNo == 4 && MO.isImm()) {
159  lowerImageHandleSymbol(MO.getImm(), MCOp);
160  return true;
161  }
162  if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
163  lowerImageHandleSymbol(MO.getImm(), MCOp);
164  return true;
165  }
166 
167  return false;
168  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
169  unsigned VecSize =
170  1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
171 
172  // For a surface load of vector size N, the Nth operand will be the surfref
173  if (OpNo == VecSize && MO.isImm()) {
174  lowerImageHandleSymbol(MO.getImm(), MCOp);
175  return true;
176  }
177 
178  return false;
179  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
180  // This is a surface store, so operand 0 is a surfref
181  if (OpNo == 0 && MO.isImm()) {
182  lowerImageHandleSymbol(MO.getImm(), MCOp);
183  return true;
184  }
185 
186  return false;
187  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
188  // This is a query, so operand 1 is a surfref/texref
189  if (OpNo == 1 && MO.isImm()) {
190  lowerImageHandleSymbol(MO.getImm(), MCOp);
191  return true;
192  }
193 
194  return false;
195  }
196 
197  return false;
198 }
199 
200 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
201  // Ewwww
202  LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
203  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
205  const char *Sym = MFI->getImageHandleSymbol(Index);
206  std::string *SymNamePtr =
207  nvTM.getManagedStrPool()->getManagedString(Sym);
208  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
209 }
210 
211 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
212  OutMI.setOpcode(MI->getOpcode());
213  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
214  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
215  const MachineOperand &MO = MI->getOperand(0);
216  OutMI.addOperand(GetSymbolRef(
218  return;
219  }
220 
221  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
222  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
223  const MachineOperand &MO = MI->getOperand(i);
224 
225  MCOperand MCOp;
226  if (!STI.hasImageHandles()) {
227  if (lowerImageHandleOperand(MI, i, MCOp)) {
228  OutMI.addOperand(MCOp);
229  continue;
230  }
231  }
232 
233  if (lowerOperand(MO, MCOp))
234  OutMI.addOperand(MCOp);
235  }
236 }
237 
238 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
239  MCOperand &MCOp) {
240  switch (MO.getType()) {
241  default: llvm_unreachable("unknown operand type");
243  MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
244  break;
246  MCOp = MCOperand::createImm(MO.getImm());
247  break;
250  MO.getMBB()->getSymbol(), OutContext));
251  break;
254  break;
256  MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
257  break;
259  const ConstantFP *Cnt = MO.getFPImm();
260  const APFloat &Val = Cnt->getValueAPF();
261 
262  switch (Cnt->getType()->getTypeID()) {
263  default: report_fatal_error("Unsupported FP type"); break;
264  case Type::HalfTyID:
265  MCOp = MCOperand::createExpr(
267  break;
268  case Type::FloatTyID:
269  MCOp = MCOperand::createExpr(
271  break;
272  case Type::DoubleTyID:
273  MCOp = MCOperand::createExpr(
275  break;
276  }
277  break;
278  }
279  }
280  return true;
281 }
282 
283 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
285  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
286 
287  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
288  unsigned RegNum = RegMap[Reg];
289 
290  // Encode the register class in the upper 4 bits
291  // Must be kept in sync with NVPTXInstPrinter::printRegName
292  unsigned Ret = 0;
293  if (RC == &NVPTX::Int1RegsRegClass) {
294  Ret = (1 << 28);
295  } else if (RC == &NVPTX::Int16RegsRegClass) {
296  Ret = (2 << 28);
297  } else if (RC == &NVPTX::Int32RegsRegClass) {
298  Ret = (3 << 28);
299  } else if (RC == &NVPTX::Int64RegsRegClass) {
300  Ret = (4 << 28);
301  } else if (RC == &NVPTX::Float32RegsRegClass) {
302  Ret = (5 << 28);
303  } else if (RC == &NVPTX::Float64RegsRegClass) {
304  Ret = (6 << 28);
305  } else if (RC == &NVPTX::Float16RegsRegClass) {
306  Ret = (7 << 28);
307  } else if (RC == &NVPTX::Float16x2RegsRegClass) {
308  Ret = (8 << 28);
309  } else {
310  report_fatal_error("Bad register class");
311  }
312 
313  // Insert the vreg number
314  Ret |= (RegNum & 0x0FFFFFFF);
315  return Ret;
316  } else {
317  // Some special-use registers are actually physical registers.
318  // Encode this as the register class ID of 0 and the real register ID.
319  return Reg & 0x0FFFFFFF;
320  }
321 }
322 
324  const MCExpr *Expr;
326  OutContext);
327  return MCOperand::createExpr(Expr);
328 }
329 
330 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
331  const DataLayout &DL = getDataLayout();
332  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
333  const TargetLowering *TLI = STI.getTargetLowering();
334 
335  Type *Ty = F->getReturnType();
336 
337  bool isABI = (STI.getSmVersion() >= 20);
338 
339  if (Ty->getTypeID() == Type::VoidTyID)
340  return;
341 
342  O << " (";
343 
344  if (isABI) {
345  if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
346  unsigned size = 0;
347  if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
348  size = ITy->getBitWidth();
349  } else {
350  assert(Ty->isFloatingPointTy() && "Floating point type expected here");
351  size = Ty->getPrimitiveSizeInBits();
352  }
353  // PTX ABI requires all scalar return values to be at least 32
354  // bits in size. fp16 normally uses .b16 as its storage type in
355  // PTX, so its size must be adjusted here, too.
356  if (size < 32)
357  size = 32;
358 
359  O << ".param .b" << size << " func_retval0";
360  } else if (isa<PointerType>(Ty)) {
361  O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
362  << " func_retval0";
363  } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
364  unsigned totalsz = DL.getTypeAllocSize(Ty);
365  unsigned retAlignment = 0;
366  if (!getAlign(*F, 0, retAlignment))
367  retAlignment = DL.getABITypeAlignment(Ty);
368  O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
369  << "]";
370  } else
371  llvm_unreachable("Unknown return type");
372  } else {
373  SmallVector<EVT, 16> vtparts;
374  ComputeValueVTs(*TLI, DL, Ty, vtparts);
375  unsigned idx = 0;
376  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
377  unsigned elems = 1;
378  EVT elemtype = vtparts[i];
379  if (vtparts[i].isVector()) {
380  elems = vtparts[i].getVectorNumElements();
381  elemtype = vtparts[i].getVectorElementType();
382  }
383 
384  for (unsigned j = 0, je = elems; j != je; ++j) {
385  unsigned sz = elemtype.getSizeInBits();
386  if (elemtype.isInteger() && (sz < 32))
387  sz = 32;
388  O << ".reg .b" << sz << " func_retval" << idx;
389  if (j < je - 1)
390  O << ", ";
391  ++idx;
392  }
393  if (i < e - 1)
394  O << ", ";
395  }
396  }
397  O << ") ";
398 }
399 
400 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
401  raw_ostream &O) {
402  const Function &F = MF.getFunction();
403  printReturnValStr(&F, O);
404 }
405 
406 // Return true if MBB is the header of a loop marked with
407 // llvm.loop.unroll.disable.
408 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
409 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
410  const MachineBasicBlock &MBB) const {
411  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
412  // We insert .pragma "nounroll" only to the loop header.
413  if (!LI.isLoopHeader(&MBB))
414  return false;
415 
416  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
417  // we iterate through each back edge of the loop with header MBB, and check
418  // whether its metadata contains llvm.loop.unroll.disable.
419  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
420  const MachineBasicBlock *PMBB = *I;
421  if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
422  // Edges from other loops to MBB are not back edges.
423  continue;
424  }
425  if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
426  if (MDNode *LoopID =
427  PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
428  if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
429  return true;
430  }
431  }
432  }
433  return false;
434 }
435 
436 void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
438  if (isLoopHeaderOfNoUnroll(MBB))
439  OutStreamer->EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
440 }
441 
442 void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
443  SmallString<128> Str;
444  raw_svector_ostream O(Str);
445 
446  if (!GlobalsEmitted) {
447  emitGlobals(*MF->getFunction().getParent());
448  GlobalsEmitted = true;
449  }
450 
451  // Set up
452  MRI = &MF->getRegInfo();
453  F = &MF->getFunction();
454  emitLinkageDirective(F, O);
455  if (isKernelFunction(*F))
456  O << ".entry ";
457  else {
458  O << ".func ";
459  printReturnValStr(*MF, O);
460  }
461 
462  CurrentFnSym->print(O, MAI);
463 
464  emitFunctionParamList(*MF, O);
465 
466  if (isKernelFunction(*F))
467  emitKernelFunctionDirectives(*F, O);
468 
469  OutStreamer->EmitRawText(O.str());
470 
471  VRegMapping.clear();
472  // Emit open brace for function body.
473  OutStreamer->EmitRawText(StringRef("{\n"));
474  setAndEmitFunctionVirtualRegisters(*MF);
475  // Emit initial .loc debug directive for correct relocation symbol data.
476  if (MMI && MMI->hasDebugInfo())
478 }
479 
481  bool Result = AsmPrinter::runOnMachineFunction(F);
482  // Emit closing brace for the body of function F.
483  // The closing brace must be emitted here because we need to emit additional
484  // debug labels/data after the last basic block.
485  // We need to emit the closing brace here because we don't have function that
486  // finished emission of the function body.
487  OutStreamer->EmitRawText(StringRef("}\n"));
488  return Result;
489 }
490 
491 void NVPTXAsmPrinter::EmitFunctionBodyStart() {
492  SmallString<128> Str;
493  raw_svector_ostream O(Str);
494  emitDemotedVars(&MF->getFunction(), O);
495  OutStreamer->EmitRawText(O.str());
496 }
497 
498 void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
499  VRegMapping.clear();
500 }
501 
503  SmallString<128> Str;
505  return OutContext.getOrCreateSymbol(Str);
506 }
507 
508 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
509  unsigned RegNo = MI->getOperand(0).getReg();
511  OutStreamer->AddComment(Twine("implicit-def: ") +
512  getVirtualRegisterName(RegNo));
513  } else {
514  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
515  OutStreamer->AddComment(Twine("implicit-def: ") +
516  STI.getRegisterInfo()->getName(RegNo));
517  }
518  OutStreamer->AddBlankLine();
519 }
520 
521 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
522  raw_ostream &O) const {
523  // If the NVVM IR has some of reqntid* specified, then output
524  // the reqntid directive, and set the unspecified ones to 1.
525  // If none of reqntid* is specified, don't output reqntid directive.
526  unsigned reqntidx, reqntidy, reqntidz;
527  bool specified = false;
528  if (!getReqNTIDx(F, reqntidx))
529  reqntidx = 1;
530  else
531  specified = true;
532  if (!getReqNTIDy(F, reqntidy))
533  reqntidy = 1;
534  else
535  specified = true;
536  if (!getReqNTIDz(F, reqntidz))
537  reqntidz = 1;
538  else
539  specified = true;
540 
541  if (specified)
542  O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
543  << "\n";
544 
545  // If the NVVM IR has some of maxntid* specified, then output
546  // the maxntid directive, and set the unspecified ones to 1.
547  // If none of maxntid* is specified, don't output maxntid directive.
548  unsigned maxntidx, maxntidy, maxntidz;
549  specified = false;
550  if (!getMaxNTIDx(F, maxntidx))
551  maxntidx = 1;
552  else
553  specified = true;
554  if (!getMaxNTIDy(F, maxntidy))
555  maxntidy = 1;
556  else
557  specified = true;
558  if (!getMaxNTIDz(F, maxntidz))
559  maxntidz = 1;
560  else
561  specified = true;
562 
563  if (specified)
564  O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
565  << "\n";
566 
567  unsigned mincta;
568  if (getMinCTASm(F, mincta))
569  O << ".minnctapersm " << mincta << "\n";
570 
571  unsigned maxnreg;
572  if (getMaxNReg(F, maxnreg))
573  O << ".maxnreg " << maxnreg << "\n";
574 }
575 
576 std::string
578  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
579 
580  std::string Name;
581  raw_string_ostream NameStr(Name);
582 
583  VRegRCMap::const_iterator I = VRegMapping.find(RC);
584  assert(I != VRegMapping.end() && "Bad register class");
585  const DenseMap<unsigned, unsigned> &RegMap = I->second;
586 
587  VRegMap::const_iterator VI = RegMap.find(Reg);
588  assert(VI != RegMap.end() && "Bad virtual register");
589  unsigned MappedVR = VI->second;
590 
591  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
592 
593  NameStr.flush();
594  return Name;
595 }
596 
597 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
598  raw_ostream &O) {
599  O << getVirtualRegisterName(vr);
600 }
601 
602 void NVPTXAsmPrinter::printVecModifiedImmediate(
603  const MachineOperand &MO, const char *Modifier, raw_ostream &O) {
604  static const char vecelem[] = { '0', '1', '2', '3', '0', '1', '2', '3' };
605  int Imm = (int) MO.getImm();
606  if (0 == strcmp(Modifier, "vecelem"))
607  O << "_" << vecelem[Imm];
608  else if (0 == strcmp(Modifier, "vecv4comm1")) {
609  if ((Imm < 0) || (Imm > 3))
610  O << "//";
611  } else if (0 == strcmp(Modifier, "vecv4comm2")) {
612  if ((Imm < 4) || (Imm > 7))
613  O << "//";
614  } else if (0 == strcmp(Modifier, "vecv4pos")) {
615  if (Imm < 0)
616  Imm = 0;
617  O << "_" << vecelem[Imm % 4];
618  } else if (0 == strcmp(Modifier, "vecv2comm1")) {
619  if ((Imm < 0) || (Imm > 1))
620  O << "//";
621  } else if (0 == strcmp(Modifier, "vecv2comm2")) {
622  if ((Imm < 2) || (Imm > 3))
623  O << "//";
624  } else if (0 == strcmp(Modifier, "vecv2pos")) {
625  if (Imm < 0)
626  Imm = 0;
627  O << "_" << vecelem[Imm % 2];
628  } else
629  llvm_unreachable("Unknown Modifier on immediate operand");
630 }
631 
632 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
633  emitLinkageDirective(F, O);
634  if (isKernelFunction(*F))
635  O << ".entry ";
636  else
637  O << ".func ";
638  printReturnValStr(F, O);
639  getSymbol(F)->print(O, MAI);
640  O << "\n";
641  emitFunctionParamList(F, O);
642  O << ";\n";
643 }
644 
645 static bool usedInGlobalVarDef(const Constant *C) {
646  if (!C)
647  return false;
648 
649  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
650  return GV->getName() != "llvm.used";
651  }
652 
653  for (const User *U : C->users())
654  if (const Constant *C = dyn_cast<Constant>(U))
655  if (usedInGlobalVarDef(C))
656  return true;
657 
658  return false;
659 }
660 
661 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
662  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
663  if (othergv->getName() == "llvm.used")
664  return true;
665  }
666 
667  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
668  if (instr->getParent() && instr->getParent()->getParent()) {
669  const Function *curFunc = instr->getParent()->getParent();
670  if (oneFunc && (curFunc != oneFunc))
671  return false;
672  oneFunc = curFunc;
673  return true;
674  } else
675  return false;
676  }
677 
678  for (const User *UU : U->users())
679  if (!usedInOneFunc(UU, oneFunc))
680  return false;
681 
682  return true;
683 }
684 
685 /* Find out if a global variable can be demoted to local scope.
686  * Currently, this is valid for CUDA shared variables, which have local
687  * scope and global lifetime. So the conditions to check are :
688  * 1. Is the global variable in shared address space?
689  * 2. Does it have internal linkage?
690  * 3. Is the global variable referenced only in one function?
691  */
692 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
693  if (!gv->hasInternalLinkage())
694  return false;
695  PointerType *Pty = gv->getType();
697  return false;
698 
699  const Function *oneFunc = nullptr;
700 
701  bool flag = usedInOneFunc(gv, oneFunc);
702  if (!flag)
703  return false;
704  if (!oneFunc)
705  return false;
706  f = oneFunc;
707  return true;
708 }
709 
710 static bool useFuncSeen(const Constant *C,
712  for (const User *U : C->users()) {
713  if (const Constant *cu = dyn_cast<Constant>(U)) {
714  if (useFuncSeen(cu, seenMap))
715  return true;
716  } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
717  const BasicBlock *bb = I->getParent();
718  if (!bb)
719  continue;
720  const Function *caller = bb->getParent();
721  if (!caller)
722  continue;
723  if (seenMap.find(caller) != seenMap.end())
724  return true;
725  }
726  }
727  return false;
728 }
729 
730 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
732  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
733  const Function *F = &*FI;
734 
735  if (F->getAttributes().hasFnAttribute("nvptx-libcall-callee")) {
736  emitDeclaration(F, O);
737  continue;
738  }
739 
740  if (F->isDeclaration()) {
741  if (F->use_empty())
742  continue;
743  if (F->getIntrinsicID())
744  continue;
745  emitDeclaration(F, O);
746  continue;
747  }
748  for (const User *U : F->users()) {
749  if (const Constant *C = dyn_cast<Constant>(U)) {
750  if (usedInGlobalVarDef(C)) {
751  // The use is in the initialization of a global variable
752  // that is a function pointer, so print a declaration
753  // for the original function
754  emitDeclaration(F, O);
755  break;
756  }
757  // Emit a declaration of this function if the function that
758  // uses this constant expr has already been seen.
759  if (useFuncSeen(C, seenMap)) {
760  emitDeclaration(F, O);
761  break;
762  }
763  }
764 
765  if (!isa<Instruction>(U))
766  continue;
767  const Instruction *instr = cast<Instruction>(U);
768  const BasicBlock *bb = instr->getParent();
769  if (!bb)
770  continue;
771  const Function *caller = bb->getParent();
772  if (!caller)
773  continue;
774 
775  // If a caller has already been seen, then the caller is
776  // appearing in the module before the callee. so print out
777  // a declaration for the callee.
778  if (seenMap.find(caller) != seenMap.end()) {
779  emitDeclaration(F, O);
780  break;
781  }
782  }
783  seenMap[F] = true;
784  }
785 }
786 
788  if (!GV) return true;
789  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
790  if (!InitList) return true; // Not an array; we don't know how to parse.
791  return InitList->getNumOperands() == 0;
792 }
793 
795  // Construct a default subtarget off of the TargetMachine defaults. The
796  // rest of NVPTX isn't friendly to change subtargets per function and
797  // so the default TargetMachine will have all of the options.
798  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
799  const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
800 
801  if (M.alias_size()) {
802  report_fatal_error("Module has aliases, which NVPTX does not support.");
803  return true; // error
804  }
805  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
807  "Module has a nontrivial global ctor, which NVPTX does not support.");
808  return true; // error
809  }
810  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
812  "Module has a nontrivial global dtor, which NVPTX does not support.");
813  return true; // error
814  }
815 
816  SmallString<128> Str1;
817  raw_svector_ostream OS1(Str1);
818 
819  // We need to call the parent's one explicitly.
820  bool Result = AsmPrinter::doInitialization(M);
821 
822  // Emit header before any dwarf directives are emitted below.
823  emitHeader(M, OS1, *STI);
824  OutStreamer->EmitRawText(OS1.str());
825 
826  // Emit module-level inline asm if it exists.
827  if (!M.getModuleInlineAsm().empty()) {
828  OutStreamer->AddComment("Start of file scope inline assembly");
829  OutStreamer->AddBlankLine();
830  OutStreamer->EmitRawText(StringRef(M.getModuleInlineAsm()));
831  OutStreamer->AddBlankLine();
832  OutStreamer->AddComment("End of file scope inline assembly");
833  OutStreamer->AddBlankLine();
834  }
835 
836  GlobalsEmitted = false;
837 
838  return Result;
839 }
840 
841 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
842  SmallString<128> Str2;
843  raw_svector_ostream OS2(Str2);
844 
845  emitDeclarations(M, OS2);
846 
847  // As ptxas does not support forward references of globals, we need to first
848  // sort the list of module-level globals in def-use order. We visit each
849  // global variable in order, and ensure that we emit it *after* its dependent
850  // globals. We use a little extra memory maintaining both a set and a list to
851  // have fast searches while maintaining a strict ordering.
855 
856  // Visit each global variable, in order
857  for (const GlobalVariable &I : M.globals())
858  VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
859 
860  assert(GVVisited.size() == M.getGlobalList().size() &&
861  "Missed a global variable");
862  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
863 
864  // Print out module-level global variables in proper order
865  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
866  printModuleLevelGV(Globals[i], OS2);
867 
868  OS2 << '\n';
869 
870  OutStreamer->EmitRawText(OS2.str());
871 }
872 
873 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
874  const NVPTXSubtarget &STI) {
875  O << "//\n";
876  O << "// Generated by LLVM NVPTX Back-End\n";
877  O << "//\n";
878  O << "\n";
879 
880  unsigned PTXVersion = STI.getPTXVersion();
881  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
882 
883  O << ".target ";
884  O << STI.getTargetName();
885 
886  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
887  if (NTM.getDrvInterface() == NVPTX::NVCL)
888  O << ", texmode_independent";
889 
890  bool HasFullDebugInfo = false;
891  for (DICompileUnit *CU : M.debug_compile_units()) {
892  switch(CU->getEmissionKind()) {
895  break;
898  HasFullDebugInfo = true;
899  break;
900  }
901  if (HasFullDebugInfo)
902  break;
903  }
904  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
905  O << ", debug";
906 
907  O << "\n";
908 
909  O << ".address_size ";
910  if (NTM.is64Bit())
911  O << "64";
912  else
913  O << "32";
914  O << "\n";
915 
916  O << "\n";
917 }
918 
920  bool HasDebugInfo = MMI && MMI->hasDebugInfo();
921 
922  // If we did not emit any functions, then the global declarations have not
923  // yet been emitted.
924  if (!GlobalsEmitted) {
925  emitGlobals(M);
926  GlobalsEmitted = true;
927  }
928 
929  // XXX Temproarily remove global variables so that doFinalization() will not
930  // emit them again (global variables are emitted at beginning).
931 
932  Module::GlobalListType &global_list = M.getGlobalList();
933  int i, n = global_list.size();
934  GlobalVariable **gv_array = new GlobalVariable *[n];
935 
936  // first, back-up GlobalVariable in gv_array
937  i = 0;
938  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
939  I != E; ++I)
940  gv_array[i++] = &*I;
941 
942  // second, empty global_list
943  while (!global_list.empty())
944  global_list.remove(global_list.begin());
945 
946  // call doFinalization
947  bool ret = AsmPrinter::doFinalization(M);
948 
949  // now we restore global variables
950  for (i = 0; i < n; i++)
951  global_list.insert(global_list.end(), gv_array[i]);
952 
954 
955  delete[] gv_array;
956  // Close the last emitted section
957  if (HasDebugInfo) {
958  static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
959  ->closeLastSection();
960  // Emit empty .debug_loc section for better support of the empty files.
961  OutStreamer->EmitRawText("\t.section\t.debug_loc\t{\t}");
962  }
963 
964  // Output last DWARF .file directives, if any.
965  static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
966  ->outputDwarfFileDirectives();
967 
968  return ret;
969 
970  //bool Result = AsmPrinter::doFinalization(M);
971  // Instead of calling the parents doFinalization, we may
972  // clone parents doFinalization and customize here.
973  // Currently, we if NVISA out the EmitGlobals() in
974  // parent's doFinalization, which is too intrusive.
975  //
976  // Same for the doInitialization.
977  //return Result;
978 }
979 
980 // This function emits appropriate linkage directives for
981 // functions and global variables.
982 //
983 // extern function declaration -> .extern
984 // extern function definition -> .visible
985 // external global variable with init -> .visible
986 // external without init -> .extern
987 // appending -> not allowed, assert.
988 // for any linkage other than
989 // internal, private, linker_private,
990 // linker_private_weak, linker_private_weak_def_auto,
991 // we emit -> .weak.
992 
993 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
994  raw_ostream &O) {
995  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
996  if (V->hasExternalLinkage()) {
997  if (isa<GlobalVariable>(V)) {
998  const GlobalVariable *GVar = cast<GlobalVariable>(V);
999  if (GVar) {
1000  if (GVar->hasInitializer())
1001  O << ".visible ";
1002  else
1003  O << ".extern ";
1004  }
1005  } else if (V->isDeclaration())
1006  O << ".extern ";
1007  else
1008  O << ".visible ";
1009  } else if (V->hasAppendingLinkage()) {
1010  std::string msg;
1011  msg.append("Error: ");
1012  msg.append("Symbol ");
1013  if (V->hasName())
1014  msg.append(V->getName());
1015  msg.append("has unsupported appending linkage type");
1016  llvm_unreachable(msg.c_str());
1017  } else if (!V->hasInternalLinkage() &&
1018  !V->hasPrivateLinkage()) {
1019  O << ".weak ";
1020  }
1021  }
1022 }
1023 
1024 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
1025  raw_ostream &O,
1026  bool processDemoted) {
1027  // Skip meta data
1028  if (GVar->hasSection()) {
1029  if (GVar->getSection() == "llvm.metadata")
1030  return;
1031  }
1032 
1033  // Skip LLVM intrinsic global variables
1034  if (GVar->getName().startswith("llvm.") ||
1035  GVar->getName().startswith("nvvm."))
1036  return;
1037 
1038  const DataLayout &DL = getDataLayout();
1039 
1040  // GlobalVariables are always constant pointers themselves.
1041  PointerType *PTy = GVar->getType();
1042  Type *ETy = GVar->getValueType();
1043 
1044  if (GVar->hasExternalLinkage()) {
1045  if (GVar->hasInitializer())
1046  O << ".visible ";
1047  else
1048  O << ".extern ";
1049  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1051  GVar->hasCommonLinkage()) {
1052  O << ".weak ";
1053  }
1054 
1055  if (isTexture(*GVar)) {
1056  O << ".global .texref " << getTextureName(*GVar) << ";\n";
1057  return;
1058  }
1059 
1060  if (isSurface(*GVar)) {
1061  O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1062  return;
1063  }
1064 
1065  if (GVar->isDeclaration()) {
1066  // (extern) declarations, no definition or initializer
1067  // Currently the only known declaration is for an automatic __local
1068  // (.shared) promoted to global.
1069  emitPTXGlobalVariable(GVar, O);
1070  O << ";\n";
1071  return;
1072  }
1073 
1074  if (isSampler(*GVar)) {
1075  O << ".global .samplerref " << getSamplerName(*GVar);
1076 
1077  const Constant *Initializer = nullptr;
1078  if (GVar->hasInitializer())
1079  Initializer = GVar->getInitializer();
1080  const ConstantInt *CI = nullptr;
1081  if (Initializer)
1082  CI = dyn_cast<ConstantInt>(Initializer);
1083  if (CI) {
1084  unsigned sample = CI->getZExtValue();
1085 
1086  O << " = { ";
1087 
1088  for (int i = 0,
1089  addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1090  i < 3; i++) {
1091  O << "addr_mode_" << i << " = ";
1092  switch (addr) {
1093  case 0:
1094  O << "wrap";
1095  break;
1096  case 1:
1097  O << "clamp_to_border";
1098  break;
1099  case 2:
1100  O << "clamp_to_edge";
1101  break;
1102  case 3:
1103  O << "wrap";
1104  break;
1105  case 4:
1106  O << "mirror";
1107  break;
1108  }
1109  O << ", ";
1110  }
1111  O << "filter_mode = ";
1112  switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1113  case 0:
1114  O << "nearest";
1115  break;
1116  case 1:
1117  O << "linear";
1118  break;
1119  case 2:
1120  llvm_unreachable("Anisotropic filtering is not supported");
1121  default:
1122  O << "nearest";
1123  break;
1124  }
1125  if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1126  O << ", force_unnormalized_coords = 1";
1127  }
1128  O << " }";
1129  }
1130 
1131  O << ";\n";
1132  return;
1133  }
1134 
1135  if (GVar->hasPrivateLinkage()) {
1136  if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1137  return;
1138 
1139  // FIXME - need better way (e.g. Metadata) to avoid generating this global
1140  if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1141  return;
1142  if (GVar->use_empty())
1143  return;
1144  }
1145 
1146  const Function *demotedFunc = nullptr;
1147  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1148  O << "// " << GVar->getName() << " has been demoted\n";
1149  if (localDecls.find(demotedFunc) != localDecls.end())
1150  localDecls[demotedFunc].push_back(GVar);
1151  else {
1152  std::vector<const GlobalVariable *> temp;
1153  temp.push_back(GVar);
1154  localDecls[demotedFunc] = temp;
1155  }
1156  return;
1157  }
1158 
1159  O << ".";
1160  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1161 
1162  if (isManaged(*GVar)) {
1163  O << " .attribute(.managed)";
1164  }
1165 
1166  if (GVar->getAlignment() == 0)
1167  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1168  else
1169  O << " .align " << GVar->getAlignment();
1170 
1171  if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1172  (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1173  O << " .";
1174  // Special case: ABI requires that we use .u8 for predicates
1175  if (ETy->isIntegerTy(1))
1176  O << "u8";
1177  else
1178  O << getPTXFundamentalTypeStr(ETy, false);
1179  O << " ";
1180  getSymbol(GVar)->print(O, MAI);
1181 
1182  // Ptx allows variable initilization only for constant and global state
1183  // spaces.
1184  if (GVar->hasInitializer()) {
1185  if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1186  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1187  const Constant *Initializer = GVar->getInitializer();
1188  // 'undef' is treated as there is no value specified.
1189  if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1190  O << " = ";
1191  printScalarConstant(Initializer, O);
1192  }
1193  } else {
1194  // The frontend adds zero-initializer to device and constant variables
1195  // that don't have an initial value, and UndefValue to shared
1196  // variables, so skip warning for this case.
1197  if (!GVar->getInitializer()->isNullValue() &&
1198  !isa<UndefValue>(GVar->getInitializer())) {
1199  report_fatal_error("initial value of '" + GVar->getName() +
1200  "' is not allowed in addrspace(" +
1201  Twine(PTy->getAddressSpace()) + ")");
1202  }
1203  }
1204  }
1205  } else {
1206  unsigned int ElementSize = 0;
1207 
1208  // Although PTX has direct support for struct type and array type and
1209  // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1210  // targets that support these high level field accesses. Structs, arrays
1211  // and vectors are lowered into arrays of bytes.
1212  switch (ETy->getTypeID()) {
1213  case Type::IntegerTyID: // Integers larger than 64 bits
1214  case Type::StructTyID:
1215  case Type::ArrayTyID:
1216  case Type::VectorTyID:
1217  ElementSize = DL.getTypeStoreSize(ETy);
1218  // Ptx allows variable initilization only for constant and
1219  // global state spaces.
1220  if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1221  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1222  GVar->hasInitializer()) {
1223  const Constant *Initializer = GVar->getInitializer();
1224  if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1225  AggBuffer aggBuffer(ElementSize, O, *this);
1226  bufferAggregateConstant(Initializer, &aggBuffer);
1227  if (aggBuffer.numSymbols) {
1228  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1229  O << " .u64 ";
1230  getSymbol(GVar)->print(O, MAI);
1231  O << "[";
1232  O << ElementSize / 8;
1233  } else {
1234  O << " .u32 ";
1235  getSymbol(GVar)->print(O, MAI);
1236  O << "[";
1237  O << ElementSize / 4;
1238  }
1239  O << "]";
1240  } else {
1241  O << " .b8 ";
1242  getSymbol(GVar)->print(O, MAI);
1243  O << "[";
1244  O << ElementSize;
1245  O << "]";
1246  }
1247  O << " = {";
1248  aggBuffer.print();
1249  O << "}";
1250  } else {
1251  O << " .b8 ";
1252  getSymbol(GVar)->print(O, MAI);
1253  if (ElementSize) {
1254  O << "[";
1255  O << ElementSize;
1256  O << "]";
1257  }
1258  }
1259  } else {
1260  O << " .b8 ";
1261  getSymbol(GVar)->print(O, MAI);
1262  if (ElementSize) {
1263  O << "[";
1264  O << ElementSize;
1265  O << "]";
1266  }
1267  }
1268  break;
1269  default:
1270  llvm_unreachable("type not supported yet");
1271  }
1272  }
1273  O << ";\n";
1274 }
1275 
1276 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1277  if (localDecls.find(f) == localDecls.end())
1278  return;
1279 
1280  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1281 
1282  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1283  O << "\t// demoted variable\n\t";
1284  printModuleLevelGV(gvars[i], O, true);
1285  }
1286 }
1287 
1288 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1289  raw_ostream &O) const {
1290  switch (AddressSpace) {
1291  case ADDRESS_SPACE_LOCAL:
1292  O << "local";
1293  break;
1294  case ADDRESS_SPACE_GLOBAL:
1295  O << "global";
1296  break;
1297  case ADDRESS_SPACE_CONST:
1298  O << "const";
1299  break;
1300  case ADDRESS_SPACE_SHARED:
1301  O << "shared";
1302  break;
1303  default:
1304  report_fatal_error("Bad address space found while emitting PTX: " +
1306  break;
1307  }
1308 }
1309 
1310 std::string
1311 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1312  switch (Ty->getTypeID()) {
1313  default:
1314  llvm_unreachable("unexpected type");
1315  break;
1316  case Type::IntegerTyID: {
1317  unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1318  if (NumBits == 1)
1319  return "pred";
1320  else if (NumBits <= 64) {
1321  std::string name = "u";
1322  return name + utostr(NumBits);
1323  } else {
1324  llvm_unreachable("Integer too large");
1325  break;
1326  }
1327  break;
1328  }
1329  case Type::HalfTyID:
1330  // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1331  return "b16";
1332  case Type::FloatTyID:
1333  return "f32";
1334  case Type::DoubleTyID:
1335  return "f64";
1336  case Type::PointerTyID:
1337  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1338  if (useB4PTR)
1339  return "b64";
1340  else
1341  return "u64";
1342  else if (useB4PTR)
1343  return "b32";
1344  else
1345  return "u32";
1346  }
1347  llvm_unreachable("unexpected type");
1348  return nullptr;
1349 }
1350 
1351 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1352  raw_ostream &O) {
1353  const DataLayout &DL = getDataLayout();
1354 
1355  // GlobalVariables are always constant pointers themselves.
1356  Type *ETy = GVar->getValueType();
1357 
1358  O << ".";
1359  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1360  if (GVar->getAlignment() == 0)
1361  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1362  else
1363  O << " .align " << GVar->getAlignment();
1364 
1365  // Special case for i128
1366  if (ETy->isIntegerTy(128)) {
1367  O << " .b8 ";
1368  getSymbol(GVar)->print(O, MAI);
1369  O << "[16]";
1370  return;
1371  }
1372 
1373  if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1374  O << " .";
1375  O << getPTXFundamentalTypeStr(ETy);
1376  O << " ";
1377  getSymbol(GVar)->print(O, MAI);
1378  return;
1379  }
1380 
1381  int64_t ElementSize = 0;
1382 
1383  // Although PTX has direct support for struct type and array type and LLVM IR
1384  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1385  // support these high level field accesses. Structs and arrays are lowered
1386  // into arrays of bytes.
1387  switch (ETy->getTypeID()) {
1388  case Type::StructTyID:
1389  case Type::ArrayTyID:
1390  case Type::VectorTyID:
1391  ElementSize = DL.getTypeStoreSize(ETy);
1392  O << " .b8 ";
1393  getSymbol(GVar)->print(O, MAI);
1394  O << "[";
1395  if (ElementSize) {
1396  O << ElementSize;
1397  }
1398  O << "]";
1399  break;
1400  default:
1401  llvm_unreachable("type not supported yet");
1402  }
1403 }
1404 
1405 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
1406  if (Ty->isSingleValueType())
1407  return DL.getPrefTypeAlignment(Ty);
1408 
1409  auto *ATy = dyn_cast<ArrayType>(Ty);
1410  if (ATy)
1411  return getOpenCLAlignment(DL, ATy->getElementType());
1412 
1413  auto *STy = dyn_cast<StructType>(Ty);
1414  if (STy) {
1415  unsigned int alignStruct = 1;
1416  // Go through each element of the struct and find the
1417  // largest alignment.
1418  for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1419  Type *ETy = STy->getElementType(i);
1420  unsigned int align = getOpenCLAlignment(DL, ETy);
1421  if (align > alignStruct)
1422  alignStruct = align;
1423  }
1424  return alignStruct;
1425  }
1426 
1427  auto *FTy = dyn_cast<FunctionType>(Ty);
1428  if (FTy)
1429  return DL.getPointerPrefAlignment();
1430  return DL.getPrefTypeAlignment(Ty);
1431 }
1432 
1433 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1434  int paramIndex, raw_ostream &O) {
1435  getSymbol(I->getParent())->print(O, MAI);
1436  O << "_param_" << paramIndex;
1437 }
1438 
1439 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1440  const DataLayout &DL = getDataLayout();
1441  const AttributeList &PAL = F->getAttributes();
1442  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1443  const TargetLowering *TLI = STI.getTargetLowering();
1445  unsigned paramIndex = 0;
1446  bool first = true;
1447  bool isKernelFunc = isKernelFunction(*F);
1448  bool isABI = (STI.getSmVersion() >= 20);
1449  bool hasImageHandles = STI.hasImageHandles();
1450  MVT thePointerTy = TLI->getPointerTy(DL);
1451 
1452  if (F->arg_empty()) {
1453  O << "()\n";
1454  return;
1455  }
1456 
1457  O << "(\n";
1458 
1459  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1460  Type *Ty = I->getType();
1461 
1462  if (!first)
1463  O << ",\n";
1464 
1465  first = false;
1466 
1467  // Handle image/sampler parameters
1468  if (isKernelFunction(*F)) {
1469  if (isSampler(*I) || isImage(*I)) {
1470  if (isImage(*I)) {
1471  std::string sname = I->getName();
1472  if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1473  if (hasImageHandles)
1474  O << "\t.param .u64 .ptr .surfref ";
1475  else
1476  O << "\t.param .surfref ";
1477  CurrentFnSym->print(O, MAI);
1478  O << "_param_" << paramIndex;
1479  }
1480  else { // Default image is read_only
1481  if (hasImageHandles)
1482  O << "\t.param .u64 .ptr .texref ";
1483  else
1484  O << "\t.param .texref ";
1485  CurrentFnSym->print(O, MAI);
1486  O << "_param_" << paramIndex;
1487  }
1488  } else {
1489  if (hasImageHandles)
1490  O << "\t.param .u64 .ptr .samplerref ";
1491  else
1492  O << "\t.param .samplerref ";
1493  CurrentFnSym->print(O, MAI);
1494  O << "_param_" << paramIndex;
1495  }
1496  continue;
1497  }
1498  }
1499 
1500  if (!PAL.hasParamAttribute(paramIndex, Attribute::ByVal)) {
1501  if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1502  // Just print .param .align <a> .b8 .param[size];
1503  // <a> = PAL.getparamalignment
1504  // size = typeallocsize of element type
1505  unsigned align = PAL.getParamAlignment(paramIndex);
1506  if (align == 0)
1507  align = DL.getABITypeAlignment(Ty);
1508 
1509  unsigned sz = DL.getTypeAllocSize(Ty);
1510  O << "\t.param .align " << align << " .b8 ";
1511  printParamName(I, paramIndex, O);
1512  O << "[" << sz << "]";
1513 
1514  continue;
1515  }
1516  // Just a scalar
1517  auto *PTy = dyn_cast<PointerType>(Ty);
1518  if (isKernelFunc) {
1519  if (PTy) {
1520  // Special handling for pointer arguments to kernel
1521  O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1522 
1523  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1524  NVPTX::CUDA) {
1525  Type *ETy = PTy->getElementType();
1526  int addrSpace = PTy->getAddressSpace();
1527  switch (addrSpace) {
1528  default:
1529  O << ".ptr ";
1530  break;
1531  case ADDRESS_SPACE_CONST:
1532  O << ".ptr .const ";
1533  break;
1534  case ADDRESS_SPACE_SHARED:
1535  O << ".ptr .shared ";
1536  break;
1537  case ADDRESS_SPACE_GLOBAL:
1538  O << ".ptr .global ";
1539  break;
1540  }
1541  O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
1542  }
1543  printParamName(I, paramIndex, O);
1544  continue;
1545  }
1546 
1547  // non-pointer scalar to kernel func
1548  O << "\t.param .";
1549  // Special case: predicate operands become .u8 types
1550  if (Ty->isIntegerTy(1))
1551  O << "u8";
1552  else
1553  O << getPTXFundamentalTypeStr(Ty);
1554  O << " ";
1555  printParamName(I, paramIndex, O);
1556  continue;
1557  }
1558  // Non-kernel function, just print .param .b<size> for ABI
1559  // and .reg .b<size> for non-ABI
1560  unsigned sz = 0;
1561  if (isa<IntegerType>(Ty)) {
1562  sz = cast<IntegerType>(Ty)->getBitWidth();
1563  if (sz < 32)
1564  sz = 32;
1565  } else if (isa<PointerType>(Ty))
1566  sz = thePointerTy.getSizeInBits();
1567  else if (Ty->isHalfTy())
1568  // PTX ABI requires all scalar parameters to be at least 32
1569  // bits in size. fp16 normally uses .b16 as its storage type
1570  // in PTX, so its size must be adjusted here, too.
1571  sz = 32;
1572  else
1573  sz = Ty->getPrimitiveSizeInBits();
1574  if (isABI)
1575  O << "\t.param .b" << sz << " ";
1576  else
1577  O << "\t.reg .b" << sz << " ";
1578  printParamName(I, paramIndex, O);
1579  continue;
1580  }
1581 
1582  // param has byVal attribute. So should be a pointer
1583  auto *PTy = dyn_cast<PointerType>(Ty);
1584  assert(PTy && "Param with byval attribute should be a pointer type");
1585  Type *ETy = PTy->getElementType();
1586 
1587  if (isABI || isKernelFunc) {
1588  // Just print .param .align <a> .b8 .param[size];
1589  // <a> = PAL.getparamalignment
1590  // size = typeallocsize of element type
1591  unsigned align = PAL.getParamAlignment(paramIndex);
1592  if (align == 0)
1593  align = DL.getABITypeAlignment(ETy);
1594  // Work around a bug in ptxas. When PTX code takes address of
1595  // byval parameter with alignment < 4, ptxas generates code to
1596  // spill argument into memory. Alas on sm_50+ ptxas generates
1597  // SASS code that fails with misaligned access. To work around
1598  // the problem, make sure that we align byval parameters by at
1599  // least 4. Matching change must be made in LowerCall() where we
1600  // prepare parameters for the call.
1601  //
1602  // TODO: this will need to be undone when we get to support multi-TU
1603  // device-side compilation as it breaks ABI compatibility with nvcc.
1604  // Hopefully ptxas bug is fixed by then.
1605  if (!isKernelFunc && align < 4)
1606  align = 4;
1607  unsigned sz = DL.getTypeAllocSize(ETy);
1608  O << "\t.param .align " << align << " .b8 ";
1609  printParamName(I, paramIndex, O);
1610  O << "[" << sz << "]";
1611  continue;
1612  } else {
1613  // Split the ETy into constituent parts and
1614  // print .param .b<size> <name> for each part.
1615  // Further, if a part is vector, print the above for
1616  // each vector element.
1617  SmallVector<EVT, 16> vtparts;
1618  ComputeValueVTs(*TLI, DL, ETy, vtparts);
1619  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1620  unsigned elems = 1;
1621  EVT elemtype = vtparts[i];
1622  if (vtparts[i].isVector()) {
1623  elems = vtparts[i].getVectorNumElements();
1624  elemtype = vtparts[i].getVectorElementType();
1625  }
1626 
1627  for (unsigned j = 0, je = elems; j != je; ++j) {
1628  unsigned sz = elemtype.getSizeInBits();
1629  if (elemtype.isInteger() && (sz < 32))
1630  sz = 32;
1631  O << "\t.reg .b" << sz << " ";
1632  printParamName(I, paramIndex, O);
1633  if (j < je - 1)
1634  O << ",\n";
1635  ++paramIndex;
1636  }
1637  if (i < e - 1)
1638  O << ",\n";
1639  }
1640  --paramIndex;
1641  continue;
1642  }
1643  }
1644 
1645  O << "\n)\n";
1646 }
1647 
1648 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1649  raw_ostream &O) {
1650  const Function &F = MF.getFunction();
1651  emitFunctionParamList(&F, O);
1652 }
1653 
1654 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1655  const MachineFunction &MF) {
1656  SmallString<128> Str;
1657  raw_svector_ostream O(Str);
1658 
1659  // Map the global virtual register number to a register class specific
1660  // virtual register number starting from 1 with that class.
1662  //unsigned numRegClasses = TRI->getNumRegClasses();
1663 
1664  // Emit the Fake Stack Object
1665  const MachineFrameInfo &MFI = MF.getFrameInfo();
1666  int NumBytes = (int) MFI.getStackSize();
1667  if (NumBytes) {
1668  O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME
1669  << getFunctionNumber() << "[" << NumBytes << "];\n";
1670  if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1671  O << "\t.reg .b64 \t%SP;\n";
1672  O << "\t.reg .b64 \t%SPL;\n";
1673  } else {
1674  O << "\t.reg .b32 \t%SP;\n";
1675  O << "\t.reg .b32 \t%SPL;\n";
1676  }
1677  }
1678 
1679  // Go through all virtual registers to establish the mapping between the
1680  // global virtual
1681  // register number and the per class virtual register number.
1682  // We use the per class virtual register number in the ptx output.
1683  unsigned int numVRs = MRI->getNumVirtRegs();
1684  for (unsigned i = 0; i < numVRs; i++) {
1685  unsigned int vr = TRI->index2VirtReg(i);
1686  const TargetRegisterClass *RC = MRI->getRegClass(vr);
1687  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1688  int n = regmap.size();
1689  regmap.insert(std::make_pair(vr, n + 1));
1690  }
1691 
1692  // Emit register declarations
1693  // @TODO: Extract out the real register usage
1694  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1695  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1696  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1697  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1698  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1699  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1700  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1701 
1702  // Emit declaration of the virtual registers or 'physical' registers for
1703  // each register class
1704  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1705  const TargetRegisterClass *RC = TRI->getRegClass(i);
1706  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1707  std::string rcname = getNVPTXRegClassName(RC);
1708  std::string rcStr = getNVPTXRegClassStr(RC);
1709  int n = regmap.size();
1710 
1711  // Only declare those registers that may be used.
1712  if (n) {
1713  O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1714  << ">;\n";
1715  }
1716  }
1717 
1718  OutStreamer->EmitRawText(O.str());
1719 }
1720 
1721 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1722  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1723  bool ignored;
1724  unsigned int numHex;
1725  const char *lead;
1726 
1727  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1728  numHex = 8;
1729  lead = "0f";
1731  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1732  numHex = 16;
1733  lead = "0d";
1735  } else
1736  llvm_unreachable("unsupported fp type");
1737 
1738  APInt API = APF.bitcastToAPInt();
1739  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1740 }
1741 
1742 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1743  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1744  O << CI->getValue();
1745  return;
1746  }
1747  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1748  printFPConstant(CFP, O);
1749  return;
1750  }
1751  if (isa<ConstantPointerNull>(CPV)) {
1752  O << "0";
1753  return;
1754  }
1755  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1756  bool IsNonGenericPointer = false;
1757  if (GVar->getType()->getAddressSpace() != 0) {
1758  IsNonGenericPointer = true;
1759  }
1760  if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1761  O << "generic(";
1762  getSymbol(GVar)->print(O, MAI);
1763  O << ")";
1764  } else {
1765  getSymbol(GVar)->print(O, MAI);
1766  }
1767  return;
1768  }
1769  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1770  const Value *v = Cexpr->stripPointerCasts();
1771  PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1772  bool IsNonGenericPointer = false;
1773  if (PTy && PTy->getAddressSpace() != 0) {
1774  IsNonGenericPointer = true;
1775  }
1776  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1777  if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1778  O << "generic(";
1779  getSymbol(GVar)->print(O, MAI);
1780  O << ")";
1781  } else {
1782  getSymbol(GVar)->print(O, MAI);
1783  }
1784  return;
1785  } else {
1786  lowerConstant(CPV)->print(O, MAI);
1787  return;
1788  }
1789  }
1790  llvm_unreachable("Not scalar type found in printScalarConstant()");
1791 }
1792 
// These utility functions assure we get the right sequence of bytes for a given
// type even for big-endian machines

/// Store the sizeof(T) low-order bytes of \p val into \p p, least significant
/// byte first. \p p must have room for at least sizeof(T) bytes.
template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
  // Shift an unsigned copy: right-shifting a negative signed value is
  // implementation-defined before C++20. The conversion keeps the low
  // sizeof(T) bytes of the two's complement representation intact.
  uint64_t bits = static_cast<uint64_t>(val);
  for (unsigned i = 0; i < sizeof(T); ++i) {
    p[i] = static_cast<unsigned char>(bits);
    bits >>= 8;
  }
}
/// Store the 4 bytes of the bit pattern of \p val into \p p, least
/// significant byte first, independent of host byte order.
static void ConvertFloatToBytes(unsigned char *p, float val) {
  static_assert(sizeof(float) == sizeof(uint32_t),
                "float is expected to be 32 bits wide");
  // Copy the bits out with memcpy; reading a float through an int32_t
  // pointer would violate the strict aliasing rules.
  uint32_t bits;
  std::memcpy(&bits, &val, sizeof(bits));
  for (unsigned i = 0; i < sizeof(bits); ++i) {
    p[i] = static_cast<unsigned char>(bits);
    bits >>= 8;
  }
}
/// Store the 8 bytes of the bit pattern of \p val into \p p, least
/// significant byte first, independent of host byte order.
static void ConvertDoubleToBytes(unsigned char *p, double val) {
  static_assert(sizeof(double) == sizeof(uint64_t),
                "double is expected to be 64 bits wide");
  // Copy the bits out with memcpy; reading a double through an int64_t
  // pointer would violate the strict aliasing rules.
  uint64_t bits;
  std::memcpy(&bits, &val, sizeof(bits));
  for (unsigned i = 0; i < sizeof(bits); ++i) {
    p[i] = static_cast<unsigned char>(bits);
    bits >>= 8;
  }
}
1816 
1817 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1818  AggBuffer *aggBuffer) {
1819  const DataLayout &DL = getDataLayout();
1820 
1821  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1822  int s = DL.getTypeAllocSize(CPV->getType());
1823  if (s < Bytes)
1824  s = Bytes;
1825  aggBuffer->addZeros(s);
1826  return;
1827  }
1828 
1829  unsigned char ptr[8];
1830  switch (CPV->getType()->getTypeID()) {
1831 
1832  case Type::IntegerTyID: {
1833  Type *ETy = CPV->getType();
1834  if (ETy == Type::getInt8Ty(CPV->getContext())) {
1835  unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
1836  ConvertIntToBytes<>(ptr, c);
1837  aggBuffer->addBytes(ptr, 1, Bytes);
1838  } else if (ETy == Type::getInt16Ty(CPV->getContext())) {
1839  short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
1840  ConvertIntToBytes<>(ptr, int16);
1841  aggBuffer->addBytes(ptr, 2, Bytes);
1842  } else if (ETy == Type::getInt32Ty(CPV->getContext())) {
1843  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1844  int int32 = (int)(constInt->getZExtValue());
1845  ConvertIntToBytes<>(ptr, int32);
1846  aggBuffer->addBytes(ptr, 4, Bytes);
1847  break;
1848  } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1849  if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1850  ConstantFoldConstant(Cexpr, DL))) {
1851  int int32 = (int)(constInt->getZExtValue());
1852  ConvertIntToBytes<>(ptr, int32);
1853  aggBuffer->addBytes(ptr, 4, Bytes);
1854  break;
1855  }
1856  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1857  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1858  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1859  aggBuffer->addZeros(4);
1860  break;
1861  }
1862  }
1863  llvm_unreachable("unsupported integer const type");
1864  } else if (ETy == Type::getInt64Ty(CPV->getContext())) {
1865  if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
1866  long long int64 = (long long)(constInt->getZExtValue());
1867  ConvertIntToBytes<>(ptr, int64);
1868  aggBuffer->addBytes(ptr, 8, Bytes);
1869  break;
1870  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1871  if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
1872  ConstantFoldConstant(Cexpr, DL))) {
1873  long long int64 = (long long)(constInt->getZExtValue());
1874  ConvertIntToBytes<>(ptr, int64);
1875  aggBuffer->addBytes(ptr, 8, Bytes);
1876  break;
1877  }
1878  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1879  Value *v = Cexpr->getOperand(0)->stripPointerCasts();
1880  aggBuffer->addSymbol(v, Cexpr->getOperand(0));
1881  aggBuffer->addZeros(8);
1882  break;
1883  }
1884  }
1885  llvm_unreachable("unsupported integer const type");
1886  } else
1887  llvm_unreachable("unsupported integer const type");
1888  break;
1889  }
1890  case Type::HalfTyID:
1891  case Type::FloatTyID:
1892  case Type::DoubleTyID: {
1893  const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV);
1894  Type *Ty = CFP->getType();
1895  if (Ty == Type::getHalfTy(CPV->getContext())) {
1896  APInt API = CFP->getValueAPF().bitcastToAPInt();
1897  uint16_t float16 = API.getLoBits(16).getZExtValue();
1898  ConvertIntToBytes<>(ptr, float16);
1899  aggBuffer->addBytes(ptr, 2, Bytes);
1900  } else if (Ty == Type::getFloatTy(CPV->getContext())) {
1901  float float32 = (float) CFP->getValueAPF().convertToFloat();
1902  ConvertFloatToBytes(ptr, float32);
1903  aggBuffer->addBytes(ptr, 4, Bytes);
1904  } else if (Ty == Type::getDoubleTy(CPV->getContext())) {
1905  double float64 = CFP->getValueAPF().convertToDouble();
1906  ConvertDoubleToBytes(ptr, float64);
1907  aggBuffer->addBytes(ptr, 8, Bytes);
1908  } else {
1909  llvm_unreachable("unsupported fp const type");
1910  }
1911  break;
1912  }
1913  case Type::PointerTyID: {
1914  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1915  aggBuffer->addSymbol(GVar, GVar);
1916  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1917  const Value *v = Cexpr->stripPointerCasts();
1918  aggBuffer->addSymbol(v, Cexpr);
1919  }
1920  unsigned int s = DL.getTypeAllocSize(CPV->getType());
1921  aggBuffer->addZeros(s);
1922  break;
1923  }
1924 
1925  case Type::ArrayTyID:
1926  case Type::VectorTyID:
1927  case Type::StructTyID: {
1928  if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1929  int ElementSize = DL.getTypeAllocSize(CPV->getType());
1930  bufferAggregateConstant(CPV, aggBuffer);
1931  if (Bytes > ElementSize)
1932  aggBuffer->addZeros(Bytes - ElementSize);
1933  } else if (isa<ConstantAggregateZero>(CPV))
1934  aggBuffer->addZeros(Bytes);
1935  else
1936  llvm_unreachable("Unexpected Constant type");
1937  break;
1938  }
1939 
1940  default:
1941  llvm_unreachable("unsupported type");
1942  }
1943 }
1944 
1945 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1946  AggBuffer *aggBuffer) {
1947  const DataLayout &DL = getDataLayout();
1948  int Bytes;
1949 
1950  // Integers of arbitrary width
1951  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1952  APInt Val = CI->getValue();
1953  for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1954  uint8_t Byte = Val.getLoBits(8).getZExtValue();
1955  aggBuffer->addBytes(&Byte, 1, 1);
1956  Val.lshrInPlace(8);
1957  }
1958  return;
1959  }
1960 
1961  // Old constants
1962  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1963  if (CPV->getNumOperands())
1964  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1965  bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1966  return;
1967  }
1968 
1969  if (const ConstantDataSequential *CDS =
1970  dyn_cast<ConstantDataSequential>(CPV)) {
1971  if (CDS->getNumElements())
1972  for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1973  bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1974  aggBuffer);
1975  return;
1976  }
1977 
1978  if (isa<ConstantStruct>(CPV)) {
1979  if (CPV->getNumOperands()) {
1980  StructType *ST = cast<StructType>(CPV->getType());
1981  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1982  if (i == (e - 1))
1983  Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1984  DL.getTypeAllocSize(ST) -
1985  DL.getStructLayout(ST)->getElementOffset(i);
1986  else
1987  Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1988  DL.getStructLayout(ST)->getElementOffset(i);
1989  bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1990  }
1991  }
1992  return;
1993  }
1994  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1995 }
1996 
1997 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1998 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
1999 /// expressions that are representable in PTX and create
2000 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
     ///
     /// \param CV the constant to lower.
     /// \param ProcessingGeneric when true, a reference to a GlobalValue is wrapped
     ///        in an NVPTXGenericMCSymbolRefExpr. The flag is forced to true when
     ///        recursing through an addrspacecast whose result is in address
     ///        space 0; every other recursive call passes it through unchanged.
     /// \returns the lowered expression. Constant expressions that cannot be
     ///          lowered are reported through report_fatal_error.
2001 const MCExpr *
2002 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
2003  MCContext &Ctx = OutContext;
2004 
     // Null and undef values are both emitted as the constant 0.
2005  if (CV->isNullValue() || isa<UndefValue>(CV))
2006  return MCConstantExpr::create(0, Ctx);
2007 
     // Plain integers are emitted using their zero-extended value.
2008  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
2009  return MCConstantExpr::create(CI->getZExtValue(), Ctx);
2010 
2011  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
     // NOTE(review): listing line 2013 (the initializer of Expr) is missing from
     // this extract; presumably it creates a symbol reference for GV -- confirm
     // against the real source.
2012  const MCSymbolRefExpr *Expr =
2014  if (ProcessingGeneric) {
2015  return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
2016  } else {
2017  return Expr;
2018  }
2019  }
2020 
     // Anything that is not one of the kinds handled above must be a
     // ConstantExpr.
2021  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
2022  if (!CE) {
2023  llvm_unreachable("Unknown constant value to lower!");
2024  }
2025 
2026  switch (CE->getOpcode()) {
2027  default:
2028  // If the code isn't optimized, there may be outstanding folding
2029  // opportunities. Attempt to fold the expression using DataLayout as a
2030  // last resort before giving up.
     // NOTE(review): listing line 2031 (the definition of C) is missing from this
     // extract; presumably C is the result of constant-folding CE -- confirm.
2032  if (C && C != CE)
2033  return lowerConstantForGV(C, ProcessingGeneric);
2034 
2035  // Otherwise report the problem to the user.
2036  {
2037  std::string S;
2038  raw_string_ostream OS(S);
2039  OS << "Unsupported expression in static initializer: ";
2040  CE->printAsOperand(OS, /*PrintType=*/false,
2041  !MF ? nullptr : MF->getFunction().getParent());
     // report_fatal_error does not return, so control never falls into the
     // next case label.
2042  report_fatal_error(OS.str());
2043  }
2044 
2045  case Instruction::AddrSpaceCast: {
2046  // Strip the addrspacecast and pass along the operand
2047  PointerType *DstTy = cast<PointerType>(CE->getType());
     // Only casts whose destination is address space 0 are supported; the
     // operand is then lowered with ProcessingGeneric forced to true.
2048  if (DstTy->getAddressSpace() == 0) {
2049  return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
2050  }
     // Any other destination address space is a fatal error.
2051  std::string S;
2052  raw_string_ostream OS(S);
2053  OS << "Unsupported expression in static initializer: ";
2054  CE->printAsOperand(OS, /*PrintType=*/ false,
2055  !MF ? nullptr : MF->getFunction().getParent());
2056  report_fatal_error(OS.str());
2057  }
2058 
2059  case Instruction::GetElementPtr: {
2060  const DataLayout &DL = getDataLayout();
2061 
2062  // Generate a symbolic expression for the byte address
2063  APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
2064  cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
2065 
2066  const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
2067  ProcessingGeneric);
     // A zero offset needs no addition; return the lowered base unchanged.
2068  if (!OffsetAI)
2069  return Base;
2070 
     // The offset is read back sign-extended before being added to the base.
2071  int64_t Offset = OffsetAI.getSExtValue();
2072  return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
2073  Ctx);
2074  }
2075 
2076  case Instruction::Trunc:
2077  // We emit the value and depend on the assembler to truncate the generated
2078  // expression properly. This is important for differences between
2079  // blockaddress labels. Since the two labels are in the same function, it
2080  // is reasonable to treat their delta as a 32-bit value.
     // NOTE(review): listing line 2081 is missing from this extract; as shown,
     // Trunc shares the BitCast handling below (presumably via an explicit
     // fallthrough annotation -- confirm).
2082  case Instruction::BitCast:
2083  return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2084 
2085  case Instruction::IntToPtr: {
2086  const DataLayout &DL = getDataLayout();
2087 
2088  // Handle casts to pointers by changing them into casts to the appropriate
2089  // integer type. This promotes constant folding and simplifies this code.
2090  Constant *Op = CE->getOperand(0);
     // NOTE(review): listing line 2091 (the start of the statement that ends on
     // the next line) is missing from this extract; presumably it re-casts Op
     // to a pointer-sized integer type -- confirm.
2092  false/*ZExt*/);
2093  return lowerConstantForGV(Op, ProcessingGeneric);
2094  }
2095 
2096  case Instruction::PtrToInt: {
2097  const DataLayout &DL = getDataLayout();
2098 
2099  // Support only foldable casts to/from pointers that can be eliminated by
2100  // changing the pointer to the appropriately sized integer type.
2101  Constant *Op = CE->getOperand(0);
2102  Type *Ty = CE->getType();
2103 
2104  const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
2105 
2106  // We can emit the pointer value into this slot if the slot is an
2107  // integer slot equal to the size of the pointer.
2108  if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
2109  return OpExpr;
2110 
2111  // Otherwise the pointer is smaller than the resultant integer, mask off
2112  // the high bits so we are sure to get a proper truncation if the input is
2113  // a constant expr.
     // The mask has the low InBits bits set (assumes 0 < InBits <= 64 so the
     // shift amount is valid -- TODO confirm).
2114  unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
2115  const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2116  return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2117  }
2118 
2119  // The MC library also has a right-shift operator, but it isn't consistently
2120  // signed or unsigned between different targets.
     // Add is the only binary operator handled here; the inner switch below
     // re-dispatches on the same opcode.
2121  case Instruction::Add: {
2122  const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2123  const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2124  switch (CE->getOpcode()) {
2125  default: llvm_unreachable("Unknown binary operator constant cast expr");
2126  case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2127  }
2128  }
2129  }
2130 }
2131 
2132 // Copy of MCExpr::print customized for NVPTX
     //
     // Writes a textual form of Expr to OS. Target expressions delegate to their
     // own printImpl, constants print their value, symbol references print the
     // symbol, unary expressions print a prefix operator followed by the
     // sub-expression, and binary expressions support only MCBinaryExpr::Add.
2133 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2134  switch (Expr.getKind()) {
2135  case MCExpr::Target:
2136  return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2137  case MCExpr::Constant:
2138  OS << cast<MCConstantExpr>(Expr).getValue();
2139  return;
2140 
2141  case MCExpr::SymbolRef: {
2142  const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2143  const MCSymbol &Sym = SRE.getSymbol();
2144  Sym.print(OS, MAI);
2145  return;
2146  }
2147 
2148  case MCExpr::Unary: {
2149  const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
     // Emit the prefix operator, then the operand with no parentheses added.
2150  switch (UE.getOpcode()) {
2151  case MCUnaryExpr::LNot: OS << '!'; break;
2152  case MCUnaryExpr::Minus: OS << '-'; break;
2153  case MCUnaryExpr::Not: OS << '~'; break;
2154  case MCUnaryExpr::Plus: OS << '+'; break;
2155  }
2156  printMCExpr(*UE.getSubExpr(), OS);
2157  return;
2158  }
2159 
2160  case MCExpr::Binary: {
2161  const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2162 
2163  // Only print parens around the LHS if it is non-trivial.
     // For the LHS, "trivial" means a constant, a symbol reference, or an
     // NVPTXGenericMCSymbolRefExpr.
2164  if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2165  isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2166  printMCExpr(*BE.getLHS(), OS);
2167  } else {
2168  OS << '(';
2169  printMCExpr(*BE.getLHS(), OS);
2170  OS<< ')';
2171  }
2172 
2173  switch (BE.getOpcode()) {
2174  case MCBinaryExpr::Add:
2175  // Print "X-42" instead of "X+-42".
     // A negative constant prints with its own '-' sign, so it is emitted
     // directly without a '+' and the function returns.
2176  if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2177  if (RHSC->getValue() < 0) {
2178  OS << RHSC->getValue();
2179  return;
2180  }
2181  }
2182 
2183  OS << '+';
2184  break;
2185  default: llvm_unreachable("Unhandled binary operator");
2186  }
2187 
2188  // Only print parens around the RHS if it is non-trivial.
     // Unlike the LHS check above, an NVPTXGenericMCSymbolRefExpr on the RHS is
     // not treated as trivial here and is therefore parenthesised.
2189  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2190  printMCExpr(*BE.getRHS(), OS);
2191  } else {
2192  OS << '(';
2193  printMCExpr(*BE.getRHS(), OS);
2194  OS << ')';
2195  }
2196  return;
2197  }
2198  }
2199 
     // Every case above returns, so this is reached only for an expression kind
     // that the switch does not list.
2200  llvm_unreachable("Invalid expression kind!");
2201 }
2202 
2203 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2204 ///
     /// \param MI the instruction whose operand is printed.
     /// \param OpNo index of the operand within MI.
     /// \param AsmVariant forwarded unchanged to AsmPrinter::PrintAsmOperand.
     /// \param ExtraCode optional modifier string; may be null or empty.
     /// \param O stream that receives the printed operand.
     /// \returns true when the modifier is not recognised, false after the operand
     ///          has been printed; for single-character modifiers other than 'r'
     ///          the result of AsmPrinter::PrintAsmOperand is returned.
2205 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2206  unsigned AsmVariant,
2207  const char *ExtraCode, raw_ostream &O) {
2208  if (ExtraCode && ExtraCode[0]) {
     // Only single-character modifiers are accepted.
2209  if (ExtraCode[1] != 0)
2210  return true; // Unknown modifier.
2211 
2212  switch (ExtraCode[0]) {
2213  default:
2214  // See if this is a generic print operand
2215  return AsmPrinter::PrintAsmOperand(MI, OpNo, AsmVariant, ExtraCode, O);
     // 'r' needs no special treatment: fall out of the switch and print the
     // operand exactly as if no modifier had been given.
2216  case 'r':
2217  break;
2218  }
2219  }
2220 
2221  printOperand(MI, OpNo, O);
2222 
2223  return false;
2224 }
2225 
     /// PrintAsmMemoryOperand - Print a memory operand of MI wrapped in square
     /// brackets.
     ///
     /// \param MI the instruction whose operand is printed.
     /// \param OpNo index of the operand; passed on to printMemOperand.
     /// \param AsmVariant not used by this function.
     /// \param ExtraCode optional modifier string; any non-empty value is rejected.
     /// \param O stream that receives the output.
     /// \returns true if a modifier was supplied, false after printing.
2226 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(
2227  const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,
2228  const char *ExtraCode, raw_ostream &O) {
2229  if (ExtraCode && ExtraCode[0])
2230  return true; // Unknown modifier
2231 
2232  O << '[';
2233  printMemOperand(MI, OpNo, O);
2234  O << ']';
2235 
2236  return false;
2237 }
2238 
     /// printOperand - Print operand number opNum of MI to O, dispatching on the
     /// operand's type.
     ///
     /// \param Modifier optional modifier; in the visible code it is consulted only
     ///        in the branch that prints MO.getImm(), where a value beginning with
     ///        "vec" selects printVecModifiedImmediate.
     ///
     /// NOTE(review): this listing extract is missing several lines of the function
     /// (listing lines 2243-2244, 2248, 2254, 2260, 2264, 2268 and 2272), including
     /// every case label of the switch. The notes below mark each gap; consult the
     /// real source before relying on the control flow shown here.
2239 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2240  raw_ostream &O, const char *Modifier) {
2241  const MachineOperand &MO = MI->getOperand(opNum);
2242  switch (MO.getType()) {
     // NOTE(review): listing lines 2243-2244 are missing; presumably the case
     // label for register operands and the opening test whose 'else' branch
     // appears below (which emits a virtual register) -- confirm.
2245  if (MO.getReg() == NVPTX::VRDepot)
2246  O << DEPOTNAME << getFunctionNumber();
2247  else
     // NOTE(review): listing line 2248 (the statement of this 'else') is missing.
2249  } else {
2250  emitVirtualRegister(MO.getReg(), O);
2251  }
2252  return;
2253 
     // NOTE(review): listing line 2254 is missing; presumably the case label for
     // immediate operands -- confirm.
2255  if (!Modifier)
2256  O << MO.getImm();
     // strstr(...) == Modifier holds exactly when Modifier begins with "vec".
2257  else if (strstr(Modifier, "vec") == Modifier)
2258  printVecModifiedImmediate(MO, Modifier, O);
2259  else
     // NOTE(review): listing line 2260 (the call that takes the message below) is
     // missing.
2261  "Don't know how to handle modifier on immediate operand");
2262  return;
2263 
     // NOTE(review): listing line 2264 is missing; presumably the case label for
     // floating-point immediates -- confirm.
2265  printFPConstant(MO.getFPImm(), O);
2266  break;
2267 
     // NOTE(review): listing line 2268 is missing; presumably the case label for
     // global addresses -- confirm.
2269  getSymbol(MO.getGlobal())->print(O, MAI);
2270  break;
2271 
     // NOTE(review): listing line 2272 is missing; presumably the case label for
     // basic-block operands -- confirm.
2273  MO.getMBB()->getSymbol()->print(O, MAI);
2274  return;
2275 
2276  default:
2277  llvm_unreachable("Operand type not supported.");
2278  }
     // Nothing follows the switch, so the 'break' and 'return' exits above are
     // equivalent.
2279 }
2280 
     /// printMemOperand - Print a two-part memory operand made of operands opNum
     /// and opNum + 1 of MI.
     ///
     /// Operand opNum is always printed first. If Modifier is exactly "add", the
     /// second operand follows after ", ". Otherwise the second operand follows
     /// after "+", unless it is an immediate equal to 0, in which case nothing
     /// more is printed.
     ///
     /// \param Modifier optional modifier; it is compared against "add" here and is
     ///        not forwarded to the printOperand calls.
2281 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2282  raw_ostream &O, const char *Modifier) {
2283  printOperand(MI, opNum, O);
2284 
2285  if (Modifier && strcmp(Modifier, "add") == 0) {
2286  O << ", ";
2287  printOperand(MI, opNum + 1, O);
2288  } else {
2289  if (MI->getOperand(opNum + 1).isImm() &&
2290  MI->getOperand(opNum + 1).getImm() == 0)
2291  return; // don't print ',0' or '+0'
2292  O << "+";
2293  printOperand(MI, opNum + 1, O);
2294  }
2295 }
2296 
2297 // Force static initialization.
2301 }
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
uint64_t CallInst * C
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:89
unsigned getAlignment() const
Definition: GlobalObject.h:58
unsigned getPTXVersion() const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:164
unsigned getOpcode() const
Return the opcode at the root of this constant expression.
Definition: Constants.h:1209
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
Bitwise negation.
Definition: MCExpr.h:361
MachineBasicBlock * getMBB() const
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:93
bool hasDebugInfo() const
Returns true if valid debug info is present.
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:319
bool getAlign(const Function &F, unsigned index, unsigned &align)
bool hasPrivateLinkage() const
Definition: GlobalValue.h:434
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
This class represents lattice values for constants.
Definition: AllocatorList.h:23
bool getMaxNReg(const Function &F, unsigned &x)
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
static unsigned index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
A Module instance is used to store all the information related to an LLVM module. ...
Definition: Module.h:65
unsigned getPointerPrefAlignment(unsigned AS=0) const
Return target's alignment for stack-based pointers FIXME: The defaults need to be removed once all of...
Definition: DataLayout.cpp:639
2: 32-bit floating point type
Definition: Type.h:58
bool doFinalization(Module &M) override
Shut down the asmprinter.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:88
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:136
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:256
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:607
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:163
unsigned getReg() const
getReg - Returns the register number.
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
bool isTexture(const Value &val)
unsigned Reg
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:422
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:400
float convertToFloat() const
Definition: APFloat.h:1097
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:405
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:560
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:709
Unary plus.
Definition: MCExpr.h:362
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:509
MachineBasicBlock reference.
13: Structures
Definition: Type.h:72
unsigned const TargetRegisterInfo * TRI
Metadata node.
Definition: Metadata.h:863
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:462
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:140
F(f)
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:96
1: 16-bit floating point type
Definition: Type.h:57
const GlobalListType & getGlobalList() const
Get the Module's list of global variables (constant).
Definition: Module.h:524
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:229
15: Pointers
Definition: Type.h:74
static IntegerType * getInt16Ty(LLVMContext &C)
Definition: Type.cpp:174
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:48
bool hasImageHandles() const
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:221
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable *> &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not a register based.
static Constant * getIntegerCast(Constant *C, Type *Ty, bool isSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:1622
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:515
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:115
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:80
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:163
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
Definition: DataLayout.cpp:665
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:161
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:411
Class to represent struct types.
Definition: DerivedTypes.h:232
void clearAnnotationCache(const Module *Mod)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
Name of external global symbol.
This file contains the simple types necessary to represent the attributes associated with functions a...
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:165
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:53
const char * getSymbolName() const
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:977
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
This file implements a class to represent arbitrary precision integral constant values and operations...
bool hasCommonLinkage() const
Definition: GlobalValue.h:439
Target & getTheNVPTXTarget64()
unsigned getNumRegClasses() const
bool getMaxNTIDz(const Function &F, unsigned &z)
unsigned getSizeInBits() const
Context object for machine code objects.
Definition: MCContext.h:62
bool hasExternalLinkage() const
Definition: GlobalValue.h:421
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Attempt to fold the constant using the specified DataLayout.
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:84
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:888
Class to represent function types.
Definition: DerivedTypes.h:102
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1574
bool isKernelFunction(const Function &F)
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:405
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool isSurface(const Value &val)
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:295
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4443
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:573
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:563
#define T
Unary assembler expressions.
Definition: MCExpr.h:356
Class to represent array types.
Definition: DerivedTypes.h:400
bool getMaxNTIDx(const Function &F, unsigned &x)
std::string getTextureName(const Value &val)
ManagedStringPool * getManagedStrPool() const
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:211
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:223
RegisterAsmPrinter - Helper template for registering a target specific assembly printer, for use in the target machine initialization function.
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:122
#define DEPOTNAME
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:457
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:425
bool isSampler(const Value &val)
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:83
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Unary expressions.
Definition: MCExpr.h:41
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:99
Value * getOperand(unsigned i) const
Definition: User.h:169
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:158
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:54
Class to represent pointers.
Definition: DerivedTypes.h:498
bool hasAppendingLinkage() const
Definition: GlobalValue.h:432
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:176
11: Arbitrary bit width integers
Definition: Type.h:70
Target & getTheNVPTXTarget32()
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space...
Definition: DataLayout.cpp:769
0: type with no size
Definition: Type.h:56
Address of a global value.
static bool isEmptyXXStructor(GlobalVariable *GV)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
void print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens=false) const
Definition: MCExpr.cpp:41
unsigned const MachineRegisterInfo * MRI
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:112
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Machine Value Type.
bool hasName() const
Definition: Value.h:250
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:84
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
static bool is64Bit(const char *name)
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:223
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
const GlobalValue * getGlobal() const
const NVPTXTargetLowering * getTargetLowering() const override
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
size_t alias_size() const
Definition: Module.h:624
double convertToDouble() const
Definition: APFloat.h:1096
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:81
std::string getTargetName() const
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:759
This file declares a class to represent arbitrary precision floating point values and provide a varie...
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool hasInternalLinkage() const
Definition: GlobalValue.h:433
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
static const char * getRegisterName(unsigned RegNo)
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:526
NVPTX::DrvInterface getDrvInterface() const
unsigned size() const
Definition: DenseMap.h:125
Logical negation.
Definition: MCExpr.h:359
Extended Value Type.
Definition: ValueTypes.h:33
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:529
Binary assembler expressions.
Definition: MCExpr.h:413
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
size_t size() const
Definition: SmallVector.h:52
std::string getVirtualRegisterName(unsigned) const
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:4275
std::string & str()
Flushes the stream contents to the target string and returns the string's reference.
Definition: raw_ostream.h:498
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool getMaxNTIDy(const Function &F, unsigned &y)
bool getReqNTIDx(const Function &F, unsigned &x)
unsigned first
bool hasWeakLinkage() const
Definition: GlobalValue.h:429
const APFloat & getValueAPF() const
Definition: Constants.h:302
bool isImage(const Value &val)
14: Arrays
Definition: Type.h:73
bool getReqNTIDz(const Function &F, unsigned &z)
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:202
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:162
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:119
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, const MachineFunction *MF, const Module *M, const MachineFrameInfo *MFI, const TargetInstrInfo *TII, LLVMContext &Ctx)
Iterator for intrusive lists based on ilist_node.
unsigned getNumOperands() const
Definition: User.h:191
bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const
Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
void setOpcode(unsigned Op)
Definition: MCInst.h:170
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
static void ConvertFloatToBytes(unsigned char *p, float val)
std::string * getManagedString(const char *S)
16: SIMD 'packed' format, or other vector type
Definition: Type.h:75
static void printMCExpr(const MCExpr *E, raw_ostream &OS)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type...
Definition: Type.cpp:129
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:430
const MCSymbol & getSymbol() const
Definition: MCExpr.h:332
ExprKind getKind() const
Definition: MCExpr.h:72
virtual const MCExpr * lowerConstant(const Constant *CV)
Lower the specified LLVM Constant to an MCExpr.
MachineOperand class - Representation of each machine instruction operand.
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:81
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
Module.h This file contains the declarations for the Module class.
AddressSpace
Definition: NVPTXBaseInfo.h:21
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:749
bool isImageReadWrite(const Value &val)
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:257
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:225
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
StringRef str()
Return a StringRef for the vector contents.
Definition: raw_ostream.h:534
std::string getSurfaceName(const Value &val)
const char * getName(unsigned RegNo) const
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable *> &Order, DenseSet< const GlobalVariable *> &Visited, DenseSet< const GlobalVariable *> &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted...
std::string utostr(uint64_t X, bool isNeg=false)
Definition: StringExtras.h:223
int64_t getImm() const
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition: Function.h:193
const Function & getFunction() const
Return the LLVM function that this machine code represents.
std::string getSamplerName(const Value &val)
Class for arbitrary precision integers.
Definition: APInt.h:69
ConstantArray - Constant Array Declarations.
Definition: Constants.h:413
bool isManaged(const Value &val)
bool getReqNTIDy(const Function &F, unsigned &y)
iterator_range< user_iterator > users()
Definition: Value.h:399
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:895
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:461
Representation of each machine instruction.
Definition: MachineInstr.h:63
pointer remove(iterator &IT)
Definition: ilist.h:249
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:229
unsigned getParamAlignment(unsigned ArgNo) const
Return the alignment for the specified function parameter.
NVPTXTargetMachine.
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:248
iterator insert(iterator where, pointer New)
Definition: ilist.h:226
iterator end()
Definition: Module.h:600
const Function * getParent() const
Definition: Argument.h:41
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
uint64_t getElementOffset(unsigned Idx) const
Definition: DataLayout.h:576
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:123
virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const
Targets can override this to emit stuff at the start of a basic block.
static bool usedInGlobalVarDef(const Constant *C)
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:214
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:198
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
#define I(x, y, z)
Definition: MD5.cpp:58
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
iterator_range< debug_compile_units_iterator > debug_compile_units() const
Return an iterator for all DICompileUnits listed in this Module's llvm.dbg.cu named metadata node and...
Definition: Module.h:781
iterator begin()
Definition: Module.h:598
iterator end()
Definition: DenseMap.h:108
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:128
References to labels and assigned expressions.
Definition: MCExpr.h:40
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
Type * getValueType() const
Definition: GlobalValue.h:275
Unary minus.
Definition: MCExpr.h:360
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:43
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
const std::string & getModuleInlineAsm() const
Get any module-scope inline assembly blocks.
Definition: Module.h:248
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:557
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:205
3: 64-bit floating point type
Definition: Type.h:59
Implements NVPTX-specific streamer.
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:122
bool hasFnAttribute(Attribute::AttrKind Kind) const
Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but may be faster. ...
static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:482
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:403
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:249
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:114
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
static const char * name
Floating-point immediate operand.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:444
bool hasInitializer() const
Definitions have initializers, declarations don't.
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Constant expressions.
Definition: MCExpr.h:39
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:250
Binary expressions.
Definition: MCExpr.h:38
unsigned int getSmVersion() const
This class implements an extremely fast bulk output stream that can only output to a stream...
Definition: raw_ostream.h:45
uint64_t getTypeAllocSizeInBits(Type *Ty) const
Returns the offset in bits between successive objects of the specified type, including alignment padd...
Definition: DataLayout.h:471
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:210
static void ConvertDoubleToBytes(unsigned char *p, double val)
iterator_range< global_iterator > globals()
Definition: Module.h:587
IRTranslator LLVM IR MI
void addOperand(const MCOperand &Op)
Definition: MCInst.h:183
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
APInt bitcastToAPInt() const
Definition: APFloat.h:1093
bool getMinCTASm(const Function &F, unsigned &x)
Target specific expression.
Definition: MCExpr.h:42
static void ConvertIntToBytes(unsigned char *p, T val)
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:34
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects...
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:122
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:173
bool use_empty() const
Definition: Value.h:322
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx)
Definition: MCExpr.cpp:163
bool isImageWriteOnly(const Value &val)
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:273
void LLVMInitializeNVPTXAsmPrinter()
bool doFinalization(Module &M) override
Shut down the asmprinter.
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:66
const NVPTXRegisterInfo * getRegisterInfo() const override
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:59