LLVM  14.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to NVPTX assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXAsmPrinter.h"
19 #include "NVPTX.h"
20 #include "NVPTXMCExpr.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXSubtarget.h"
24 #include "NVPTXTargetMachine.h"
25 #include "NVPTXUtilities.h"
27 #include "cl_common_defines.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DenseSet.h"
32 #include "llvm/ADT/SmallString.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/StringExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/Triple.h"
37 #include "llvm/ADT/Twine.h"
39 #include "llvm/CodeGen/Analysis.h"
51 #include "llvm/IR/Attributes.h"
52 #include "llvm/IR/BasicBlock.h"
53 #include "llvm/IR/Constant.h"
54 #include "llvm/IR/Constants.h"
55 #include "llvm/IR/DataLayout.h"
56 #include "llvm/IR/DebugInfo.h"
58 #include "llvm/IR/DebugLoc.h"
59 #include "llvm/IR/DerivedTypes.h"
60 #include "llvm/IR/Function.h"
61 #include "llvm/IR/GlobalValue.h"
62 #include "llvm/IR/GlobalVariable.h"
63 #include "llvm/IR/Instruction.h"
64 #include "llvm/IR/LLVMContext.h"
65 #include "llvm/IR/Module.h"
66 #include "llvm/IR/Operator.h"
67 #include "llvm/IR/Type.h"
68 #include "llvm/IR/User.h"
69 #include "llvm/MC/MCExpr.h"
70 #include "llvm/MC/MCInst.h"
71 #include "llvm/MC/MCInstrDesc.h"
72 #include "llvm/MC/MCStreamer.h"
73 #include "llvm/MC/MCSymbol.h"
74 #include "llvm/MC/TargetRegistry.h"
75 #include "llvm/Support/Casting.h"
79 #include "llvm/Support/Path.h"
84 #include <cassert>
85 #include <cstdint>
86 #include <cstring>
87 #include <new>
88 #include <string>
89 #include <utility>
90 #include <vector>
91 
92 using namespace llvm;
93 
94 #define DEPOTNAME "__local_depot"
95 
96 /// DiscoverDependentGlobals - Collect into \p Globals every GlobalVariable on
97 /// which \p V depends, either directly or through the operands of a User.
// NOTE(review): the listing numbering jumps from 98 to 101, so the remainder
// of the signature (function name and parameter list) is not visible here;
// V and Globals are presumably its parameters - confirm against upstream.
98 static void
// A GlobalVariable is recorded as-is and its own operands are not explored.
101  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
102  Globals.insert(GV);
103  else {
// Any other User is walked recursively, operand by operand.
104  if (const User *U = dyn_cast<User>(V)) {
105  for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
106  DiscoverDependentGlobals(U->getOperand(i), Globals);
107  }
108  }
109  }
110 }
111 
112 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
113 /// instances to be emitted, but only after any dependents have been added
114 /// first.
// NOTE(review): listing lines 116-119 (rest of the signature), 129 (the
// declaration of Others) and 133 (the start of the for statement) are not
// visible here; Order, Visited and Visiting are presumably parameters.
115 static void
120  // Have we already visited this one?
121  if (Visited.count(GV))
122  return;
123 
124  // Do we have a circular dependency?
// A failed insert means GV is already being processed further up the
// recursion, i.e. it depends on itself.
125  if (!Visiting.insert(GV).second)
126  report_fatal_error("Circular dependency found in global variable set");
127 
128  // Make sure we visit all dependents first
130  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
131  DiscoverDependentGlobals(GV->getOperand(i), Others);
132 
134  E = Others.end();
135  I != E; ++I)
136  VisitGlobalVariableForEmission(*I, Order, Visited, Visiting);
137 
138  // Now we can visit ourself
// GV is appended after everything it depends on, then moved from the
// in-progress set to the finished set.
139  Order.push_back(GV);
140  Visited.insert(GV);
141  Visiting.erase(GV);
142 }
143 
144 void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
145  MCInst Inst;
146  lowerToMCInst(MI, Inst);
147  EmitToStreamer(*OutStreamer, Inst);
148 }
149 
150 // Handle symbol backtracking for targets that do not support image handles
151 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
152  unsigned OpNo, MCOperand &MCOp) {
153  const MachineOperand &MO = MI->getOperand(OpNo);
154  const MCInstrDesc &MCID = MI->getDesc();
155 
156  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
157  // This is a texture fetch, so operand 4 is a texref and operand 5 is
158  // a samplerref
159  if (OpNo == 4 && MO.isImm()) {
160  lowerImageHandleSymbol(MO.getImm(), MCOp);
161  return true;
162  }
163  if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
164  lowerImageHandleSymbol(MO.getImm(), MCOp);
165  return true;
166  }
167 
168  return false;
169  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
170  unsigned VecSize =
171  1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
172 
173  // For a surface load of vector size N, the Nth operand will be the surfref
174  if (OpNo == VecSize && MO.isImm()) {
175  lowerImageHandleSymbol(MO.getImm(), MCOp);
176  return true;
177  }
178 
179  return false;
180  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
181  // This is a surface store, so operand 0 is a surfref
182  if (OpNo == 0 && MO.isImm()) {
183  lowerImageHandleSymbol(MO.getImm(), MCOp);
184  return true;
185  }
186 
187  return false;
188  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
189  // This is a query, so operand 1 is a surfref/texref
190  if (OpNo == 1 && MO.isImm()) {
191  lowerImageHandleSymbol(MO.getImm(), MCOp);
192  return true;
193  }
194 
195  return false;
196  }
197 
198  return false;
199 }
200 
// Sets MCOp to a symbol reference for the image handle with the given Index.
// The symbol name is fetched by index, routed through the target machine's
// managed string pool, and then turned into an MCSymbol reference.
// NOTE(review): listing line 205 is not visible here; it presumably declares
// MFI, which is used below without a visible definition.
201 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
202  // Ewwww
// const is cast away from the target machine so the managed string pool can
// be reached (presumably getManagedStrPool() is non-const - TODO confirm).
203  LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
204  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
206  const char *Sym = MFI->getImageHandleSymbol(Index);
207  std::string *SymNamePtr =
208  nvTM.getManagedStrPool()->getManagedString(Sym);
209  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
210 }
211 
// Translates the MachineInstr MI into the MCInst OutMI: the opcode is copied
// and each operand is lowered in order.
// NOTE(review): listing line 218 (the argument of GetSymbolRef in the
// CALL_PROTOTYPE case) is not visible here.
212 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
213  OutMI.setOpcode(MI->getOpcode());
214  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
215  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
216  const MachineOperand &MO = MI->getOperand(0);
217  OutMI.addOperand(GetSymbolRef(
219  return;
220  }
221 
222  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
223  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
224  const MachineOperand &MO = MI->getOperand(i);
225 
226  MCOperand MCOp;
// Without image-handle support in the subtarget, operands that stand for
// image handles are lowered to symbols first; a hit skips generic lowering.
227  if (!STI.hasImageHandles()) {
228  if (lowerImageHandleOperand(MI, i, MCOp)) {
229  OutMI.addOperand(MCOp);
230  continue;
231  }
232  }
233 
234  if (lowerOperand(MO, MCOp))
235  OutMI.addOperand(MCOp);
236  }
237 }
238 
// Lowers a single MachineOperand MO into MCOp, dispatching on the operand
// type. Every path that falls out of the switch returns true; an operand type
// with no case hits llvm_unreachable.
// NOTE(review): the case labels and several expression lines (listing lines
// 243, 246, 249-250, 253-254, 256, 259, 267, 271, 275) are not visible here,
// so the statements below appear without their labels.
239 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
240  MCOperand &MCOp) {
241  switch (MO.getType()) {
242  default: llvm_unreachable("unknown operand type");
244  MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
245  break;
247  MCOp = MCOperand::createImm(MO.getImm());
248  break;
251  MO.getMBB()->getSymbol(), OutContext));
252  break;
255  break;
257  MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
258  break;
260  const ConstantFP *Cnt = MO.getFPImm();
261  const APFloat &Val = Cnt->getValueAPF();
262 
// Floating-point immediates become expressions, chosen by the constant's
// IR type; any type other than half/float/double is a fatal error.
263  switch (Cnt->getType()->getTypeID()) {
264  default: report_fatal_error("Unsupported FP type"); break;
265  case Type::HalfTyID:
266  MCOp = MCOperand::createExpr(
268  break;
269  case Type::FloatTyID:
270  MCOp = MCOperand::createExpr(
272  break;
273  case Type::DoubleTyID:
274  MCOp = MCOperand::createExpr(
276  break;
277  }
278  break;
279  }
280  }
281  return true;
282 }
283 
// Packs a register into a single unsigned value: a register-class tag in the
// upper 4 bits and a register number in the lower 28 bits.
// NOTE(review): listing line 285 is not visible here; it presumably opens the
// `if` (a virtual-register test) that pairs with the `} else {` further down.
284 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
286  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
287 
// Reg is looked up in the per-register-class mapping. operator[] is used on
// both maps, so a missing entry is default-inserted rather than rejected.
288  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
289  unsigned RegNum = RegMap[Reg];
290 
291  // Encode the register class in the upper 4 bits
292  // Must be kept in sync with NVPTXInstPrinter::printRegName
293  unsigned Ret = 0;
294  if (RC == &NVPTX::Int1RegsRegClass) {
295  Ret = (1 << 28);
296  } else if (RC == &NVPTX::Int16RegsRegClass) {
297  Ret = (2 << 28);
298  } else if (RC == &NVPTX::Int32RegsRegClass) {
299  Ret = (3 << 28);
300  } else if (RC == &NVPTX::Int64RegsRegClass) {
301  Ret = (4 << 28);
302  } else if (RC == &NVPTX::Float32RegsRegClass) {
303  Ret = (5 << 28);
304  } else if (RC == &NVPTX::Float64RegsRegClass) {
305  Ret = (6 << 28);
306  } else if (RC == &NVPTX::Float16RegsRegClass) {
307  Ret = (7 << 28);
308  } else if (RC == &NVPTX::Float16x2RegsRegClass) {
309  Ret = (8 << 28);
310  } else {
311  report_fatal_error("Bad register class");
312  }
313 
314  // Insert the vreg number
315  Ret |= (RegNum & 0x0FFFFFFF);
316  return Ret;
317  } else {
318  // Some special-use registers are actually physical registers.
319  // Encode this as the register class ID of 0 and the real register ID.
320  return Reg & 0x0FFFFFFF;
321  }
322 }
323 
// Wraps an expression in an MCOperand and returns it.
// NOTE(review): listing lines 324 (the signature) and 326 (the start of the
// assignment to Expr) are not visible here. Judging by its callers this is
// presumably NVPTXAsmPrinter::GetSymbolRef - confirm against upstream.
325  const MCExpr *Expr;
327  OutContext);
328  return MCOperand::createExpr(Expr);
329 }
330 
// Prints the parenthesised return-value declaration of F to O. Nothing is
// printed for a void return type. When the SM version is at least 20 the
// value is declared as a .param, otherwise as a list of .reg declarations.
// NOTE(review): listing line 352 is not visible here; it presumably assigns
// `size` in the floating-point branch, which otherwise leaves it at 0.
331 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
332  const DataLayout &DL = getDataLayout();
333  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
334  const TargetLowering *TLI = STI.getTargetLowering();
335 
336  Type *Ty = F->getReturnType();
337 
338  bool isABI = (STI.getSmVersion() >= 20);
339 
340  if (Ty->getTypeID() == Type::VoidTyID)
341  return;
342 
343  O << " (";
344 
345  if (isABI) {
// Scalars: floating point, or any integer except i128 (handled as an
// aggregate below).
346  if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
347  unsigned size = 0;
348  if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
349  size = ITy->getBitWidth();
350  } else {
351  assert(Ty->isFloatingPointTy() && "Floating point type expected here");
353  }
354  // PTX ABI requires all scalar return values to be at least 32
355  // bits in size. fp16 normally uses .b16 as its storage type in
356  // PTX, so its size must be adjusted here, too.
357  if (size < 32)
358  size = 32;
359 
360  O << ".param .b" << size << " func_retval0";
361  } else if (isa<PointerType>(Ty)) {
362  O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
363  << " func_retval0";
364  } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
// Aggregates, vectors and i128 are returned as an aligned byte array sized
// by the type's alloc size.
365  unsigned totalsz = DL.getTypeAllocSize(Ty);
366  unsigned retAlignment = 0;
// The ABI type alignment is used when getAlign() reports no alignment for
// index 0 of F.
367  if (!getAlign(*F, 0, retAlignment))
368  retAlignment = DL.getABITypeAlignment(Ty);
369  O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
370  << "]";
371  } else
372  llvm_unreachable("Unknown return type");
373  } else {
// Non-ABI path: the return type is split into value types and one .reg is
// declared per scalar element, numbered consecutively by idx.
374  SmallVector<EVT, 16> vtparts;
375  ComputeValueVTs(*TLI, DL, Ty, vtparts);
376  unsigned idx = 0;
377  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
378  unsigned elems = 1;
379  EVT elemtype = vtparts[i];
380  if (vtparts[i].isVector()) {
381  elems = vtparts[i].getVectorNumElements();
382  elemtype = vtparts[i].getVectorElementType();
383  }
384 
385  for (unsigned j = 0, je = elems; j != je; ++j) {
386  unsigned sz = elemtype.getSizeInBits();
// Integer elements narrower than 32 bits are widened to 32.
387  if (elemtype.isInteger() && (sz < 32))
388  sz = 32;
389  O << ".reg .b" << sz << " func_retval" << idx;
390  if (j < je - 1)
391  O << ", ";
392  ++idx;
393  }
394  if (i < e - 1)
395  O << ", ";
396  }
397  }
398  O << ") ";
399 }
400 
401 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
402  raw_ostream &O) {
403  const Function &F = MF.getFunction();
404  printReturnValStr(&F, O);
405 }
406 
407 // Return true if MBB is the header of a loop marked with
408 // llvm.loop.unroll.disable.
409 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
410 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
411  const MachineBasicBlock &MBB) const {
412  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
413  // We insert .pragma "nounroll" only to the loop header.
414  if (!LI.isLoopHeader(&MBB))
415  return false;
416 
417  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
418  // we iterate through each back edge of the loop with header MBB, and check
419  // whether its metadata contains llvm.loop.unroll.disable.
420  for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
421  const MachineBasicBlock *PMBB = *I;
422  if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
423  // Edges from other loops to MBB are not back edges.
424  continue;
425  }
426  if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
427  if (MDNode *LoopID =
428  PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
429  if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
430  return true;
431  }
432  }
433  }
434  return false;
435 }
436 
// Emits a `.pragma "nounroll";` line at the start of MBB when MBB is the
// header of a loop whose unrolling is disabled.
// NOTE(review): listing line 438 is not visible here; it presumably forwards
// to the base-class implementation first - confirm against upstream.
437 void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
439  if (isLoopHeaderOfNoUnroll(MBB))
440  OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
441 }
442 
// Emits the header of the current function: linkage directive, .entry/.func
// keyword, return value (non-kernels only), name, parameter list and kernel
// directives, followed by the opening brace and the register declarations.
// NOTE(review): listing line 478 is not visible here; it presumably holds the
// statement controlled by the final `if`, which is empty as shown.
443 void NVPTXAsmPrinter::emitFunctionEntryLabel() {
444  SmallString<128> Str;
445  raw_svector_ostream O(Str);
446 
// Module-level globals are emitted once, just before the first function.
447  if (!GlobalsEmitted) {
448  emitGlobals(*MF->getFunction().getParent());
449  GlobalsEmitted = true;
450  }
451 
452  // Set up
453  MRI = &MF->getRegInfo();
454  F = &MF->getFunction();
455  emitLinkageDirective(F, O);
456  if (isKernelFunction(*F))
457  O << ".entry ";
458  else {
459  O << ".func ";
460  printReturnValStr(*MF, O);
461  }
462 
463  CurrentFnSym->print(O, MAI);
464 
465  emitFunctionParamList(*MF, O);
466 
467  if (isKernelFunction(*F))
468  emitKernelFunctionDirectives(*F, O);
469 
470  OutStreamer->emitRawText(O.str());
471 
// The register mapping is cleared before the register declarations for this
// function are emitted.
472  VRegMapping.clear();
473  // Emit open brace for function body.
474  OutStreamer->emitRawText(StringRef("{\n"));
475  setAndEmitFunctionVirtualRegisters(*MF);
476  // Emit initial .loc debug directive for correct relocation symbol data.
477  if (MMI && MMI->hasDebugInfo())
479 }
480 
// Runs the base-class machine-function printer, then emits the closing brace
// of the function body and returns the base-class result.
// NOTE(review): listing line 481 (the signature) is not visible here;
// presumably this is the NVPTXAsmPrinter override of runOnMachineFunction.
482  bool Result = AsmPrinter::runOnMachineFunction(F);
483  // Emit closing brace for the body of function F.
484  // The closing brace must be emitted here because we need to emit additional
485  // debug labels/data after the last basic block.
486  // We need to emit the closing brace here because we don't have function that
487  // finished emission of the function body.
488  OutStreamer->emitRawText(StringRef("}\n"));
489  return Result;
490 }
491 
492 void NVPTXAsmPrinter::emitFunctionBodyStart() {
493  SmallString<128> Str;
494  raw_svector_ostream O(Str);
495  emitDemotedVars(&MF->getFunction(), O);
496  OutStreamer->emitRawText(O.str());
497 }
498 
499 void NVPTXAsmPrinter::emitFunctionBodyEnd() {
500  VRegMapping.clear();
501 }
502 
// Returns the MCSymbol whose name is held in Str, creating it if needed.
// NOTE(review): listing lines 503 (the signature) and 505 (whatever fills
// Str) are not visible here, so the symbol name cannot be determined from
// this listing.
504  SmallString<128> Str;
506  return OutContext.getOrCreateSymbol(Str);
507 }
508 
509 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
510  Register RegNo = MI->getOperand(0).getReg();
511  if (Register::isVirtualRegister(RegNo)) {
512  OutStreamer->AddComment(Twine("implicit-def: ") +
513  getVirtualRegisterName(RegNo));
514  } else {
515  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
516  OutStreamer->AddComment(Twine("implicit-def: ") +
517  STI.getRegisterInfo()->getName(RegNo));
518  }
519  OutStreamer->AddBlankLine();
520 }
521 
522 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
523  raw_ostream &O) const {
524  // If the NVVM IR has some of reqntid* specified, then output
525  // the reqntid directive, and set the unspecified ones to 1.
526  // If none of reqntid* is specified, don't output reqntid directive.
527  unsigned reqntidx, reqntidy, reqntidz;
528  bool specified = false;
529  if (!getReqNTIDx(F, reqntidx))
530  reqntidx = 1;
531  else
532  specified = true;
533  if (!getReqNTIDy(F, reqntidy))
534  reqntidy = 1;
535  else
536  specified = true;
537  if (!getReqNTIDz(F, reqntidz))
538  reqntidz = 1;
539  else
540  specified = true;
541 
542  if (specified)
543  O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
544  << "\n";
545 
546  // If the NVVM IR has some of maxntid* specified, then output
547  // the maxntid directive, and set the unspecified ones to 1.
548  // If none of maxntid* is specified, don't output maxntid directive.
549  unsigned maxntidx, maxntidy, maxntidz;
550  specified = false;
551  if (!getMaxNTIDx(F, maxntidx))
552  maxntidx = 1;
553  else
554  specified = true;
555  if (!getMaxNTIDy(F, maxntidy))
556  maxntidy = 1;
557  else
558  specified = true;
559  if (!getMaxNTIDz(F, maxntidz))
560  maxntidz = 1;
561  else
562  specified = true;
563 
564  if (specified)
565  O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
566  << "\n";
567 
568  unsigned mincta;
569  if (getMinCTASm(F, mincta))
570  O << ".minnctapersm " << mincta << "\n";
571 
572  unsigned maxnreg;
573  if (getMaxNReg(F, maxnreg))
574  O << ".maxnreg " << maxnreg << "\n";
575 }
576 
// Builds the printed name of a virtual register: the register-class prefix
// followed by the number the register was mapped to.
// NOTE(review): listing lines 578 (rest of the signature) and 588 (the
// declaration of the iterator VI) are not visible here.
577 std::string
579  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
580 
581  std::string Name;
582  raw_string_ostream NameStr(Name);
583 
// Both lookups are guarded only by assert; the iterators are dereferenced
// unconditionally afterwards.
584  VRegRCMap::const_iterator I = VRegMapping.find(RC);
585  assert(I != VRegMapping.end() && "Bad register class");
586  const DenseMap<unsigned, unsigned> &RegMap = I->second;
587 
589  assert(VI != RegMap.end() && "Bad virtual register");
590  unsigned MappedVR = VI->second;
591 
592  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
593 
594  NameStr.flush();
595  return Name;
596 }
597 
598 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
599  raw_ostream &O) {
600  O << getVirtualRegisterName(vr);
601 }
602 
603 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
604  emitLinkageDirective(F, O);
605  if (isKernelFunction(*F))
606  O << ".entry ";
607  else
608  O << ".func ";
609  printReturnValStr(F, O);
610  getSymbol(F)->print(O, MAI);
611  O << "\n";
612  emitFunctionParamList(F, O);
613  O << ";\n";
614 }
615 
616 static bool usedInGlobalVarDef(const Constant *C) {
617  if (!C)
618  return false;
619 
620  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
621  return GV->getName() != "llvm.used";
622  }
623 
624  for (const User *U : C->users())
625  if (const Constant *C = dyn_cast<Constant>(U))
626  if (usedInGlobalVarDef(C))
627  return true;
628 
629  return false;
630 }
631 
632 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
633  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
634  if (othergv->getName() == "llvm.used")
635  return true;
636  }
637 
638  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
639  if (instr->getParent() && instr->getParent()->getParent()) {
640  const Function *curFunc = instr->getParent()->getParent();
641  if (oneFunc && (curFunc != oneFunc))
642  return false;
643  oneFunc = curFunc;
644  return true;
645  } else
646  return false;
647  }
648 
649  for (const User *UU : U->users())
650  if (!usedInOneFunc(UU, oneFunc))
651  return false;
652 
653  return true;
654 }
655 
656 /* Find out if a global variable can be demoted to local scope.
657  * Currently, this is valid for CUDA shared variables, which have local
658  * scope and global lifetime. So the conditions to check are :
659  * 1. Is the global variable in shared address space?
660  * 2. Does it have internal linkage?
661  * 3. Is the global variable referenced only in one function?
662  */
// On success, f is set to the single function that uses gv.
// NOTE(review): listing line 667 is not visible here; it presumably holds the
// address-space condition guarding the `return false;` after Pty. As shown,
// that return is unconditional.
663 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
664  if (!gv->hasInternalLinkage())
665  return false;
666  PointerType *Pty = gv->getType();
668  return false;
669 
670  const Function *oneFunc = nullptr;
671 
672  bool flag = usedInOneFunc(gv, oneFunc);
673  if (!flag)
674  return false;
// A global with no instruction uses leaves oneFunc null and is not demoted.
675  if (!oneFunc)
676  return false;
677  f = oneFunc;
678  return true;
679 }
680 
// Returns true if C is used, directly or through other constants, by an
// instruction whose enclosing function is already present in seenMap.
// NOTE(review): listing line 682 (the seenMap parameter declaration) is not
// visible here.
681 static bool useFuncSeen(const Constant *C,
683  for (const User *U : C->users()) {
684  if (const Constant *cu = dyn_cast<Constant>(U)) {
685  if (useFuncSeen(cu, seenMap))
686  return true;
687  } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
// Instructions with no parent block or no parent function are ignored.
688  const BasicBlock *bb = I->getParent();
689  if (!bb)
690  continue;
691  const Function *caller = bb->getParent();
692  if (!caller)
693  continue;
694  if (seenMap.find(caller) != seenMap.end())
695  return true;
696  }
697  }
698  return false;
699 }
700 
// Walks the functions of M in module order and prints a forward declaration
// to O for each one that needs it: libcall callees, used non-intrinsic
// external declarations, functions referenced from a global variable
// definition, and functions used by a function that appears earlier in M.
// NOTE(review): listing line 702 (the declaration of seenMap) is not visible
// here.
701 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
703  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) {
704  const Function *F = &*FI;
705 
706  if (F->getAttributes().hasFnAttr("nvptx-libcall-callee")) {
707  emitDeclaration(F, O);
708  continue;
709  }
710 
// External declarations: skipped when unused or when they are intrinsics.
711  if (F->isDeclaration()) {
712  if (F->use_empty())
713  continue;
714  if (F->getIntrinsicID())
715  continue;
716  emitDeclaration(F, O);
717  continue;
718  }
// Definitions: at most one declaration is emitted per function, hence the
// `break` after each emitDeclaration call in this loop.
719  for (const User *U : F->users()) {
720  if (const Constant *C = dyn_cast<Constant>(U)) {
721  if (usedInGlobalVarDef(C)) {
722  // The use is in the initialization of a global variable
723  // that is a function pointer, so print a declaration
724  // for the original function
725  emitDeclaration(F, O);
726  break;
727  }
728  // Emit a declaration of this function if the function that
729  // uses this constant expr has already been seen.
730  if (useFuncSeen(C, seenMap)) {
731  emitDeclaration(F, O);
732  break;
733  }
734  }
735 
736  if (!isa<Instruction>(U))
737  continue;
738  const Instruction *instr = cast<Instruction>(U);
739  const BasicBlock *bb = instr->getParent();
740  if (!bb)
741  continue;
742  const Function *caller = bb->getParent();
743  if (!caller)
744  continue;
745 
746  // If a caller has already been seen, then the caller is
747  // appearing in the module before the callee. so print out
748  // a declaration for the callee.
749  if (seenMap.find(caller) != seenMap.end()) {
750  emitDeclaration(F, O);
751  break;
752  }
753  }
// F is marked as seen only after its own users have been examined.
754  seenMap[F] = true;
755  }
756 }
757 
// Returns true when GV is null, when its initializer is not a ConstantArray,
// or when that array has no operands.
// NOTE(review): listing line 758 (the signature) is not visible here; the
// callers refer to this helper as isEmptyXXStructor.
759  if (!GV) return true;
760  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
761  if (!InitList) return true; // Not an array; we don't know how to parse.
762  return InitList->getNumOperands() == 0;
763 }
764 
765 void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
766  // Construct a default subtarget off of the TargetMachine defaults. The
767  // rest of NVPTX isn't friendly to change subtargets per function and
768  // so the default TargetMachine will have all of the options.
769  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
770  const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
771  SmallString<128> Str1;
772  raw_svector_ostream OS1(Str1);
773 
774  // Emit header before any dwarf directives are emitted below.
775  emitHeader(M, OS1, *STI);
776  OutStreamer->emitRawText(OS1.str());
777 }
778 
// Rejects modules that contain aliases or non-empty global ctor/dtor lists,
// then runs the base-class initialization and resets GlobalsEmitted.
// NOTE(review): listing lines 779 (the signature), 785 and 790 (presumably
// the openings of the error-reporting calls whose message strings follow)
// are not visible here.
780  if (M.alias_size()) {
781  report_fatal_error("Module has aliases, which NVPTX does not support.");
782  return true; // error
783  }
784  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
786  "Module has a nontrivial global ctor, which NVPTX does not support.");
787  return true; // error
788  }
789  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
791  "Module has a nontrivial global dtor, which NVPTX does not support.");
792  return true; // error
793  }
794 
795  // We need to call the parent's one explicitly.
796  bool Result = AsmPrinter::doInitialization(M);
797 
798  GlobalsEmitted = false;
799 
800  return Result;
801 }
802 
// Emits the function forward declarations of M followed by its module-level
// global variables, all as one chunk of raw text.
// NOTE(review): listing lines 814-816 (presumably the declarations of
// Globals, GVVisited and GVVisiting) are not visible here.
803 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
804  SmallString<128> Str2;
805  raw_svector_ostream OS2(Str2);
806 
807  emitDeclarations(M, OS2);
808 
809  // As ptxas does not support forward references of globals, we need to first
810  // sort the list of module-level globals in def-use order. We visit each
811  // global variable in order, and ensure that we emit it *after* its dependent
812  // globals. We use a little extra memory maintaining both a set and a list to
813  // have fast searches while maintaining a strict ordering.
817 
818  // Visit each global variable, in order
819  for (const GlobalVariable &I : M.globals())
820  VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
821 
// Sanity checks: every global was visited and none is left half-processed.
822  assert(GVVisited.size() == M.getGlobalList().size() &&
823  "Missed a global variable");
824  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
825 
826  // Print out module-level global variables in proper order
827  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
828  printModuleLevelGV(Globals[i], OS2);
829 
830  OS2 << '\n';
831 
832  OutStreamer->emitRawText(OS2.str());
833 }
834 
// Writes the PTX file preamble to O: a banner comment, the .version line,
// the .target line (with optional texmode_independent and debug options) and
// the .address_size line.
// NOTE(review): listing lines 855-856 and 858-859 (the case labels of the
// switch on the emission kind) are not visible here.
835 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
836  const NVPTXSubtarget &STI) {
837  O << "//\n";
838  O << "// Generated by LLVM NVPTX Back-End\n";
839  O << "//\n";
840  O << "\n";
841 
// The PTX version is stored as major*10 + minor and printed as major.minor.
842  unsigned PTXVersion = STI.getPTXVersion();
843  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
844 
845  O << ".target ";
846  O << STI.getTargetName();
847 
848  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
849  if (NTM.getDrvInterface() == NVPTX::NVCL)
850  O << ", texmode_independent";
851 
// Scan the compile units; the loop stops at the first one that sets
// HasFullDebugInfo.
852  bool HasFullDebugInfo = false;
853  for (DICompileUnit *CU : M.debug_compile_units()) {
854  switch(CU->getEmissionKind()) {
857  break;
860  HasFullDebugInfo = true;
861  break;
862  }
863  if (HasFullDebugInfo)
864  break;
865  }
866  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
867  O << ", debug";
868 
869  O << "\n";
870 
871  O << ".address_size ";
872  if (NTM.is64Bit())
873  O << "64";
874  else
875  O << "32";
876  O << "\n";
877 
878  O << "\n";
879 }
880 
// Finishes emission for module M: makes sure the globals have been emitted,
// detaches the module's global variables around the finalization step so
// they are not emitted twice, and closes the debug sections.
// NOTE(review): listing lines 881 (the signature), 909 (presumably the
// base-class doFinalization call that defines `ret`) and 915 are not visible
// here.
882  bool HasDebugInfo = MMI && MMI->hasDebugInfo();
883 
884  // If we did not emit any functions, then the global declarations have not
885  // yet been emitted.
886  if (!GlobalsEmitted) {
887  emitGlobals(M);
888  GlobalsEmitted = true;
889  }
890 
891  // XXX Temporarily remove global variables so that doFinalization() will not
892  // emit them again (global variables are emitted at beginning).
893 
894  Module::GlobalListType &global_list = M.getGlobalList();
895  int i, n = global_list.size();
// Scratch array holding the detached globals; released with delete[] below.
896  GlobalVariable **gv_array = new GlobalVariable *[n];
897 
898  // first, back-up GlobalVariable in gv_array
899  i = 0;
900  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
901  I != E; ++I)
902  gv_array[i++] = &*I;
903 
904  // second, empty global_list
905  while (!global_list.empty())
906  global_list.remove(global_list.begin());
907 
908  // call doFinalization
910 
911  // now we restore global variables
// Globals are re-inserted at the end in their saved order, so the original
// list order is preserved.
912  for (i = 0; i < n; i++)
913  global_list.insert(global_list.end(), gv_array[i]);
914 
916 
917  delete[] gv_array;
918  // Close the last emitted section
919  if (HasDebugInfo) {
920  static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
921  ->closeLastSection();
922  // Emit empty .debug_loc section for better support of the empty files.
923  OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
924  }
925 
926  // Output last DWARF .file directives, if any.
927  static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer())
928  ->outputDwarfFileDirectives();
929 
930  return ret;
931 
932  //bool Result = AsmPrinter::doFinalization(M);
933  // Instead of calling the parents doFinalization, we may
934  // clone parents doFinalization and customize here.
935  // Currently, we if NVISA out the EmitGlobals() in
936  // parent's doFinalization, which is too intrusive.
937  //
938  // Same for the doInitialization.
939  //return Result;
940 }
941 
942 // This function emits appropriate linkage directives for
943 // functions and global variables.
944 //
945 // extern function declaration -> .extern
946 // extern function definition -> .visible
947 // external global variable with init -> .visible
948 // external without init -> .extern
949 // appending -> not allowed, assert.
950 // for any linkage other than
951 // internal, private, linker_private,
952 // linker_private_weak, linker_private_weak_def_auto,
953 // we emit -> .weak.
954 
955 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
956  raw_ostream &O) {
957  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
958  if (V->hasExternalLinkage()) {
959  if (isa<GlobalVariable>(V)) {
960  const GlobalVariable *GVar = cast<GlobalVariable>(V);
961  if (GVar) {
962  if (GVar->hasInitializer())
963  O << ".visible ";
964  else
965  O << ".extern ";
966  }
967  } else if (V->isDeclaration())
968  O << ".extern ";
969  else
970  O << ".visible ";
971  } else if (V->hasAppendingLinkage()) {
972  std::string msg;
973  msg.append("Error: ");
974  msg.append("Symbol ");
975  if (V->hasName())
976  msg.append(std::string(V->getName()));
977  msg.append("has unsupported appending linkage type");
978  llvm_unreachable(msg.c_str());
979  } else if (!V->hasInternalLinkage() &&
980  !V->hasPrivateLinkage()) {
981  O << ".weak ";
982  }
983  }
984 }
985 
986 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
987  raw_ostream &O,
988  bool processDemoted) {
989  // Skip meta data
990  if (GVar->hasSection()) {
991  if (GVar->getSection() == "llvm.metadata")
992  return;
993  }
994 
995  // Skip LLVM intrinsic global variables
996  if (GVar->getName().startswith("llvm.") ||
997  GVar->getName().startswith("nvvm."))
998  return;
999 
1000  const DataLayout &DL = getDataLayout();
1001 
1002  // GlobalVariables are always constant pointers themselves.
1003  PointerType *PTy = GVar->getType();
1004  Type *ETy = GVar->getValueType();
1005 
1006  if (GVar->hasExternalLinkage()) {
1007  if (GVar->hasInitializer())
1008  O << ".visible ";
1009  else
1010  O << ".extern ";
1011  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
1013  GVar->hasCommonLinkage()) {
1014  O << ".weak ";
1015  }
1016 
1017  if (isTexture(*GVar)) {
1018  O << ".global .texref " << getTextureName(*GVar) << ";\n";
1019  return;
1020  }
1021 
1022  if (isSurface(*GVar)) {
1023  O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1024  return;
1025  }
1026 
1027  if (GVar->isDeclaration()) {
1028  // (extern) declarations, no definition or initializer
1029  // Currently the only known declaration is for an automatic __local
1030  // (.shared) promoted to global.
1031  emitPTXGlobalVariable(GVar, O);
1032  O << ";\n";
1033  return;
1034  }
1035 
1036  if (isSampler(*GVar)) {
1037  O << ".global .samplerref " << getSamplerName(*GVar);
1038 
1039  const Constant *Initializer = nullptr;
1040  if (GVar->hasInitializer())
1041  Initializer = GVar->getInitializer();
1042  const ConstantInt *CI = nullptr;
1043  if (Initializer)
1044  CI = dyn_cast<ConstantInt>(Initializer);
1045  if (CI) {
1046  unsigned sample = CI->getZExtValue();
1047 
1048  O << " = { ";
1049 
1050  for (int i = 0,
1051  addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1052  i < 3; i++) {
1053  O << "addr_mode_" << i << " = ";
1054  switch (addr) {
1055  case 0:
1056  O << "wrap";
1057  break;
1058  case 1:
1059  O << "clamp_to_border";
1060  break;
1061  case 2:
1062  O << "clamp_to_edge";
1063  break;
1064  case 3:
1065  O << "wrap";
1066  break;
1067  case 4:
1068  O << "mirror";
1069  break;
1070  }
1071  O << ", ";
1072  }
1073  O << "filter_mode = ";
1074  switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1075  case 0:
1076  O << "nearest";
1077  break;
1078  case 1:
1079  O << "linear";
1080  break;
1081  case 2:
1082  llvm_unreachable("Anisotropic filtering is not supported");
1083  default:
1084  O << "nearest";
1085  break;
1086  }
1087  if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1088  O << ", force_unnormalized_coords = 1";
1089  }
1090  O << " }";
1091  }
1092 
1093  O << ";\n";
1094  return;
1095  }
1096 
1097  if (GVar->hasPrivateLinkage()) {
1098  if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1099  return;
1100 
1101  // FIXME - need better way (e.g. Metadata) to avoid generating this global
1102  if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1103  return;
1104  if (GVar->use_empty())
1105  return;
1106  }
1107 
1108  const Function *demotedFunc = nullptr;
1109  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1110  O << "// " << GVar->getName() << " has been demoted\n";
1111  if (localDecls.find(demotedFunc) != localDecls.end())
1112  localDecls[demotedFunc].push_back(GVar);
1113  else {
1114  std::vector<const GlobalVariable *> temp;
1115  temp.push_back(GVar);
1116  localDecls[demotedFunc] = temp;
1117  }
1118  return;
1119  }
1120 
1121  O << ".";
1122  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1123 
1124  if (isManaged(*GVar)) {
1125  O << " .attribute(.managed)";
1126  }
1127 
1128  if (GVar->getAlignment() == 0)
1129  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1130  else
1131  O << " .align " << GVar->getAlignment();
1132 
1133  if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1134  (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1135  O << " .";
1136  // Special case: ABI requires that we use .u8 for predicates
1137  if (ETy->isIntegerTy(1))
1138  O << "u8";
1139  else
1140  O << getPTXFundamentalTypeStr(ETy, false);
1141  O << " ";
1142  getSymbol(GVar)->print(O, MAI);
1143 
1144  // Ptx allows variable initilization only for constant and global state
1145  // spaces.
1146  if (GVar->hasInitializer()) {
1147  if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1148  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1149  const Constant *Initializer = GVar->getInitializer();
1150  // 'undef' is treated as there is no value specified.
1151  if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1152  O << " = ";
1153  printScalarConstant(Initializer, O);
1154  }
1155  } else {
1156  // The frontend adds zero-initializer to device and constant variables
1157  // that don't have an initial value, and UndefValue to shared
1158  // variables, so skip warning for this case.
1159  if (!GVar->getInitializer()->isNullValue() &&
1160  !isa<UndefValue>(GVar->getInitializer())) {
1161  report_fatal_error("initial value of '" + GVar->getName() +
1162  "' is not allowed in addrspace(" +
1163  Twine(PTy->getAddressSpace()) + ")");
1164  }
1165  }
1166  }
1167  } else {
1168  unsigned int ElementSize = 0;
1169 
1170  // Although PTX has direct support for struct type and array type and
1171  // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1172  // targets that support these high level field accesses. Structs, arrays
1173  // and vectors are lowered into arrays of bytes.
1174  switch (ETy->getTypeID()) {
1175  case Type::IntegerTyID: // Integers larger than 64 bits
1176  case Type::StructTyID:
1177  case Type::ArrayTyID:
1178  case Type::FixedVectorTyID:
1179  ElementSize = DL.getTypeStoreSize(ETy);
1180  // Ptx allows variable initilization only for constant and
1181  // global state spaces.
1182  if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1183  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1184  GVar->hasInitializer()) {
1185  const Constant *Initializer = GVar->getInitializer();
1186  if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1187  AggBuffer aggBuffer(ElementSize, O, *this);
1188  bufferAggregateConstant(Initializer, &aggBuffer);
1189  if (aggBuffer.numSymbols) {
1190  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1191  O << " .u64 ";
1192  getSymbol(GVar)->print(O, MAI);
1193  O << "[";
1194  O << ElementSize / 8;
1195  } else {
1196  O << " .u32 ";
1197  getSymbol(GVar)->print(O, MAI);
1198  O << "[";
1199  O << ElementSize / 4;
1200  }
1201  O << "]";
1202  } else {
1203  O << " .b8 ";
1204  getSymbol(GVar)->print(O, MAI);
1205  O << "[";
1206  O << ElementSize;
1207  O << "]";
1208  }
1209  O << " = {";
1210  aggBuffer.print();
1211  O << "}";
1212  } else {
1213  O << " .b8 ";
1214  getSymbol(GVar)->print(O, MAI);
1215  if (ElementSize) {
1216  O << "[";
1217  O << ElementSize;
1218  O << "]";
1219  }
1220  }
1221  } else {
1222  O << " .b8 ";
1223  getSymbol(GVar)->print(O, MAI);
1224  if (ElementSize) {
1225  O << "[";
1226  O << ElementSize;
1227  O << "]";
1228  }
1229  }
1230  break;
1231  default:
1232  llvm_unreachable("type not supported yet");
1233  }
1234  }
1235  O << ";\n";
1236 }
1237 
1238 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1239  if (localDecls.find(f) == localDecls.end())
1240  return;
1241 
1242  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1243 
1244  for (unsigned i = 0, e = gvars.size(); i != e; ++i) {
1245  O << "\t// demoted variable\n\t";
1246  printModuleLevelGV(gvars[i], O, true);
1247  }
1248 }
1249 
1250 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1251  raw_ostream &O) const {
1252  switch (AddressSpace) {
1253  case ADDRESS_SPACE_LOCAL:
1254  O << "local";
1255  break;
1256  case ADDRESS_SPACE_GLOBAL:
1257  O << "global";
1258  break;
1259  case ADDRESS_SPACE_CONST:
1260  O << "const";
1261  break;
1262  case ADDRESS_SPACE_SHARED:
1263  O << "shared";
1264  break;
1265  default:
1266  report_fatal_error("Bad address space found while emitting PTX: " +
1268  break;
1269  }
1270 }
1271 
1272 std::string
1273 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1274  switch (Ty->getTypeID()) {
1275  case Type::IntegerTyID: {
1276  unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1277  if (NumBits == 1)
1278  return "pred";
1279  else if (NumBits <= 64) {
1280  std::string name = "u";
1281  return name + utostr(NumBits);
1282  } else {
1283  llvm_unreachable("Integer too large");
1284  break;
1285  }
1286  break;
1287  }
1288  case Type::HalfTyID:
1289  // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1290  return "b16";
1291  case Type::FloatTyID:
1292  return "f32";
1293  case Type::DoubleTyID:
1294  return "f64";
1295  case Type::PointerTyID:
1296  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1297  if (useB4PTR)
1298  return "b64";
1299  else
1300  return "u64";
1301  else if (useB4PTR)
1302  return "b32";
1303  else
1304  return "u32";
1305  default:
1306  break;
1307  }
1308  llvm_unreachable("unexpected type");
1309 }
1310 
1311 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1312  raw_ostream &O) {
1313  const DataLayout &DL = getDataLayout();
1314 
1315  // GlobalVariables are always constant pointers themselves.
1316  Type *ETy = GVar->getValueType();
1317 
1318  O << ".";
1319  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1320  if (GVar->getAlignment() == 0)
1321  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1322  else
1323  O << " .align " << GVar->getAlignment();
1324 
1325  // Special case for i128
1326  if (ETy->isIntegerTy(128)) {
1327  O << " .b8 ";
1328  getSymbol(GVar)->print(O, MAI);
1329  O << "[16]";
1330  return;
1331  }
1332 
1333  if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1334  O << " .";
1335  O << getPTXFundamentalTypeStr(ETy);
1336  O << " ";
1337  getSymbol(GVar)->print(O, MAI);
1338  return;
1339  }
1340 
1341  int64_t ElementSize = 0;
1342 
1343  // Although PTX has direct support for struct type and array type and LLVM IR
1344  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1345  // support these high level field accesses. Structs and arrays are lowered
1346  // into arrays of bytes.
1347  switch (ETy->getTypeID()) {
1348  case Type::StructTyID:
1349  case Type::ArrayTyID:
1350  case Type::FixedVectorTyID:
1351  ElementSize = DL.getTypeStoreSize(ETy);
1352  O << " .b8 ";
1353  getSymbol(GVar)->print(O, MAI);
1354  O << "[";
1355  if (ElementSize) {
1356  O << ElementSize;
1357  }
1358  O << "]";
1359  break;
1360  default:
1361  llvm_unreachable("type not supported yet");
1362  }
1363 }
1364 
1365 static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty) {
1366  if (Ty->isSingleValueType())
1367  return DL.getPrefTypeAlignment(Ty);
1368 
1369  auto *ATy = dyn_cast<ArrayType>(Ty);
1370  if (ATy)
1371  return getOpenCLAlignment(DL, ATy->getElementType());
1372 
1373  auto *STy = dyn_cast<StructType>(Ty);
1374  if (STy) {
1375  unsigned int alignStruct = 1;
1376  // Go through each element of the struct and find the
1377  // largest alignment.
1378  for (unsigned i = 0, e = STy->getNumElements(); i != e; i++) {
1379  Type *ETy = STy->getElementType(i);
1380  unsigned int align = getOpenCLAlignment(DL, ETy);
1381  if (align > alignStruct)
1382  alignStruct = align;
1383  }
1384  return alignStruct;
1385  }
1386 
1387  auto *FTy = dyn_cast<FunctionType>(Ty);
1388  if (FTy)
1389  return DL.getPointerPrefAlignment().value();
1390  return DL.getPrefTypeAlignment(Ty);
1391 }
1392 
1393 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1394  int paramIndex, raw_ostream &O) {
1395  getSymbol(I->getParent())->print(O, MAI);
1396  O << "_param_" << paramIndex;
1397 }
1398 
1399 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1400  const DataLayout &DL = getDataLayout();
1401  const AttributeList &PAL = F->getAttributes();
1402  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1403  const TargetLowering *TLI = STI.getTargetLowering();
1405  unsigned paramIndex = 0;
1406  bool first = true;
1407  bool isKernelFunc = isKernelFunction(*F);
1408  bool isABI = (STI.getSmVersion() >= 20);
1409  bool hasImageHandles = STI.hasImageHandles();
1410  MVT thePointerTy = TLI->getPointerTy(DL);
1411 
1412  if (F->arg_empty()) {
1413  O << "()\n";
1414  return;
1415  }
1416 
1417  O << "(\n";
1418 
1419  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1420  Type *Ty = I->getType();
1421 
1422  if (!first)
1423  O << ",\n";
1424 
1425  first = false;
1426 
1427  // Handle image/sampler parameters
1428  if (isKernelFunction(*F)) {
1429  if (isSampler(*I) || isImage(*I)) {
1430  if (isImage(*I)) {
1431  std::string sname = std::string(I->getName());
1432  if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1433  if (hasImageHandles)
1434  O << "\t.param .u64 .ptr .surfref ";
1435  else
1436  O << "\t.param .surfref ";
1437  CurrentFnSym->print(O, MAI);
1438  O << "_param_" << paramIndex;
1439  }
1440  else { // Default image is read_only
1441  if (hasImageHandles)
1442  O << "\t.param .u64 .ptr .texref ";
1443  else
1444  O << "\t.param .texref ";
1445  CurrentFnSym->print(O, MAI);
1446  O << "_param_" << paramIndex;
1447  }
1448  } else {
1449  if (hasImageHandles)
1450  O << "\t.param .u64 .ptr .samplerref ";
1451  else
1452  O << "\t.param .samplerref ";
1453  CurrentFnSym->print(O, MAI);
1454  O << "_param_" << paramIndex;
1455  }
1456  continue;
1457  }
1458  }
1459 
1460  if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1461  if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1462  // Just print .param .align <a> .b8 .param[size];
1463  // <a> = PAL.getparamalignment
1464  // size = typeallocsize of element type
1465  const Align align = DL.getValueOrABITypeAlignment(
1466  PAL.getParamAlignment(paramIndex), Ty);
1467 
1468  unsigned sz = DL.getTypeAllocSize(Ty);
1469  O << "\t.param .align " << align.value() << " .b8 ";
1470  printParamName(I, paramIndex, O);
1471  O << "[" << sz << "]";
1472 
1473  continue;
1474  }
1475  // Just a scalar
1476  auto *PTy = dyn_cast<PointerType>(Ty);
1477  if (isKernelFunc) {
1478  if (PTy) {
1479  // Special handling for pointer arguments to kernel
1480  O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1481 
1482  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1483  NVPTX::CUDA) {
1484  Type *ETy = PTy->getElementType();
1485  int addrSpace = PTy->getAddressSpace();
1486  switch (addrSpace) {
1487  default:
1488  O << ".ptr ";
1489  break;
1490  case ADDRESS_SPACE_CONST:
1491  O << ".ptr .const ";
1492  break;
1493  case ADDRESS_SPACE_SHARED:
1494  O << ".ptr .shared ";
1495  break;
1496  case ADDRESS_SPACE_GLOBAL:
1497  O << ".ptr .global ";
1498  break;
1499  }
1500  O << ".align " << (int)getOpenCLAlignment(DL, ETy) << " ";
1501  }
1502  printParamName(I, paramIndex, O);
1503  continue;
1504  }
1505 
1506  // non-pointer scalar to kernel func
1507  O << "\t.param .";
1508  // Special case: predicate operands become .u8 types
1509  if (Ty->isIntegerTy(1))
1510  O << "u8";
1511  else
1512  O << getPTXFundamentalTypeStr(Ty);
1513  O << " ";
1514  printParamName(I, paramIndex, O);
1515  continue;
1516  }
1517  // Non-kernel function, just print .param .b<size> for ABI
1518  // and .reg .b<size> for non-ABI
1519  unsigned sz = 0;
1520  if (isa<IntegerType>(Ty)) {
1521  sz = cast<IntegerType>(Ty)->getBitWidth();
1522  if (sz < 32)
1523  sz = 32;
1524  } else if (isa<PointerType>(Ty))
1525  sz = thePointerTy.getSizeInBits();
1526  else if (Ty->isHalfTy())
1527  // PTX ABI requires all scalar parameters to be at least 32
1528  // bits in size. fp16 normally uses .b16 as its storage type
1529  // in PTX, so its size must be adjusted here, too.
1530  sz = 32;
1531  else
1532  sz = Ty->getPrimitiveSizeInBits();
1533  if (isABI)
1534  O << "\t.param .b" << sz << " ";
1535  else
1536  O << "\t.reg .b" << sz << " ";
1537  printParamName(I, paramIndex, O);
1538  continue;
1539  }
1540 
1541  // param has byVal attribute. So should be a pointer
1542  auto *PTy = dyn_cast<PointerType>(Ty);
1543  assert(PTy && "Param with byval attribute should be a pointer type");
1544  Type *ETy = PTy->getElementType();
1545 
1546  if (isABI || isKernelFunc) {
1547  // Just print .param .align <a> .b8 .param[size];
1548  // <a> = PAL.getparamalignment
1549  // size = typeallocsize of element type
1550  Align align =
1551  DL.getValueOrABITypeAlignment(PAL.getParamAlignment(paramIndex), ETy);
1552  // Work around a bug in ptxas. When PTX code takes address of
1553  // byval parameter with alignment < 4, ptxas generates code to
1554  // spill argument into memory. Alas on sm_50+ ptxas generates
1555  // SASS code that fails with misaligned access. To work around
1556  // the problem, make sure that we align byval parameters by at
1557  // least 4. Matching change must be made in LowerCall() where we
1558  // prepare parameters for the call.
1559  //
1560  // TODO: this will need to be undone when we get to support multi-TU
1561  // device-side compilation as it breaks ABI compatibility with nvcc.
1562  // Hopefully ptxas bug is fixed by then.
1563  if (!isKernelFunc && align < Align(4))
1564  align = Align(4);
1565  unsigned sz = DL.getTypeAllocSize(ETy);
1566  O << "\t.param .align " << align.value() << " .b8 ";
1567  printParamName(I, paramIndex, O);
1568  O << "[" << sz << "]";
1569  continue;
1570  } else {
1571  // Split the ETy into constituent parts and
1572  // print .param .b<size> <name> for each part.
1573  // Further, if a part is vector, print the above for
1574  // each vector element.
1575  SmallVector<EVT, 16> vtparts;
1576  ComputeValueVTs(*TLI, DL, ETy, vtparts);
1577  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1578  unsigned elems = 1;
1579  EVT elemtype = vtparts[i];
1580  if (vtparts[i].isVector()) {
1581  elems = vtparts[i].getVectorNumElements();
1582  elemtype = vtparts[i].getVectorElementType();
1583  }
1584 
1585  for (unsigned j = 0, je = elems; j != je; ++j) {
1586  unsigned sz = elemtype.getSizeInBits();
1587  if (elemtype.isInteger() && (sz < 32))
1588  sz = 32;
1589  O << "\t.reg .b" << sz << " ";
1590  printParamName(I, paramIndex, O);
1591  if (j < je - 1)
1592  O << ",\n";
1593  ++paramIndex;
1594  }
1595  if (i < e - 1)
1596  O << ",\n";
1597  }
1598  --paramIndex;
1599  continue;
1600  }
1601  }
1602 
1603  O << "\n)\n";
1604 }
1605 
1606 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1607  raw_ostream &O) {
1608  const Function &F = MF.getFunction();
1609  emitFunctionParamList(&F, O);
1610 }
1611 
1612 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1613  const MachineFunction &MF) {
1614  SmallString<128> Str;
1615  raw_svector_ostream O(Str);
1616 
1617  // Map the global virtual register number to a register class specific
1618  // virtual register number starting from 1 with that class.
1620  //unsigned numRegClasses = TRI->getNumRegClasses();
1621 
1622  // Emit the Fake Stack Object
1623  const MachineFrameInfo &MFI = MF.getFrameInfo();
1624  int NumBytes = (int) MFI.getStackSize();
1625  if (NumBytes) {
1626  O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1627  << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1628  if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1629  O << "\t.reg .b64 \t%SP;\n";
1630  O << "\t.reg .b64 \t%SPL;\n";
1631  } else {
1632  O << "\t.reg .b32 \t%SP;\n";
1633  O << "\t.reg .b32 \t%SPL;\n";
1634  }
1635  }
1636 
1637  // Go through all virtual registers to establish the mapping between the
1638  // global virtual
1639  // register number and the per class virtual register number.
1640  // We use the per class virtual register number in the ptx output.
1641  unsigned int numVRs = MRI->getNumVirtRegs();
1642  for (unsigned i = 0; i < numVRs; i++) {
1643  unsigned int vr = Register::index2VirtReg(i);
1644  const TargetRegisterClass *RC = MRI->getRegClass(vr);
1645  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1646  int n = regmap.size();
1647  regmap.insert(std::make_pair(vr, n + 1));
1648  }
1649 
1650  // Emit register declarations
1651  // @TODO: Extract out the real register usage
1652  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1653  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1654  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1655  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1656  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1657  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1658  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1659 
1660  // Emit declaration of the virtual registers or 'physical' registers for
1661  // each register class
1662  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1663  const TargetRegisterClass *RC = TRI->getRegClass(i);
1664  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1665  std::string rcname = getNVPTXRegClassName(RC);
1666  std::string rcStr = getNVPTXRegClassStr(RC);
1667  int n = regmap.size();
1668 
1669  // Only declare those registers that may be used.
1670  if (n) {
1671  O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1672  << ">;\n";
1673  }
1674  }
1675 
1676  OutStreamer->emitRawText(O.str());
1677 }
1678 
1679 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1680  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1681  bool ignored;
1682  unsigned int numHex;
1683  const char *lead;
1684 
1685  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1686  numHex = 8;
1687  lead = "0f";
1689  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1690  numHex = 16;
1691  lead = "0d";
1693  } else
1694  llvm_unreachable("unsupported fp type");
1695 
1696  APInt API = APF.bitcastToAPInt();
1697  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1698 }
1699 
1700 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1701  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1702  O << CI->getValue();
1703  return;
1704  }
1705  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1706  printFPConstant(CFP, O);
1707  return;
1708  }
1709  if (isa<ConstantPointerNull>(CPV)) {
1710  O << "0";
1711  return;
1712  }
1713  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1714  bool IsNonGenericPointer = false;
1715  if (GVar->getType()->getAddressSpace() != 0) {
1716  IsNonGenericPointer = true;
1717  }
1718  if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1719  O << "generic(";
1720  getSymbol(GVar)->print(O, MAI);
1721  O << ")";
1722  } else {
1723  getSymbol(GVar)->print(O, MAI);
1724  }
1725  return;
1726  }
1727  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1728  const Value *v = Cexpr->stripPointerCasts();
1729  PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1730  bool IsNonGenericPointer = false;
1731  if (PTy && PTy->getAddressSpace() != 0) {
1732  IsNonGenericPointer = true;
1733  }
1734  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1735  if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1736  O << "generic(";
1737  getSymbol(GVar)->print(O, MAI);
1738  O << ")";
1739  } else {
1740  getSymbol(GVar)->print(O, MAI);
1741  }
1742  return;
1743  } else {
1744  lowerConstant(CPV)->print(O, MAI);
1745  return;
1746  }
1747  }
1748  llvm_unreachable("Not scalar type found in printScalarConstant()");
1749 }
1750 
1751 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1752  AggBuffer *AggBuffer) {
1753  const DataLayout &DL = getDataLayout();
1754  int AllocSize = DL.getTypeAllocSize(CPV->getType());
1755  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1756  // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1757  // only the space allocated by CPV.
1758  AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1759  return;
1760  }
1761 
1762  // Helper for filling AggBuffer with APInts.
1763  auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1764  size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1765  SmallVector<unsigned char, 16> Buf(NumBytes);
1766  for (unsigned I = 0; I < NumBytes; ++I) {
1767  Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1768  }
1769  AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1770  };
1771 
1772  switch (CPV->getType()->getTypeID()) {
1773  case Type::IntegerTyID:
1774  if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1775  AddIntToBuffer(CI->getValue());
1776  break;
1777  }
1778  if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1779  if (const auto *CI =
1780  dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1781  AddIntToBuffer(CI->getValue());
1782  break;
1783  }
1784  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1785  Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1786  AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1787  AggBuffer->addZeros(AllocSize);
1788  break;
1789  }
1790  }
1791  llvm_unreachable("unsupported integer const type");
1792  break;
1793 
1794  case Type::HalfTyID:
1795  case Type::FloatTyID:
1796  case Type::DoubleTyID:
1797  AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1798  break;
1799 
1800  case Type::PointerTyID: {
1801  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1802  AggBuffer->addSymbol(GVar, GVar);
1803  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1804  const Value *v = Cexpr->stripPointerCasts();
1805  AggBuffer->addSymbol(v, Cexpr);
1806  }
1807  AggBuffer->addZeros(AllocSize);
1808  break;
1809  }
1810 
1811  case Type::ArrayTyID:
1812  case Type::FixedVectorTyID:
1813  case Type::StructTyID: {
1814  if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1815  bufferAggregateConstant(CPV, AggBuffer);
1816  if (Bytes > AllocSize)
1817  AggBuffer->addZeros(Bytes - AllocSize);
1818  } else if (isa<ConstantAggregateZero>(CPV))
1819  AggBuffer->addZeros(Bytes);
1820  else
1821  llvm_unreachable("Unexpected Constant type");
1822  break;
1823  }
1824 
1825  default:
1826  llvm_unreachable("unsupported type");
1827  }
1828 }
1829 
1830 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1831  AggBuffer *aggBuffer) {
1832  const DataLayout &DL = getDataLayout();
1833  int Bytes;
1834 
1835  // Integers of arbitrary width
1836  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1837  APInt Val = CI->getValue();
1838  for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1839  uint8_t Byte = Val.getLoBits(8).getZExtValue();
1840  aggBuffer->addBytes(&Byte, 1, 1);
1841  Val.lshrInPlace(8);
1842  }
1843  return;
1844  }
1845 
1846  // Old constants
1847  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1848  if (CPV->getNumOperands())
1849  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1850  bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1851  return;
1852  }
1853 
1854  if (const ConstantDataSequential *CDS =
1855  dyn_cast<ConstantDataSequential>(CPV)) {
1856  if (CDS->getNumElements())
1857  for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1858  bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1859  aggBuffer);
1860  return;
1861  }
1862 
1863  if (isa<ConstantStruct>(CPV)) {
1864  if (CPV->getNumOperands()) {
1865  StructType *ST = cast<StructType>(CPV->getType());
1866  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1867  if (i == (e - 1))
1868  Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1869  DL.getTypeAllocSize(ST) -
1870  DL.getStructLayout(ST)->getElementOffset(i);
1871  else
1872  Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1873  DL.getStructLayout(ST)->getElementOffset(i);
1874  bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1875  }
1876  }
1877  return;
1878  }
1879  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1880 }
1881 
1882 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1883 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
1884 /// expressions that are representable in PTX and create
1885 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1886 const MCExpr *
1887 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1888  MCContext &Ctx = OutContext;
1889 
1890  if (CV->isNullValue() || isa<UndefValue>(CV))
1891  return MCConstantExpr::create(0, Ctx);
1892 
1893  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1894  return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1895 
1896  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1897  const MCSymbolRefExpr *Expr =
1899  if (ProcessingGeneric) {
1900  return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1901  } else {
1902  return Expr;
1903  }
1904  }
1905 
1906  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1907  if (!CE) {
1908  llvm_unreachable("Unknown constant value to lower!");
1909  }
1910 
1911  switch (CE->getOpcode()) {
1912  default: {
1913  // If the code isn't optimized, there may be outstanding folding
1914  // opportunities. Attempt to fold the expression using DataLayout as a
1915  // last resort before giving up.
1917  if (C != CE)
1918  return lowerConstantForGV(C, ProcessingGeneric);
1919 
1920  // Otherwise report the problem to the user.
1921  std::string S;
1922  raw_string_ostream OS(S);
1923  OS << "Unsupported expression in static initializer: ";
1924  CE->printAsOperand(OS, /*PrintType=*/false,
1925  !MF ? nullptr : MF->getFunction().getParent());
1926  report_fatal_error(Twine(OS.str()));
1927  }
1928 
1929  case Instruction::AddrSpaceCast: {
1930  // Strip the addrspacecast and pass along the operand
1931  PointerType *DstTy = cast<PointerType>(CE->getType());
1932  if (DstTy->getAddressSpace() == 0) {
1933  return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1934  }
1935  std::string S;
1936  raw_string_ostream OS(S);
1937  OS << "Unsupported expression in static initializer: ";
1938  CE->printAsOperand(OS, /*PrintType=*/ false,
1939  !MF ? nullptr : MF->getFunction().getParent());
1940  report_fatal_error(Twine(OS.str()));
1941  }
1942 
1943  case Instruction::GetElementPtr: {
1944  const DataLayout &DL = getDataLayout();
1945 
1946  // Generate a symbolic expression for the byte address
1947  APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1948  cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1949 
1950  const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1951  ProcessingGeneric);
1952  if (!OffsetAI)
1953  return Base;
1954 
1955  int64_t Offset = OffsetAI.getSExtValue();
1957  Ctx);
1958  }
1959 
1960  case Instruction::Trunc:
1961  // We emit the value and depend on the assembler to truncate the generated
1962  // expression properly. This is important for differences between
1963  // blockaddress labels. Since the two labels are in the same function, it
1964  // is reasonable to treat their delta as a 32-bit value.
1966  case Instruction::BitCast:
1967  return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1968 
1969  case Instruction::IntToPtr: {
1970  const DataLayout &DL = getDataLayout();
1971 
1972  // Handle casts to pointers by changing them into casts to the appropriate
1973  // integer type. This promotes constant folding and simplifies this code.
1974  Constant *Op = CE->getOperand(0);
1975  Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1976  false/*ZExt*/);
1977  return lowerConstantForGV(Op, ProcessingGeneric);
1978  }
1979 
1980  case Instruction::PtrToInt: {
1981  const DataLayout &DL = getDataLayout();
1982 
1983  // Support only foldable casts to/from pointers that can be eliminated by
1984  // changing the pointer to the appropriately sized integer type.
1985  Constant *Op = CE->getOperand(0);
1986  Type *Ty = CE->getType();
1987 
1988  const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1989 
1990  // We can emit the pointer value into this slot if the slot is an
1991  // integer slot equal to the size of the pointer.
1992  if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1993  return OpExpr;
1994 
1995  // Otherwise the pointer is smaller than the resultant integer, mask off
1996  // the high bits so we are sure to get a proper truncation if the input is
1997  // a constant expr.
1998  unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
1999  const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
2000  return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
2001  }
2002 
2003  // The MC library also has a right-shift operator, but it isn't consistently
2004  // signed or unsigned between different targets.
2005  case Instruction::Add: {
2006  const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
2007  const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
2008  switch (CE->getOpcode()) {
2009  default: llvm_unreachable("Unknown binary operator constant cast expr");
2010  case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
2011  }
2012  }
2013  }
2014 }
2015 
2016 // Copy of MCExpr::print customized for NVPTX
2017 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
2018  switch (Expr.getKind()) {
2019  case MCExpr::Target:
2020  return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
2021  case MCExpr::Constant:
2022  OS << cast<MCConstantExpr>(Expr).getValue();
2023  return;
2024 
2025  case MCExpr::SymbolRef: {
2026  const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
2027  const MCSymbol &Sym = SRE.getSymbol();
2028  Sym.print(OS, MAI);
2029  return;
2030  }
2031 
2032  case MCExpr::Unary: {
2033  const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2034  switch (UE.getOpcode()) {
2035  case MCUnaryExpr::LNot: OS << '!'; break;
2036  case MCUnaryExpr::Minus: OS << '-'; break;
2037  case MCUnaryExpr::Not: OS << '~'; break;
2038  case MCUnaryExpr::Plus: OS << '+'; break;
2039  }
2040  printMCExpr(*UE.getSubExpr(), OS);
2041  return;
2042  }
2043 
2044  case MCExpr::Binary: {
2045  const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2046 
2047  // Only print parens around the LHS if it is non-trivial.
2048  if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2049  isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2050  printMCExpr(*BE.getLHS(), OS);
2051  } else {
2052  OS << '(';
2053  printMCExpr(*BE.getLHS(), OS);
2054  OS<< ')';
2055  }
2056 
2057  switch (BE.getOpcode()) {
2058  case MCBinaryExpr::Add:
2059  // Print "X-42" instead of "X+-42".
2060  if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2061  if (RHSC->getValue() < 0) {
2062  OS << RHSC->getValue();
2063  return;
2064  }
2065  }
2066 
2067  OS << '+';
2068  break;
2069  default: llvm_unreachable("Unhandled binary operator");
2070  }
2071 
2072  // Only print parens around the LHS if it is non-trivial.
2073  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2074  printMCExpr(*BE.getRHS(), OS);
2075  } else {
2076  OS << '(';
2077  printMCExpr(*BE.getRHS(), OS);
2078  OS << ')';
2079  }
2080  return;
2081  }
2082  }
2083 
2084  llvm_unreachable("Invalid expression kind!");
2085 }
2086 
2087 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2088 ///
2089 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2090  const char *ExtraCode, raw_ostream &O) {
2091  if (ExtraCode && ExtraCode[0]) {
2092  if (ExtraCode[1] != 0)
2093  return true; // Unknown modifier.
2094 
2095  switch (ExtraCode[0]) {
2096  default:
2097  // See if this is a generic print operand
2098  return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2099  case 'r':
2100  break;
2101  }
2102  }
2103 
2104  printOperand(MI, OpNo, O);
2105 
2106  return false;
2107 }
2108 
2109 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2110  unsigned OpNo,
2111  const char *ExtraCode,
2112  raw_ostream &O) {
2113  if (ExtraCode && ExtraCode[0])
2114  return true; // Unknown modifier
2115 
2116  O << '[';
2117  printMemOperand(MI, OpNo, O);
2118  O << ']';
2119 
2120  return false;
2121 }
2122 
2123 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2124  raw_ostream &O) {
2125  const MachineOperand &MO = MI->getOperand(opNum);
2126  switch (MO.getType()) {
2129  if (MO.getReg() == NVPTX::VRDepot)
2130  O << DEPOTNAME << getFunctionNumber();
2131  else
2133  } else {
2134  emitVirtualRegister(MO.getReg(), O);
2135  }
2136  break;
2137 
2139  O << MO.getImm();
2140  break;
2141 
2143  printFPConstant(MO.getFPImm(), O);
2144  break;
2145 
2147  PrintSymbolOperand(MO, O);
2148  break;
2149 
2151  MO.getMBB()->getSymbol()->print(O, MAI);
2152  break;
2153 
2154  default:
2155  llvm_unreachable("Operand type not supported.");
2156  }
2157 }
2158 
2159 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2160  raw_ostream &O, const char *Modifier) {
2161  printOperand(MI, opNum, O);
2162 
2163  if (Modifier && strcmp(Modifier, "add") == 0) {
2164  O << ", ";
2165  printOperand(MI, opNum + 1, O);
2166  } else {
2167  if (MI->getOperand(opNum + 1).isImm() &&
2168  MI->getOperand(opNum + 1).getImm() == 0)
2169  return; // don't print ',0' or '+0'
2170  O << "+";
2171  printOperand(MI, opNum + 1, O);
2172  }
2173 }
2174 
2175 // Force static initialization.
2179 }
llvm::NVPTXAsmPrinter::AggBuffer
friend class AggBuffer
Definition: NVPTXAsmPrinter.h:197
llvm::codeview::SimpleTypeKind::Byte
@ Byte
i
i
Definition: README.txt:29
llvm::NVPTXAsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: NVPTXAsmPrinter.cpp:881
NVPTXInstPrinter.h
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:495
llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition: Type.h:75
ValueTypes.h
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
llvm::StringRef::startswith
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:286
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
llvm::getReqNTIDx
bool getReqNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:254
llvm::MachineBasicBlock::pred_begin
pred_iterator pred_begin()
Definition: MachineBasicBlock.h:316
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
MachineInstr.h
llvm::MachineOperand::MO_Immediate
@ MO_Immediate
Immediate operand.
Definition: MachineOperand.h:53
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::GlobalValue::hasCommonLinkage
bool hasCommonLinkage() const
Definition: GlobalValue.h:455
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
__CLK_ADDRESS_BASE
@ __CLK_ADDRESS_BASE
Definition: cl_common_defines.h:69
llvm::MachineLoopInfo::getLoopFor
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Definition: MachineLoopInfo.h:127
llvm::NVPTXInstPrinter::getRegisterName
static const char * getRegisterName(unsigned RegNo)
llvm::MCOperand::createExpr
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
llvm::MachineBasicBlock::getBasicBlock
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
Definition: MachineBasicBlock.h:202
llvm::MCUnaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:467
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:417
llvm::AsmPrinter::lowerConstant
virtual const MCExpr * lowerConstant(const Constant *CV)
Lower the specified LLVM Constant to an MCExpr.
Definition: AsmPrinter.cpp:2472
llvm::GlobalValue::hasExternalLinkage
bool hasExternalLinkage() const
Definition: GlobalValue.h:434
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:217
llvm::APFloatBase::IEEEsingle
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:170
DebugInfoMetadata.h
MCInstrDesc.h
llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition: MachineOperand.h:563
llvm::PointerType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:673
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:72
llvm::Function
Definition: Function.h:62
llvm::Type::VoidTyID
@ VoidTyID
type with no size
Definition: Type.h:63
StringRef.h
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1019
llvm::NVPTXSubtarget::getTargetLowering
const NVPTXTargetLowering * getTargetLowering() const override
Definition: NVPTXSubtarget.h:64
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:625
llvm::isImageReadWrite
bool isImageReadWrite(const Value &val)
Definition: NVPTXUtilities.cpp:200
llvm::NVPTXTargetMachine::is64Bit
bool is64Bit() const
Definition: NVPTXTargetMachine.h:46
llvm::GlobalObject::getSection
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:112
llvm::AsmPrinter::MAI
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:88
llvm::MCConstantExpr::create
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
llvm::GlobalValue::hasWeakLinkage
bool hasWeakLinkage() const
Definition: GlobalValue.h:445
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
usedInGlobalVarDef
static bool usedInGlobalVarDef(const Constant *C)
Definition: NVPTXAsmPrinter.cpp:616
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Path.h
llvm::ADDRESS_SPACE_LOCAL
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
llvm::Value::hasName
bool hasName() const
Definition: Value.h:262
ErrorHandling.h
llvm::NVPTXGenericMCSymbolRefExpr::create
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:54
DiscoverDependentGlobals
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
Definition: NVPTXAsmPrinter.cpp:99
llvm::GlobalVariable
Definition: GlobalVariable.h:40
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:687
usedInOneFunc
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
Definition: NVPTXAsmPrinter.cpp:632
llvm::MCUnaryExpr::Plus
@ Plus
Unary plus.
Definition: MCExpr.h:430
llvm::MCBinaryExpr::createAnd
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:529
llvm::MCBinaryExpr::Add
@ Add
Addition.
Definition: MCExpr.h:483
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:135
MachineBasicBlock.h
llvm::NVPTXSubtarget::getSmVersion
unsigned int getSmVersion() const
Definition: NVPTXSubtarget.h:80
llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AsmPrinter.cpp:1707
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:124
llvm::MachineModuleInfo::hasDebugInfo
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition: MachineModuleInfo.h:203
APInt.h
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:233
llvm::NVPTXII::IsSustFlag
@ IsSustFlag
Definition: NVPTXBaseInfo.h:38
llvm::getMinCTASm
bool getMinCTASm(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:266
llvm::DenseMapIterator
Definition: DenseMap.h:56
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AsmPrinter::PrintSymbolOperand
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
Definition: AsmPrinterInlineAsm.cpp:589
DenseMap.h
Module.h
llvm::AttributeList
Definition: Attributes.h:399
llvm::NVPTXFloatMCExpr::createConstantFPDouble
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:53
printOperand
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
Definition: SelectionDAGDumper.cpp:946
llvm::isImage
bool isImage(const Value &val)
Definition: NVPTXUtilities.cpp:212
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:297
NVPTXMCAsmInfo.h
ret
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:757
Operator.h
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
NVPTXTargetStreamer.h
llvm::MCBinaryExpr
Binary assembler expressions.
Definition: MCExpr.h:480
caller
int caller(int32 arg1, int32 arg2)
Definition: README.txt:681
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:162
llvm::format_hex_no_prefix
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:199
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
llvm::MCUnaryExpr
Unary assembler expressions.
Definition: MCExpr.h:424
NVPTXRegisterInfo.h
ConstantFolding.h
llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:24
llvm::isTexture
bool isTexture(const Value &val)
Definition: NVPTXUtilities.cpp:133
llvm::MachineOperand::MO_Register
@ MO_Register
Register operand.
Definition: MachineOperand.h:52
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:92
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:90
NVPTX.h
MachineRegisterInfo.h
llvm::AsmPrinter::EmitToStreamer
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:243
llvm::isSampler
bool isSampler(const Value &val)
Definition: NVPTXUtilities.cpp:155
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::ComputeValueVTs
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:124
NVPTXUtilities.h
MachineValueType.h
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
cl_common_defines.h
NVPTXTargetInfo.h
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::end
iterator end()
Definition: DenseSet.h:174
Instruction.h
llvm::MachineBasicBlock::getSymbol
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
Definition: MachineBasicBlock.cpp:60
CommandLine.h
llvm::NVPTXFloatMCExpr::createConstantFPSingle
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:48
llvm::NVPTXAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
Definition: NVPTXAsmPrinter.cpp:481
TargetLowering.h
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Type::isSingleValueType
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
Definition: Type.h:248
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::getReqNTIDy
bool getReqNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:258
printMCExpr
static void printMCExpr(const MCExpr *E, raw_ostream &OS)
Definition: SystemZAsmParser.cpp:700
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:640
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:203
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:191
GlobalValue.h
MachineLoopInfo.h
llvm::ConstantDataSequential
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:569
TargetMachine.h
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:228
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:826
llvm::APInt::lshrInPlace
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:822
llvm::GlobalValue::hasAppendingLinkage
bool hasAppendingLinkage() const
Definition: GlobalValue.h:448
Constants.h
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:74
SmallString.h
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::MachineOperand::MO_GlobalAddress
@ MO_GlobalAddress
Address of a global value.
Definition: MachineOperand.h:62
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:537
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:728
llvm::User
Definition: User.h:44
llvm::MCExpr::Target
@ Target
Target specific expression.
Definition: MCExpr.h:42
llvm::ADDRESS_SPACE_GLOBAL
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:97
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3189
llvm::DICompileUnit::LineTablesOnly
@ LineTablesOnly
Definition: DebugInfoMetadata.h:1343
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
MCSymbol.h
llvm::GlobalValue::hasPrivateLinkage
bool hasPrivateLinkage() const
Definition: GlobalValue.h:450
llvm::NVPTXSubtarget::getPTXVersion
unsigned getPTXVersion() const
Definition: NVPTXSubtarget.h:83
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:226
MCInst.h
DenseSet.h
llvm::MCBinaryExpr::getRHS
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:630
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:195
llvm::getSamplerName
std::string getSamplerName(const Value &val)
Definition: NVPTXUtilities.cpp:237
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::ConstantArray
ConstantArray - Constant Array Declarations.
Definition: Constants.h:409
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::NVPTXTargetMachine::getManagedStrPool
ManagedStringPool * getManagedStrPool() const
Definition: NVPTXTargetMachine.h:49
llvm::getReqNTIDz
bool getReqNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:262
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:145
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::NVPTXSubtarget::getRegisterInfo
const NVPTXRegisterInfo * getRegisterInfo() const override
Definition: NVPTXSubtarget.h:61
llvm::ADDRESS_SPACE_CONST
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
llvm::AsmPrinter::CurrentFnSym
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:119
llvm::Module::global_iterator
GlobalListType::iterator global_iterator
The Global Variable iterator.
Definition: Module.h:87
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1460
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
APFloat.h
This file declares a class to represent arbitrary precision floating point values and provide a varie...
llvm::MCSymbolRefExpr::getSymbol
const MCSymbol & getSymbol() const
Definition: MCExpr.h:398
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:186
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:553
DebugLoc.h
llvm::GlobalValue::hasInternalLinkage
bool hasInternalLinkage() const
Definition: GlobalValue.h:449
llvm::Type::PointerTyID
@ PointerTyID
Pointers.
Definition: Type.h:73
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
__CLK_FILTER_MASK
@ __CLK_FILTER_MASK
Definition: cl_common_defines.h:97
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1130
llvm::MCExpr::getKind
ExprKind getKind() const
Definition: MCExpr.h:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::DICompileUnit::DebugDirectivesOnly
@ DebugDirectivesOnly
Definition: DebugInfoMetadata.h:1344
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MCSymbol::print
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:59
llvm::MCUnaryExpr::Minus
@ Minus
Unary minus.
Definition: MCExpr.h:428
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:345
llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition: AsmPrinter.cpp:3244
llvm::MachineLoopInfo::isLoopHeader
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
Definition: MachineLoopInfo.h:142
Type.h
llvm::MCExpr::Binary
@ Binary
Binary expressions.
Definition: MCExpr.h:38
llvm::DICompileUnit::FullDebug
@ FullDebug
Definition: DebugInfoMetadata.h:1342
NVPTXMachineFunctionInfo.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::getNVPTXRegClassStr
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:73
llvm::NVPTX::NVCL
@ NVCL
Definition: NVPTX.h:71
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
llvm::SmallString< 128 >
llvm::ManagedStringPool::getManagedString
std::string * getManagedString(const char *S)
Definition: ManagedStringPool.h:39
llvm::getTheNVPTXTarget64
Target & getTheNVPTXTarget64()
Definition: NVPTXTargetInfo.cpp:17
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::MCConstantExpr
Definition: MCExpr.h:144
llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31
llvm::NVPTXFloatMCExpr::createConstantFPHalf
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:43
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:274
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:630
llvm::NVPTXRegisterInfo::getName
const char * getName(unsigned RegNo) const
Definition: NVPTXRegisterInfo.h:52
BasicBlock.h
llvm::GlobalObject::hasSection
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:104
llvm::APFloat
Definition: APFloat.h:701
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:435
llvm::NVPTXII::IsTexModeUnifiedFlag
@ IsTexModeUnifiedFlag
Definition: NVPTXBaseInfo.h:40
LLVMInitializeNVPTXAsmPrinter
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
Definition: NVPTXAsmPrinter.cpp:2176
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:197
llvm::GlobalValue
Definition: GlobalValue.h:44
NVPTXAsmPrinter.h
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:136
VI
@ VI
Definition: SIInstrInfo.cpp:7685
llvm::NVPTXSubtarget::hasImageHandles
bool hasImageHandles() const
Definition: NVPTXSubtarget.cpp:55
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MachineBasicBlock::pred_end
pred_iterator pred_end()
Definition: MachineBasicBlock.h:318
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:739
DEPOTNAME
#define DEPOTNAME
Definition: NVPTXAsmPrinter.cpp:94
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:111
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:578
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:132
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::Type::isIntOrPtrTy
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:205
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::NVPTXAsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: NVPTXAsmPrinter.cpp:779
llvm::DenseMap< unsigned, unsigned >
DebugInfo.h
NVPTXBaseInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:59
Analysis.h
StringExtras.h
getOpenCLAlignment
static unsigned int getOpenCLAlignment(const DataLayout &DL, Type *Ty)
Definition: NVPTXAsmPrinter.cpp:1365
isEmptyXXStructor
static bool isEmptyXXStructor(GlobalVariable *GV)
Definition: NVPTXAsmPrinter.cpp:758
llvm::DICompileUnit
Compile unit.
Definition: DebugInfoMetadata.h:1335
llvm::ADDRESS_SPACE_SHARED
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
llvm::AttributeList::getParamAlignment
MaybeAlign getParamAlignment(unsigned ArgNo) const
Return the alignment for the specified function parameter.
Definition: Attributes.cpp:1406
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
useFuncSeen
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
Definition: NVPTXAsmPrinter.cpp:681
llvm::MachineOperand::getType
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Definition: MachineOperand.h:219
printMemOperand
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, const MachineFunction *MF, const Module *M, const MachineFrameInfo *MFI, const TargetInstrInfo *TII, LLVMContext &Ctx)
Definition: SelectionDAGDumper.cpp:513
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:141
llvm::MachineOperand::getFPImm
const ConstantFP * getFPImm() const
Definition: MachineOperand.h:547
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::begin
iterator begin()
Definition: DenseSet.h:173
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:486
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:860
llvm::NVPTXMachineFunctionInfo::getImageHandleSymbol
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
Definition: NVPTXMachineFunctionInfo.h:43
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:646
__CLK_FILTER_BASE
@ __CLK_FILTER_BASE
Definition: cl_common_defines.h:93
llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:100
llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:92
llvm::NVPTXII::IsTexFlag
@ IsTexFlag
Definition: NVPTXBaseInfo.h:35
MachineModuleInfo.h
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::MachineOperand::MO_FPImmediate
@ MO_FPImmediate
Floating-point immediate operand.
Definition: MachineOperand.h:55
__CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_BASE
Definition: cl_common_defines.h:85
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
VisitGlobalVariableForEmission
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
Definition: NVPTXAsmPrinter.cpp:116
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:360
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::MDNode
Metadata node.
Definition: Metadata.h:906
llvm::MCSymbolRefExpr
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:234
Triple.h
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1532
llvm::AsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:341
llvm::MCBinaryExpr::createAdd
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:524
llvm::NVPTXAsmPrinter::getVirtualRegisterName
std::string getVirtualRegisterName(unsigned) const
Definition: NVPTXAsmPrinter.cpp:578
llvm::DICompileUnit::NoDebug
@ NoDebug
Definition: DebugInfoMetadata.h:1341
llvm::AsmPrinter::GetExternalSymbolSymbol
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
Definition: AsmPrinter.cpp:3172
llvm::MCUnaryExpr::getSubExpr
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:470
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:552
DataLayout.h
llvm::SymbolTableList< GlobalVariable >
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:173
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::NVPTXII::IsSuldShift
@ IsSuldShift
Definition: NVPTXBaseInfo.h:37
GetSymbolRef
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
Definition: HexagonMCInstLower.cpp:41
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::MCOperand::createReg
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
TargetLoweringObjectFile.h
llvm::MachineOperand::MO_MachineBasicBlock
@ MO_MachineBasicBlock
MachineBasicBlock reference.
Definition: MachineOperand.h:56
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ConstantExpr::getIntegerCast
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2071
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:286
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:207
llvm::GlobalObject::getAlignment
uint64_t getAlignment() const
FIXME: Remove this function once transition to Align is over.
Definition: GlobalObject.h:71
llvm::NVPTXII::IsSurfTexQueryFlag
@ IsSurfTexQueryFlag
Definition: NVPTXBaseInfo.h:39
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Definition: MachineFrameInfo.h:569
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:687
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::AsmPrinter::MMI
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:103
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::Type::IntegerTyID
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
canDemoteGlobalVar
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
Definition: NVPTXAsmPrinter.cpp:663
llvm::NVPTXTargetStreamer
Implements NVPTX-specific streamer.
Definition: NVPTXTargetStreamer.h:18
name
static const char * name
Definition: SVEIntrinsicOpts.cpp:78
j
return j(j<< 16)
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::MCBinaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:624
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:936
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:52
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:596
llvm::getMaxNTIDz
bool getMaxNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:250
llvm::NVPTX::CUDA
@ CUDA
Definition: NVPTX.h:72
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:626
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Type::FixedVectorTyID
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
llvm::MachineOperand::MO_ExternalSymbol
@ MO_ExternalSymbol
Name of external global symbol.
Definition: MachineOperand.h:61
GlobalVariable.h
llvm::isSurface
bool isSurface(const Value &val)
Definition: NVPTXUtilities.cpp:144
llvm::MCUnaryExpr::LNot
@ LNot
Logical negation.
Definition: MCExpr.h:427
Casting.h
llvm::NVPTXTargetMachine::getSubtargetImpl
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: NVPTXTargetMachine.h:42
NVPTXTargetMachine.h
Function.h
__CLK_ADDRESS_MASK
@ __CLK_ADDRESS_MASK
Definition: cl_common_defines.h:79
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:100
llvm::APInt::getLoBits
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:590
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:393
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:385
llvm::TargetRegisterInfo::getNumRegClasses
unsigned getNumRegClasses() const
Definition: TargetRegisterInfo.h:733
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:323
llvm::getAlign
bool getAlign(const Function &F, unsigned index, unsigned &align)
Definition: NVPTXUtilities.cpp:284
llvm::getSurfaceName
std::string getSurfaceName(const Value &val)
Definition: NVPTXUtilities.cpp:232
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:79
llvm::getMaxNTIDy
bool getMaxNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:246
llvm::isImageWriteOnly
bool isImageWriteOnly(const Value &val)
Definition: NVPTXUtilities.cpp:188
llvm::Type::StructTyID
@ StructTyID
Structures.
Definition: Type.h:74
llvm::LegalityPredicates::isVector
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition: LegalityPredicates.cpp:73
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:107
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:190
align
mov r0 ldr L5 sub r0 lr needed for prologue ldmia ip add bx lr r2 The last stmia stores r2 into the address passed in there is one additional stmia that stores and r2 to some stack location The store is dead The llvm gcc generated code looks like align
Definition: README.txt:236
llvm::clearAnnotationCache
void clearAnnotationCache(const Module *Mod)
Definition: NVPTXUtilities.cpp:41
llvm::MCExpr::print
void print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens=false) const
Definition: MCExpr.cpp:42
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4836
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:85
SmallVector.h
llvm::NVPTXSubtarget::getTargetName
std::string getTargetName() const
Definition: NVPTXSubtarget.h:81
User.h
llvm::MCExpr::SymbolRef
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
__CLK_NORMALIZED_MASK
@ __CLK_NORMALIZED_MASK
Definition: cl_common_defines.h:88
llvm::MachineOperand::getSymbolName
const char * getSymbolName() const
Definition: MachineOperand.h:608
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a reference to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:166
llvm::MCExpr::Unary
@ Unary
Unary expressions.
Definition: MCExpr.h:41
MCStreamer.h
UnrollLoop.h
llvm::Module::const_iterator
FunctionListType::const_iterator const_iterator
The Function constant iterator.
Definition: Module.h:94
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::MCExpr::Constant
@ Constant
Constant expressions.
Definition: MCExpr.h:39
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:658
NVPTXSubtarget.h
llvm::NVPTXMachineFunctionInfo
Definition: NVPTXMachineFunctionInfo.h:20
llvm::AsmPrinter::getDataLayout
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:228
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::erase
bool erase(const ValueT &V)
Definition: DenseSet.h:101
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:149
llvm::GlobalValue::hasLinkOnceLinkage
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:438
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::ConstantFoldConstant
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
Definition: ConstantFolding.cpp:1145
MachineOperand.h
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:271
DerivedTypes.h
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:273
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
llvm::NVPTXAsmPrinter::getFunctionFrameSymbol
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
Definition: NVPTXAsmPrinter.cpp:503
llvm::AsmPrinter::emitInitialRawDwarfLocDirective
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:247
llvm::MCSymbolRefExpr::VK_None
@ VK_None
Definition: MCExpr.h:195
llvm::Type::isAggregateType
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:256
llvm::NVPTXII::IsSuldMask
@ IsSuldMask
Definition: NVPTXBaseInfo.h:36
LLVMContext.h
llvm::NVPTXTargetMachine::getDrvInterface
NVPTX::DrvInterface getDrvInterface() const
Definition: NVPTXTargetMachine.h:48
llvm::RegisterAsmPrinter
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...
Definition: TargetRegistry.h:1338
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::getMaxNTIDx
bool getMaxNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:242
llvm::getTextureName
std::string getTextureName(const Value &val)
Definition: NVPTXUtilities.cpp:227
raw_ostream.h
llvm::getMaxNReg
bool getMaxNReg(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:270
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
llvm::AttributeList::hasParamAttr
bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the attribute exists for the given argument.
Definition: Attributes.h:722
MachineFunction.h
llvm::getNVPTXRegClassName
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:29
TargetRegistry.h
MCExpr.h
llvm::isManaged
bool isManaged(const Value &val)
Definition: NVPTXUtilities.cpp:216
CU
Definition: AArch64AsmBackend.cpp:501
llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:220
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
TargetRegisterInfo.h
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
NVPTXMCExpr.h
llvm::Type::HalfTyID
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:422
llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AsmPrinterInlineAsm.cpp:599
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:346
llvm::MCUnaryExpr::Not
@ Not
Bitwise negation.
Definition: MCExpr.h:429
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:166
llvm::MCBinaryExpr::getLHS
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:627
llvm::AsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:267
llvm::getTheNVPTXTarget32
Target & getTheNVPTXTarget32()
Definition: NVPTXTargetInfo.cpp:13