LLVM  15.0.0git
NVPTXAsmPrinter.cpp
Go to the documentation of this file.
1 //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a printer that converts from our internal representation
10 // of machine-dependent LLVM code to NVPTX assembly language.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "NVPTXAsmPrinter.h"
19 #include "NVPTX.h"
20 #include "NVPTXMCExpr.h"
22 #include "NVPTXRegisterInfo.h"
23 #include "NVPTXSubtarget.h"
24 #include "NVPTXTargetMachine.h"
25 #include "NVPTXUtilities.h"
27 #include "cl_common_defines.h"
28 #include "llvm/ADT/APFloat.h"
29 #include "llvm/ADT/APInt.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/DenseSet.h"
32 #include "llvm/ADT/SmallString.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/StringExtras.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/Triple.h"
37 #include "llvm/ADT/Twine.h"
39 #include "llvm/CodeGen/Analysis.h"
50 #include "llvm/IR/Attributes.h"
51 #include "llvm/IR/BasicBlock.h"
52 #include "llvm/IR/Constant.h"
53 #include "llvm/IR/Constants.h"
54 #include "llvm/IR/DataLayout.h"
55 #include "llvm/IR/DebugInfo.h"
57 #include "llvm/IR/DebugLoc.h"
58 #include "llvm/IR/DerivedTypes.h"
59 #include "llvm/IR/Function.h"
60 #include "llvm/IR/GlobalValue.h"
61 #include "llvm/IR/GlobalVariable.h"
62 #include "llvm/IR/Instruction.h"
63 #include "llvm/IR/LLVMContext.h"
64 #include "llvm/IR/Module.h"
65 #include "llvm/IR/Operator.h"
66 #include "llvm/IR/Type.h"
67 #include "llvm/IR/User.h"
68 #include "llvm/MC/MCExpr.h"
69 #include "llvm/MC/MCInst.h"
70 #include "llvm/MC/MCInstrDesc.h"
71 #include "llvm/MC/MCStreamer.h"
72 #include "llvm/MC/MCSymbol.h"
73 #include "llvm/MC/TargetRegistry.h"
74 #include "llvm/Support/Casting.h"
78 #include "llvm/Support/Path.h"
83 #include <cassert>
84 #include <cstdint>
85 #include <cstring>
86 #include <new>
87 #include <string>
88 #include <utility>
89 #include <vector>
90 
91 using namespace llvm;
92 
93 #define DEPOTNAME "__local_depot"
94 
95 /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
96 /// depends.
97 static void
100  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
101  Globals.insert(GV);
102  else {
103  if (const User *U = dyn_cast<User>(V)) {
104  for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) {
105  DiscoverDependentGlobals(U->getOperand(i), Globals);
106  }
107  }
108  }
109 }
110 
111 /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable
112 /// instances to be emitted, but only after any dependents have been added
113 /// first.
114 static void
119  // Have we already visited this one?
120  if (Visited.count(GV))
121  return;
122 
123  // Do we have a circular dependency?
124  if (!Visiting.insert(GV).second)
125  report_fatal_error("Circular dependency found in global variable set");
126 
127  // Make sure we visit all dependents first
129  for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i)
130  DiscoverDependentGlobals(GV->getOperand(i), Others);
131 
132  for (const GlobalVariable *GV : Others)
133  VisitGlobalVariableForEmission(GV, Order, Visited, Visiting);
134 
135  // Now we can visit ourself
136  Order.push_back(GV);
137  Visited.insert(GV);
138  Visiting.erase(GV);
139 }
140 
141 void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) {
142  MCInst Inst;
143  lowerToMCInst(MI, Inst);
144  EmitToStreamer(*OutStreamer, Inst);
145 }
146 
147 // Handle symbol backtracking for targets that do not support image handles
148 bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI,
149  unsigned OpNo, MCOperand &MCOp) {
150  const MachineOperand &MO = MI->getOperand(OpNo);
151  const MCInstrDesc &MCID = MI->getDesc();
152 
153  if (MCID.TSFlags & NVPTXII::IsTexFlag) {
154  // This is a texture fetch, so operand 4 is a texref and operand 5 is
155  // a samplerref
156  if (OpNo == 4 && MO.isImm()) {
157  lowerImageHandleSymbol(MO.getImm(), MCOp);
158  return true;
159  }
160  if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) {
161  lowerImageHandleSymbol(MO.getImm(), MCOp);
162  return true;
163  }
164 
165  return false;
166  } else if (MCID.TSFlags & NVPTXII::IsSuldMask) {
167  unsigned VecSize =
168  1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1);
169 
170  // For a surface load of vector size N, the Nth operand will be the surfref
171  if (OpNo == VecSize && MO.isImm()) {
172  lowerImageHandleSymbol(MO.getImm(), MCOp);
173  return true;
174  }
175 
176  return false;
177  } else if (MCID.TSFlags & NVPTXII::IsSustFlag) {
178  // This is a surface store, so operand 0 is a surfref
179  if (OpNo == 0 && MO.isImm()) {
180  lowerImageHandleSymbol(MO.getImm(), MCOp);
181  return true;
182  }
183 
184  return false;
185  } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) {
186  // This is a query, so operand 1 is a surfref/texref
187  if (OpNo == 1 && MO.isImm()) {
188  lowerImageHandleSymbol(MO.getImm(), MCOp);
189  return true;
190  }
191 
192  return false;
193  }
194 
195  return false;
196 }
197 
198 void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
199  // Ewwww
200  LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget());
201  NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM);
203  const char *Sym = MFI->getImageHandleSymbol(Index);
204  std::string *SymNamePtr =
205  nvTM.getManagedStrPool()->getManagedString(Sym);
206  MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
207 }
208 
209 void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
210  OutMI.setOpcode(MI->getOpcode());
211  // Special: Do not mangle symbol operand of CALL_PROTOTYPE
212  if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) {
213  const MachineOperand &MO = MI->getOperand(0);
214  OutMI.addOperand(GetSymbolRef(
216  return;
217  }
218 
219  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
220  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
221  const MachineOperand &MO = MI->getOperand(i);
222 
223  MCOperand MCOp;
224  if (!STI.hasImageHandles()) {
225  if (lowerImageHandleOperand(MI, i, MCOp)) {
226  OutMI.addOperand(MCOp);
227  continue;
228  }
229  }
230 
231  if (lowerOperand(MO, MCOp))
232  OutMI.addOperand(MCOp);
233  }
234 }
235 
236 bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
237  MCOperand &MCOp) {
238  switch (MO.getType()) {
239  default: llvm_unreachable("unknown operand type");
241  MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg()));
242  break;
244  MCOp = MCOperand::createImm(MO.getImm());
245  break;
248  MO.getMBB()->getSymbol(), OutContext));
249  break;
252  break;
254  MCOp = GetSymbolRef(getSymbol(MO.getGlobal()));
255  break;
257  const ConstantFP *Cnt = MO.getFPImm();
258  const APFloat &Val = Cnt->getValueAPF();
259 
260  switch (Cnt->getType()->getTypeID()) {
261  default: report_fatal_error("Unsupported FP type"); break;
262  case Type::HalfTyID:
263  MCOp = MCOperand::createExpr(
265  break;
266  case Type::FloatTyID:
267  MCOp = MCOperand::createExpr(
269  break;
270  case Type::DoubleTyID:
271  MCOp = MCOperand::createExpr(
273  break;
274  }
275  break;
276  }
277  }
278  return true;
279 }
280 
281 unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
283  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
284 
285  DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC];
286  unsigned RegNum = RegMap[Reg];
287 
288  // Encode the register class in the upper 4 bits
289  // Must be kept in sync with NVPTXInstPrinter::printRegName
290  unsigned Ret = 0;
291  if (RC == &NVPTX::Int1RegsRegClass) {
292  Ret = (1 << 28);
293  } else if (RC == &NVPTX::Int16RegsRegClass) {
294  Ret = (2 << 28);
295  } else if (RC == &NVPTX::Int32RegsRegClass) {
296  Ret = (3 << 28);
297  } else if (RC == &NVPTX::Int64RegsRegClass) {
298  Ret = (4 << 28);
299  } else if (RC == &NVPTX::Float32RegsRegClass) {
300  Ret = (5 << 28);
301  } else if (RC == &NVPTX::Float64RegsRegClass) {
302  Ret = (6 << 28);
303  } else if (RC == &NVPTX::Float16RegsRegClass) {
304  Ret = (7 << 28);
305  } else if (RC == &NVPTX::Float16x2RegsRegClass) {
306  Ret = (8 << 28);
307  } else {
308  report_fatal_error("Bad register class");
309  }
310 
311  // Insert the vreg number
312  Ret |= (RegNum & 0x0FFFFFFF);
313  return Ret;
314  } else {
315  // Some special-use registers are actually physical registers.
316  // Encode this as the register class ID of 0 and the real register ID.
317  return Reg & 0x0FFFFFFF;
318  }
319 }
320 
322  const MCExpr *Expr;
324  OutContext);
325  return MCOperand::createExpr(Expr);
326 }
327 
328 void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
329  const DataLayout &DL = getDataLayout();
330  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
331  const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
332 
333  Type *Ty = F->getReturnType();
334 
335  bool isABI = (STI.getSmVersion() >= 20);
336 
337  if (Ty->getTypeID() == Type::VoidTyID)
338  return;
339 
340  O << " (";
341 
342  if (isABI) {
343  if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
344  unsigned size = 0;
345  if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
346  size = ITy->getBitWidth();
347  } else {
348  assert(Ty->isFloatingPointTy() && "Floating point type expected here");
350  }
351  // PTX ABI requires all scalar return values to be at least 32
352  // bits in size. fp16 normally uses .b16 as its storage type in
353  // PTX, so its size must be adjusted here, too.
354  if (size < 32)
355  size = 32;
356 
357  O << ".param .b" << size << " func_retval0";
358  } else if (isa<PointerType>(Ty)) {
359  O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
360  << " func_retval0";
361  } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
362  unsigned totalsz = DL.getTypeAllocSize(Ty);
363  unsigned retAlignment = 0;
364  if (!getAlign(*F, 0, retAlignment))
365  retAlignment = TLI->getFunctionParamOptimizedAlign(F, Ty, DL).value();
366  O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
367  << "]";
368  } else
369  llvm_unreachable("Unknown return type");
370  } else {
371  SmallVector<EVT, 16> vtparts;
372  ComputeValueVTs(*TLI, DL, Ty, vtparts);
373  unsigned idx = 0;
374  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
375  unsigned elems = 1;
376  EVT elemtype = vtparts[i];
377  if (vtparts[i].isVector()) {
378  elems = vtparts[i].getVectorNumElements();
379  elemtype = vtparts[i].getVectorElementType();
380  }
381 
382  for (unsigned j = 0, je = elems; j != je; ++j) {
383  unsigned sz = elemtype.getSizeInBits();
384  if (elemtype.isInteger() && (sz < 32))
385  sz = 32;
386  O << ".reg .b" << sz << " func_retval" << idx;
387  if (j < je - 1)
388  O << ", ";
389  ++idx;
390  }
391  if (i < e - 1)
392  O << ", ";
393  }
394  }
395  O << ") ";
396 }
397 
398 void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
399  raw_ostream &O) {
400  const Function &F = MF.getFunction();
401  printReturnValStr(&F, O);
402 }
403 
404 // Return true if MBB is the header of a loop marked with
405 // llvm.loop.unroll.disable.
406 // TODO: consider "#pragma unroll 1" which is equivalent to "#pragma nounroll".
407 bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
408  const MachineBasicBlock &MBB) const {
409  MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
410  // We insert .pragma "nounroll" only to the loop header.
411  if (!LI.isLoopHeader(&MBB))
412  return false;
413 
414  // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
415  // we iterate through each back edge of the loop with header MBB, and check
416  // whether its metadata contains llvm.loop.unroll.disable.
417  for (const MachineBasicBlock *PMBB : MBB.predecessors()) {
418  if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
419  // Edges from other loops to MBB are not back edges.
420  continue;
421  }
422  if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
423  if (MDNode *LoopID =
424  PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) {
425  if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
426  return true;
427  }
428  }
429  }
430  return false;
431 }
432 
433 void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
435  if (isLoopHeaderOfNoUnroll(MBB))
436  OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n"));
437 }
438 
439 void NVPTXAsmPrinter::emitFunctionEntryLabel() {
440  SmallString<128> Str;
441  raw_svector_ostream O(Str);
442 
443  if (!GlobalsEmitted) {
444  emitGlobals(*MF->getFunction().getParent());
445  GlobalsEmitted = true;
446  }
447 
448  // Set up
449  MRI = &MF->getRegInfo();
450  F = &MF->getFunction();
451  emitLinkageDirective(F, O);
452  if (isKernelFunction(*F))
453  O << ".entry ";
454  else {
455  O << ".func ";
456  printReturnValStr(*MF, O);
457  }
458 
459  CurrentFnSym->print(O, MAI);
460 
461  emitFunctionParamList(*MF, O);
462 
463  if (isKernelFunction(*F))
464  emitKernelFunctionDirectives(*F, O);
465 
466  OutStreamer->emitRawText(O.str());
467 
468  VRegMapping.clear();
469  // Emit open brace for function body.
470  OutStreamer->emitRawText(StringRef("{\n"));
471  setAndEmitFunctionVirtualRegisters(*MF);
472  // Emit initial .loc debug directive for correct relocation symbol data.
473  if (MMI && MMI->hasDebugInfo())
475 }
476 
478  bool Result = AsmPrinter::runOnMachineFunction(F);
479  // Emit closing brace for the body of function F.
480  // The closing brace must be emitted here because we need to emit additional
481  // debug labels/data after the last basic block.
482  // We need to emit the closing brace here because we don't have function that
483  // finished emission of the function body.
484  OutStreamer->emitRawText(StringRef("}\n"));
485  return Result;
486 }
487 
488 void NVPTXAsmPrinter::emitFunctionBodyStart() {
489  SmallString<128> Str;
490  raw_svector_ostream O(Str);
491  emitDemotedVars(&MF->getFunction(), O);
492  OutStreamer->emitRawText(O.str());
493 }
494 
495 void NVPTXAsmPrinter::emitFunctionBodyEnd() {
496  VRegMapping.clear();
497 }
498 
500  SmallString<128> Str;
502  return OutContext.getOrCreateSymbol(Str);
503 }
504 
505 void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
506  Register RegNo = MI->getOperand(0).getReg();
507  if (Register::isVirtualRegister(RegNo)) {
508  OutStreamer->AddComment(Twine("implicit-def: ") +
509  getVirtualRegisterName(RegNo));
510  } else {
511  const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>();
512  OutStreamer->AddComment(Twine("implicit-def: ") +
513  STI.getRegisterInfo()->getName(RegNo));
514  }
515  OutStreamer->AddBlankLine();
516 }
517 
518 void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
519  raw_ostream &O) const {
520  // If the NVVM IR has some of reqntid* specified, then output
521  // the reqntid directive, and set the unspecified ones to 1.
522  // If none of reqntid* is specified, don't output reqntid directive.
523  unsigned reqntidx, reqntidy, reqntidz;
524  bool specified = false;
525  if (!getReqNTIDx(F, reqntidx))
526  reqntidx = 1;
527  else
528  specified = true;
529  if (!getReqNTIDy(F, reqntidy))
530  reqntidy = 1;
531  else
532  specified = true;
533  if (!getReqNTIDz(F, reqntidz))
534  reqntidz = 1;
535  else
536  specified = true;
537 
538  if (specified)
539  O << ".reqntid " << reqntidx << ", " << reqntidy << ", " << reqntidz
540  << "\n";
541 
542  // If the NVVM IR has some of maxntid* specified, then output
543  // the maxntid directive, and set the unspecified ones to 1.
544  // If none of maxntid* is specified, don't output maxntid directive.
545  unsigned maxntidx, maxntidy, maxntidz;
546  specified = false;
547  if (!getMaxNTIDx(F, maxntidx))
548  maxntidx = 1;
549  else
550  specified = true;
551  if (!getMaxNTIDy(F, maxntidy))
552  maxntidy = 1;
553  else
554  specified = true;
555  if (!getMaxNTIDz(F, maxntidz))
556  maxntidz = 1;
557  else
558  specified = true;
559 
560  if (specified)
561  O << ".maxntid " << maxntidx << ", " << maxntidy << ", " << maxntidz
562  << "\n";
563 
564  unsigned mincta;
565  if (getMinCTASm(F, mincta))
566  O << ".minnctapersm " << mincta << "\n";
567 
568  unsigned maxnreg;
569  if (getMaxNReg(F, maxnreg))
570  O << ".maxnreg " << maxnreg << "\n";
571 }
572 
573 std::string
575  const TargetRegisterClass *RC = MRI->getRegClass(Reg);
576 
577  std::string Name;
578  raw_string_ostream NameStr(Name);
579 
580  VRegRCMap::const_iterator I = VRegMapping.find(RC);
581  assert(I != VRegMapping.end() && "Bad register class");
582  const DenseMap<unsigned, unsigned> &RegMap = I->second;
583 
585  assert(VI != RegMap.end() && "Bad virtual register");
586  unsigned MappedVR = VI->second;
587 
588  NameStr << getNVPTXRegClassStr(RC) << MappedVR;
589 
590  NameStr.flush();
591  return Name;
592 }
593 
594 void NVPTXAsmPrinter::emitVirtualRegister(unsigned int vr,
595  raw_ostream &O) {
596  O << getVirtualRegisterName(vr);
597 }
598 
599 void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
600  emitLinkageDirective(F, O);
601  if (isKernelFunction(*F))
602  O << ".entry ";
603  else
604  O << ".func ";
605  printReturnValStr(F, O);
606  getSymbol(F)->print(O, MAI);
607  O << "\n";
608  emitFunctionParamList(F, O);
609  O << ";\n";
610 }
611 
612 static bool usedInGlobalVarDef(const Constant *C) {
613  if (!C)
614  return false;
615 
616  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
617  return GV->getName() != "llvm.used";
618  }
619 
620  for (const User *U : C->users())
621  if (const Constant *C = dyn_cast<Constant>(U))
622  if (usedInGlobalVarDef(C))
623  return true;
624 
625  return false;
626 }
627 
628 static bool usedInOneFunc(const User *U, Function const *&oneFunc) {
629  if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) {
630  if (othergv->getName() == "llvm.used")
631  return true;
632  }
633 
634  if (const Instruction *instr = dyn_cast<Instruction>(U)) {
635  if (instr->getParent() && instr->getParent()->getParent()) {
636  const Function *curFunc = instr->getParent()->getParent();
637  if (oneFunc && (curFunc != oneFunc))
638  return false;
639  oneFunc = curFunc;
640  return true;
641  } else
642  return false;
643  }
644 
645  for (const User *UU : U->users())
646  if (!usedInOneFunc(UU, oneFunc))
647  return false;
648 
649  return true;
650 }
651 
652 /* Find out if a global variable can be demoted to local scope.
653  * Currently, this is valid for CUDA shared variables, which have local
654  * scope and global lifetime. So the conditions to check are :
655  * 1. Is the global variable in shared address space?
656  * 2. Does it have internal linkage?
657  * 3. Is the global variable referenced only in one function?
658  */
659 static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) {
660  if (!gv->hasInternalLinkage())
661  return false;
662  PointerType *Pty = gv->getType();
664  return false;
665 
666  const Function *oneFunc = nullptr;
667 
668  bool flag = usedInOneFunc(gv, oneFunc);
669  if (!flag)
670  return false;
671  if (!oneFunc)
672  return false;
673  f = oneFunc;
674  return true;
675 }
676 
677 static bool useFuncSeen(const Constant *C,
679  for (const User *U : C->users()) {
680  if (const Constant *cu = dyn_cast<Constant>(U)) {
681  if (useFuncSeen(cu, seenMap))
682  return true;
683  } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
684  const BasicBlock *bb = I->getParent();
685  if (!bb)
686  continue;
687  const Function *caller = bb->getParent();
688  if (!caller)
689  continue;
690  if (seenMap.find(caller) != seenMap.end())
691  return true;
692  }
693  }
694  return false;
695 }
696 
697 void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
699  for (const Function &F : M) {
700  if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) {
701  emitDeclaration(&F, O);
702  continue;
703  }
704 
705  if (F.isDeclaration()) {
706  if (F.use_empty())
707  continue;
708  if (F.getIntrinsicID())
709  continue;
710  emitDeclaration(&F, O);
711  continue;
712  }
713  for (const User *U : F.users()) {
714  if (const Constant *C = dyn_cast<Constant>(U)) {
715  if (usedInGlobalVarDef(C)) {
716  // The use is in the initialization of a global variable
717  // that is a function pointer, so print a declaration
718  // for the original function
719  emitDeclaration(&F, O);
720  break;
721  }
722  // Emit a declaration of this function if the function that
723  // uses this constant expr has already been seen.
724  if (useFuncSeen(C, seenMap)) {
725  emitDeclaration(&F, O);
726  break;
727  }
728  }
729 
730  if (!isa<Instruction>(U))
731  continue;
732  const Instruction *instr = cast<Instruction>(U);
733  const BasicBlock *bb = instr->getParent();
734  if (!bb)
735  continue;
736  const Function *caller = bb->getParent();
737  if (!caller)
738  continue;
739 
740  // If a caller has already been seen, then the caller is
741  // appearing in the module before the callee. so print out
742  // a declaration for the callee.
743  if (seenMap.find(caller) != seenMap.end()) {
744  emitDeclaration(&F, O);
745  break;
746  }
747  }
748  seenMap[&F] = true;
749  }
750 }
751 
753  if (!GV) return true;
754  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
755  if (!InitList) return true; // Not an array; we don't know how to parse.
756  return InitList->getNumOperands() == 0;
757 }
758 
759 void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
760  // Construct a default subtarget off of the TargetMachine defaults. The
761  // rest of NVPTX isn't friendly to change subtargets per function and
762  // so the default TargetMachine will have all of the options.
763  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
764  const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl());
765  SmallString<128> Str1;
766  raw_svector_ostream OS1(Str1);
767 
768  // Emit header before any dwarf directives are emitted below.
769  emitHeader(M, OS1, *STI);
770  OutStreamer->emitRawText(OS1.str());
771 }
772 
774  if (M.alias_size()) {
775  report_fatal_error("Module has aliases, which NVPTX does not support.");
776  return true; // error
777  }
778  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
780  "Module has a nontrivial global ctor, which NVPTX does not support.");
781  return true; // error
782  }
783  if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
785  "Module has a nontrivial global dtor, which NVPTX does not support.");
786  return true; // error
787  }
788 
789  // We need to call the parent's one explicitly.
790  bool Result = AsmPrinter::doInitialization(M);
791 
792  GlobalsEmitted = false;
793 
794  return Result;
795 }
796 
797 void NVPTXAsmPrinter::emitGlobals(const Module &M) {
798  SmallString<128> Str2;
799  raw_svector_ostream OS2(Str2);
800 
801  emitDeclarations(M, OS2);
802 
803  // As ptxas does not support forward references of globals, we need to first
804  // sort the list of module-level globals in def-use order. We visit each
805  // global variable in order, and ensure that we emit it *after* its dependent
806  // globals. We use a little extra memory maintaining both a set and a list to
807  // have fast searches while maintaining a strict ordering.
811 
812  // Visit each global variable, in order
813  for (const GlobalVariable &I : M.globals())
814  VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
815 
816  assert(GVVisited.size() == M.getGlobalList().size() &&
817  "Missed a global variable");
818  assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
819 
820  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
821  const NVPTXSubtarget &STI =
822  *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
823 
824  // Print out module-level global variables in proper order
825  for (unsigned i = 0, e = Globals.size(); i != e; ++i)
826  printModuleLevelGV(Globals[i], OS2, /*processDemoted=*/false, STI);
827 
828  OS2 << '\n';
829 
830  OutStreamer->emitRawText(OS2.str());
831 }
832 
833 void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
834  const NVPTXSubtarget &STI) {
835  O << "//\n";
836  O << "// Generated by LLVM NVPTX Back-End\n";
837  O << "//\n";
838  O << "\n";
839 
840  unsigned PTXVersion = STI.getPTXVersion();
841  O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n";
842 
843  O << ".target ";
844  O << STI.getTargetName();
845 
846  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
847  if (NTM.getDrvInterface() == NVPTX::NVCL)
848  O << ", texmode_independent";
849 
850  bool HasFullDebugInfo = false;
851  for (DICompileUnit *CU : M.debug_compile_units()) {
852  switch(CU->getEmissionKind()) {
855  break;
858  HasFullDebugInfo = true;
859  break;
860  }
861  if (HasFullDebugInfo)
862  break;
863  }
864  if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo)
865  O << ", debug";
866 
867  O << "\n";
868 
869  O << ".address_size ";
870  if (NTM.is64Bit())
871  O << "64";
872  else
873  O << "32";
874  O << "\n";
875 
876  O << "\n";
877 }
878 
880  bool HasDebugInfo = MMI && MMI->hasDebugInfo();
881 
882  // If we did not emit any functions, then the global declarations have not
883  // yet been emitted.
884  if (!GlobalsEmitted) {
885  emitGlobals(M);
886  GlobalsEmitted = true;
887  }
888 
889  // call doFinalization
891 
893 
894  if (auto *TS = static_cast<NVPTXTargetStreamer *>(
895  OutStreamer->getTargetStreamer())) {
896  // Close the last emitted section
897  if (HasDebugInfo) {
898  TS->closeLastSection();
899  // Emit empty .debug_loc section for better support of the empty files.
900  OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}");
901  }
902 
903  // Output last DWARF .file directives, if any.
904  TS->outputDwarfFileDirectives();
905  }
906 
907  return ret;
908 
909  //bool Result = AsmPrinter::doFinalization(M);
910  // Instead of calling the parents doFinalization, we may
911  // clone parents doFinalization and customize here.
912  // Currently, we if NVISA out the EmitGlobals() in
913  // parent's doFinalization, which is too intrusive.
914  //
915  // Same for the doInitialization.
916  //return Result;
917 }
918 
919 // This function emits appropriate linkage directives for
920 // functions and global variables.
921 //
922 // extern function declaration -> .extern
923 // extern function definition -> .visible
924 // external global variable with init -> .visible
925 // external without init -> .extern
926 // appending -> not allowed, assert.
927 // for any linkage other than
928 // internal, private, linker_private,
929 // linker_private_weak, linker_private_weak_def_auto,
930 // we emit -> .weak.
931 
932 void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V,
933  raw_ostream &O) {
934  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) {
935  if (V->hasExternalLinkage()) {
936  if (isa<GlobalVariable>(V)) {
937  const GlobalVariable *GVar = cast<GlobalVariable>(V);
938  if (GVar) {
939  if (GVar->hasInitializer())
940  O << ".visible ";
941  else
942  O << ".extern ";
943  }
944  } else if (V->isDeclaration())
945  O << ".extern ";
946  else
947  O << ".visible ";
948  } else if (V->hasAppendingLinkage()) {
949  std::string msg;
950  msg.append("Error: ");
951  msg.append("Symbol ");
952  if (V->hasName())
953  msg.append(std::string(V->getName()));
954  msg.append("has unsupported appending linkage type");
955  llvm_unreachable(msg.c_str());
956  } else if (!V->hasInternalLinkage() &&
957  !V->hasPrivateLinkage()) {
958  O << ".weak ";
959  }
960  }
961 }
962 
963 void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
964  raw_ostream &O, bool processDemoted,
965  const NVPTXSubtarget &STI) {
966  // Skip meta data
967  if (GVar->hasSection()) {
968  if (GVar->getSection() == "llvm.metadata")
969  return;
970  }
971 
972  // Skip LLVM intrinsic global variables
973  if (GVar->getName().startswith("llvm.") ||
974  GVar->getName().startswith("nvvm."))
975  return;
976 
977  const DataLayout &DL = getDataLayout();
978 
979  // GlobalVariables are always constant pointers themselves.
980  PointerType *PTy = GVar->getType();
981  Type *ETy = GVar->getValueType();
982 
983  if (GVar->hasExternalLinkage()) {
984  if (GVar->hasInitializer())
985  O << ".visible ";
986  else
987  O << ".extern ";
988  } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() ||
990  GVar->hasCommonLinkage()) {
991  O << ".weak ";
992  }
993 
994  if (isTexture(*GVar)) {
995  O << ".global .texref " << getTextureName(*GVar) << ";\n";
996  return;
997  }
998 
999  if (isSurface(*GVar)) {
1000  O << ".global .surfref " << getSurfaceName(*GVar) << ";\n";
1001  return;
1002  }
1003 
1004  if (GVar->isDeclaration()) {
1005  // (extern) declarations, no definition or initializer
1006  // Currently the only known declaration is for an automatic __local
1007  // (.shared) promoted to global.
1008  emitPTXGlobalVariable(GVar, O, STI);
1009  O << ";\n";
1010  return;
1011  }
1012 
1013  if (isSampler(*GVar)) {
1014  O << ".global .samplerref " << getSamplerName(*GVar);
1015 
1016  const Constant *Initializer = nullptr;
1017  if (GVar->hasInitializer())
1018  Initializer = GVar->getInitializer();
1019  const ConstantInt *CI = nullptr;
1020  if (Initializer)
1021  CI = dyn_cast<ConstantInt>(Initializer);
1022  if (CI) {
1023  unsigned sample = CI->getZExtValue();
1024 
1025  O << " = { ";
1026 
1027  for (int i = 0,
1028  addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE);
1029  i < 3; i++) {
1030  O << "addr_mode_" << i << " = ";
1031  switch (addr) {
1032  case 0:
1033  O << "wrap";
1034  break;
1035  case 1:
1036  O << "clamp_to_border";
1037  break;
1038  case 2:
1039  O << "clamp_to_edge";
1040  break;
1041  case 3:
1042  O << "wrap";
1043  break;
1044  case 4:
1045  O << "mirror";
1046  break;
1047  }
1048  O << ", ";
1049  }
1050  O << "filter_mode = ";
1051  switch ((sample & __CLK_FILTER_MASK) >> __CLK_FILTER_BASE) {
1052  case 0:
1053  O << "nearest";
1054  break;
1055  case 1:
1056  O << "linear";
1057  break;
1058  case 2:
1059  llvm_unreachable("Anisotropic filtering is not supported");
1060  default:
1061  O << "nearest";
1062  break;
1063  }
1064  if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) {
1065  O << ", force_unnormalized_coords = 1";
1066  }
1067  O << " }";
1068  }
1069 
1070  O << ";\n";
1071  return;
1072  }
1073 
1074  if (GVar->hasPrivateLinkage()) {
1075  if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0)
1076  return;
1077 
1078  // FIXME - need better way (e.g. Metadata) to avoid generating this global
1079  if (strncmp(GVar->getName().data(), "filename", 8) == 0)
1080  return;
1081  if (GVar->use_empty())
1082  return;
1083  }
1084 
1085  const Function *demotedFunc = nullptr;
1086  if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) {
1087  O << "// " << GVar->getName() << " has been demoted\n";
1088  if (localDecls.find(demotedFunc) != localDecls.end())
1089  localDecls[demotedFunc].push_back(GVar);
1090  else {
1091  std::vector<const GlobalVariable *> temp;
1092  temp.push_back(GVar);
1093  localDecls[demotedFunc] = temp;
1094  }
1095  return;
1096  }
1097 
1098  O << ".";
1099  emitPTXAddressSpace(PTy->getAddressSpace(), O);
1100 
1101  if (isManaged(*GVar)) {
1102  if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1104  ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1105  }
1106  O << " .attribute(.managed)";
1107  }
1108 
1109  if (MaybeAlign A = GVar->getAlign())
1110  O << " .align " << A->value();
1111  else
1112  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1113 
1114  if (ETy->isFloatingPointTy() || ETy->isPointerTy() ||
1115  (ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) {
1116  O << " .";
1117  // Special case: ABI requires that we use .u8 for predicates
1118  if (ETy->isIntegerTy(1))
1119  O << "u8";
1120  else
1121  O << getPTXFundamentalTypeStr(ETy, false);
1122  O << " ";
1123  getSymbol(GVar)->print(O, MAI);
1124 
1125  // Ptx allows variable initilization only for constant and global state
1126  // spaces.
1127  if (GVar->hasInitializer()) {
1128  if ((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1129  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) {
1130  const Constant *Initializer = GVar->getInitializer();
1131  // 'undef' is treated as there is no value specified.
1132  if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) {
1133  O << " = ";
1134  printScalarConstant(Initializer, O);
1135  }
1136  } else {
1137  // The frontend adds zero-initializer to device and constant variables
1138  // that don't have an initial value, and UndefValue to shared
1139  // variables, so skip warning for this case.
1140  if (!GVar->getInitializer()->isNullValue() &&
1141  !isa<UndefValue>(GVar->getInitializer())) {
1142  report_fatal_error("initial value of '" + GVar->getName() +
1143  "' is not allowed in addrspace(" +
1144  Twine(PTy->getAddressSpace()) + ")");
1145  }
1146  }
1147  }
1148  } else {
1149  unsigned int ElementSize = 0;
1150 
1151  // Although PTX has direct support for struct type and array type and
1152  // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
1153  // targets that support these high level field accesses. Structs, arrays
1154  // and vectors are lowered into arrays of bytes.
1155  switch (ETy->getTypeID()) {
1156  case Type::IntegerTyID: // Integers larger than 64 bits
1157  case Type::StructTyID:
1158  case Type::ArrayTyID:
1159  case Type::FixedVectorTyID:
1160  ElementSize = DL.getTypeStoreSize(ETy);
1161  // Ptx allows variable initilization only for constant and
1162  // global state spaces.
1163  if (((PTy->getAddressSpace() == ADDRESS_SPACE_GLOBAL) ||
1164  (PTy->getAddressSpace() == ADDRESS_SPACE_CONST)) &&
1165  GVar->hasInitializer()) {
1166  const Constant *Initializer = GVar->getInitializer();
1167  if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) {
1168  AggBuffer aggBuffer(ElementSize, O, *this);
1169  bufferAggregateConstant(Initializer, &aggBuffer);
1170  if (aggBuffer.numSymbols) {
1171  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
1172  O << " .u64 ";
1173  getSymbol(GVar)->print(O, MAI);
1174  O << "[";
1175  O << ElementSize / 8;
1176  } else {
1177  O << " .u32 ";
1178  getSymbol(GVar)->print(O, MAI);
1179  O << "[";
1180  O << ElementSize / 4;
1181  }
1182  O << "]";
1183  } else {
1184  O << " .b8 ";
1185  getSymbol(GVar)->print(O, MAI);
1186  O << "[";
1187  O << ElementSize;
1188  O << "]";
1189  }
1190  O << " = {";
1191  aggBuffer.print();
1192  O << "}";
1193  } else {
1194  O << " .b8 ";
1195  getSymbol(GVar)->print(O, MAI);
1196  if (ElementSize) {
1197  O << "[";
1198  O << ElementSize;
1199  O << "]";
1200  }
1201  }
1202  } else {
1203  O << " .b8 ";
1204  getSymbol(GVar)->print(O, MAI);
1205  if (ElementSize) {
1206  O << "[";
1207  O << ElementSize;
1208  O << "]";
1209  }
1210  }
1211  break;
1212  default:
1213  llvm_unreachable("type not supported yet");
1214  }
1215  }
1216  O << ";\n";
1217 }
1218 
1219 void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) {
1220  if (localDecls.find(f) == localDecls.end())
1221  return;
1222 
1223  std::vector<const GlobalVariable *> &gvars = localDecls[f];
1224 
1225  const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
1226  const NVPTXSubtarget &STI =
1227  *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
1228 
1229  for (const GlobalVariable *GV : gvars) {
1230  O << "\t// demoted variable\n\t";
1231  printModuleLevelGV(GV, O, /*processDemoted=*/true, STI);
1232  }
1233 }
1234 
1235 void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace,
1236  raw_ostream &O) const {
1237  switch (AddressSpace) {
1238  case ADDRESS_SPACE_LOCAL:
1239  O << "local";
1240  break;
1241  case ADDRESS_SPACE_GLOBAL:
1242  O << "global";
1243  break;
1244  case ADDRESS_SPACE_CONST:
1245  O << "const";
1246  break;
1247  case ADDRESS_SPACE_SHARED:
1248  O << "shared";
1249  break;
1250  default:
1251  report_fatal_error("Bad address space found while emitting PTX: " +
1253  break;
1254  }
1255 }
1256 
1257 std::string
1258 NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
1259  switch (Ty->getTypeID()) {
1260  case Type::IntegerTyID: {
1261  unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
1262  if (NumBits == 1)
1263  return "pred";
1264  else if (NumBits <= 64) {
1265  std::string name = "u";
1266  return name + utostr(NumBits);
1267  } else {
1268  llvm_unreachable("Integer too large");
1269  break;
1270  }
1271  break;
1272  }
1273  case Type::HalfTyID:
1274  // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
1275  return "b16";
1276  case Type::FloatTyID:
1277  return "f32";
1278  case Type::DoubleTyID:
1279  return "f64";
1280  case Type::PointerTyID:
1281  if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit())
1282  if (useB4PTR)
1283  return "b64";
1284  else
1285  return "u64";
1286  else if (useB4PTR)
1287  return "b32";
1288  else
1289  return "u32";
1290  default:
1291  break;
1292  }
1293  llvm_unreachable("unexpected type");
1294 }
1295 
1296 void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
1297  raw_ostream &O,
1298  const NVPTXSubtarget &STI) {
1299  const DataLayout &DL = getDataLayout();
1300 
1301  // GlobalVariables are always constant pointers themselves.
1302  Type *ETy = GVar->getValueType();
1303 
1304  O << ".";
1305  emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O);
1306  if (isManaged(*GVar)) {
1307  if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) {
1309  ".attribute(.managed) requires PTX version >= 4.0 and sm_30");
1310  }
1311  O << " .attribute(.managed)";
1312  }
1313  if (MaybeAlign A = GVar->getAlign())
1314  O << " .align " << A->value();
1315  else
1316  O << " .align " << (int)DL.getPrefTypeAlignment(ETy);
1317 
1318  // Special case for i128
1319  if (ETy->isIntegerTy(128)) {
1320  O << " .b8 ";
1321  getSymbol(GVar)->print(O, MAI);
1322  O << "[16]";
1323  return;
1324  }
1325 
1326  if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) {
1327  O << " .";
1328  O << getPTXFundamentalTypeStr(ETy);
1329  O << " ";
1330  getSymbol(GVar)->print(O, MAI);
1331  return;
1332  }
1333 
1334  int64_t ElementSize = 0;
1335 
1336  // Although PTX has direct support for struct type and array type and LLVM IR
1337  // is very similar to PTX, the LLVM CodeGen does not support for targets that
1338  // support these high level field accesses. Structs and arrays are lowered
1339  // into arrays of bytes.
1340  switch (ETy->getTypeID()) {
1341  case Type::StructTyID:
1342  case Type::ArrayTyID:
1343  case Type::FixedVectorTyID:
1344  ElementSize = DL.getTypeStoreSize(ETy);
1345  O << " .b8 ";
1346  getSymbol(GVar)->print(O, MAI);
1347  O << "[";
1348  if (ElementSize) {
1349  O << ElementSize;
1350  }
1351  O << "]";
1352  break;
1353  default:
1354  llvm_unreachable("type not supported yet");
1355  }
1356 }
1357 
1358 void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
1359  int paramIndex, raw_ostream &O) {
1360  getSymbol(I->getParent())->print(O, MAI);
1361  O << "_param_" << paramIndex;
1362 }
1363 
1364 void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
1365  const DataLayout &DL = getDataLayout();
1366  const AttributeList &PAL = F->getAttributes();
1367  const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
1368  const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering());
1369 
1371  unsigned paramIndex = 0;
1372  bool first = true;
1373  bool isKernelFunc = isKernelFunction(*F);
1374  bool isABI = (STI.getSmVersion() >= 20);
1375  bool hasImageHandles = STI.hasImageHandles();
1376  MVT thePointerTy = TLI->getPointerTy(DL);
1377 
1378  if (F->arg_empty()) {
1379  O << "()\n";
1380  return;
1381  }
1382 
1383  O << "(\n";
1384 
1385  for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) {
1386  Type *Ty = I->getType();
1387 
1388  if (!first)
1389  O << ",\n";
1390 
1391  first = false;
1392 
1393  // Handle image/sampler parameters
1394  if (isKernelFunction(*F)) {
1395  if (isSampler(*I) || isImage(*I)) {
1396  if (isImage(*I)) {
1397  std::string sname = std::string(I->getName());
1398  if (isImageWriteOnly(*I) || isImageReadWrite(*I)) {
1399  if (hasImageHandles)
1400  O << "\t.param .u64 .ptr .surfref ";
1401  else
1402  O << "\t.param .surfref ";
1403  CurrentFnSym->print(O, MAI);
1404  O << "_param_" << paramIndex;
1405  }
1406  else { // Default image is read_only
1407  if (hasImageHandles)
1408  O << "\t.param .u64 .ptr .texref ";
1409  else
1410  O << "\t.param .texref ";
1411  CurrentFnSym->print(O, MAI);
1412  O << "_param_" << paramIndex;
1413  }
1414  } else {
1415  if (hasImageHandles)
1416  O << "\t.param .u64 .ptr .samplerref ";
1417  else
1418  O << "\t.param .samplerref ";
1419  CurrentFnSym->print(O, MAI);
1420  O << "_param_" << paramIndex;
1421  }
1422  continue;
1423  }
1424  }
1425 
1426  auto getOptimalAlignForParam = [TLI, &DL, &PAL, F,
1427  paramIndex](Type *Ty) -> Align {
1428  Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL);
1429  MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex);
1430  return max(TypeAlign, ParamAlign);
1431  };
1432 
1433  if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
1434  if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
1435  // Just print .param .align <a> .b8 .param[size];
1436  // <a> = optimal alignment for the element type; always multiple of
1437  // PAL.getParamAlignment
1438  // size = typeallocsize of element type
1439  Align OptimalAlign = getOptimalAlignForParam(Ty);
1440 
1441  O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1442  printParamName(I, paramIndex, O);
1443  O << "[" << DL.getTypeAllocSize(Ty) << "]";
1444 
1445  continue;
1446  }
1447  // Just a scalar
1448  auto *PTy = dyn_cast<PointerType>(Ty);
1449  if (isKernelFunc) {
1450  if (PTy) {
1451  // Special handling for pointer arguments to kernel
1452  O << "\t.param .u" << thePointerTy.getSizeInBits() << " ";
1453 
1454  if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
1455  NVPTX::CUDA) {
1456  int addrSpace = PTy->getAddressSpace();
1457  switch (addrSpace) {
1458  default:
1459  O << ".ptr ";
1460  break;
1461  case ADDRESS_SPACE_CONST:
1462  O << ".ptr .const ";
1463  break;
1464  case ADDRESS_SPACE_SHARED:
1465  O << ".ptr .shared ";
1466  break;
1467  case ADDRESS_SPACE_GLOBAL:
1468  O << ".ptr .global ";
1469  break;
1470  }
1471  Align ParamAlign = I->getParamAlign().valueOrOne();
1472  O << ".align " << ParamAlign.value() << " ";
1473  }
1474  printParamName(I, paramIndex, O);
1475  continue;
1476  }
1477 
1478  // non-pointer scalar to kernel func
1479  O << "\t.param .";
1480  // Special case: predicate operands become .u8 types
1481  if (Ty->isIntegerTy(1))
1482  O << "u8";
1483  else
1484  O << getPTXFundamentalTypeStr(Ty);
1485  O << " ";
1486  printParamName(I, paramIndex, O);
1487  continue;
1488  }
1489  // Non-kernel function, just print .param .b<size> for ABI
1490  // and .reg .b<size> for non-ABI
1491  unsigned sz = 0;
1492  if (isa<IntegerType>(Ty)) {
1493  sz = cast<IntegerType>(Ty)->getBitWidth();
1494  if (sz < 32)
1495  sz = 32;
1496  } else if (isa<PointerType>(Ty))
1497  sz = thePointerTy.getSizeInBits();
1498  else if (Ty->isHalfTy())
1499  // PTX ABI requires all scalar parameters to be at least 32
1500  // bits in size. fp16 normally uses .b16 as its storage type
1501  // in PTX, so its size must be adjusted here, too.
1502  sz = 32;
1503  else
1504  sz = Ty->getPrimitiveSizeInBits();
1505  if (isABI)
1506  O << "\t.param .b" << sz << " ";
1507  else
1508  O << "\t.reg .b" << sz << " ";
1509  printParamName(I, paramIndex, O);
1510  continue;
1511  }
1512 
1513  // param has byVal attribute.
1514  Type *ETy = PAL.getParamByValType(paramIndex);
1515  assert(ETy && "Param should have byval type");
1516 
1517  if (isABI || isKernelFunc) {
1518  // Just print .param .align <a> .b8 .param[size];
1519  // <a> = optimal alignment for the element type; always multiple of
1520  // PAL.getParamAlignment
1521  // size = typeallocsize of element type
1522  Align OptimalAlign = getOptimalAlignForParam(ETy);
1523 
1524  // Work around a bug in ptxas. When PTX code takes address of
1525  // byval parameter with alignment < 4, ptxas generates code to
1526  // spill argument into memory. Alas on sm_50+ ptxas generates
1527  // SASS code that fails with misaligned access. To work around
1528  // the problem, make sure that we align byval parameters by at
1529  // least 4. Matching change must be made in LowerCall() where we
1530  // prepare parameters for the call.
1531  //
1532  // TODO: this will need to be undone when we get to support multi-TU
1533  // device-side compilation as it breaks ABI compatibility with nvcc.
1534  // Hopefully ptxas bug is fixed by then.
1535  if (!isKernelFunc && OptimalAlign < Align(4))
1536  OptimalAlign = Align(4);
1537  unsigned sz = DL.getTypeAllocSize(ETy);
1538  O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
1539  printParamName(I, paramIndex, O);
1540  O << "[" << sz << "]";
1541  continue;
1542  } else {
1543  // Split the ETy into constituent parts and
1544  // print .param .b<size> <name> for each part.
1545  // Further, if a part is vector, print the above for
1546  // each vector element.
1547  SmallVector<EVT, 16> vtparts;
1548  ComputeValueVTs(*TLI, DL, ETy, vtparts);
1549  for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
1550  unsigned elems = 1;
1551  EVT elemtype = vtparts[i];
1552  if (vtparts[i].isVector()) {
1553  elems = vtparts[i].getVectorNumElements();
1554  elemtype = vtparts[i].getVectorElementType();
1555  }
1556 
1557  for (unsigned j = 0, je = elems; j != je; ++j) {
1558  unsigned sz = elemtype.getSizeInBits();
1559  if (elemtype.isInteger() && (sz < 32))
1560  sz = 32;
1561  O << "\t.reg .b" << sz << " ";
1562  printParamName(I, paramIndex, O);
1563  if (j < je - 1)
1564  O << ",\n";
1565  ++paramIndex;
1566  }
1567  if (i < e - 1)
1568  O << ",\n";
1569  }
1570  --paramIndex;
1571  continue;
1572  }
1573  }
1574 
1575  O << "\n)\n";
1576 }
1577 
1578 void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
1579  raw_ostream &O) {
1580  const Function &F = MF.getFunction();
1581  emitFunctionParamList(&F, O);
1582 }
1583 
1584 void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
1585  const MachineFunction &MF) {
1586  SmallString<128> Str;
1587  raw_svector_ostream O(Str);
1588 
1589  // Map the global virtual register number to a register class specific
1590  // virtual register number starting from 1 with that class.
1592  //unsigned numRegClasses = TRI->getNumRegClasses();
1593 
1594  // Emit the Fake Stack Object
1595  const MachineFrameInfo &MFI = MF.getFrameInfo();
1596  int NumBytes = (int) MFI.getStackSize();
1597  if (NumBytes) {
1598  O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
1599  << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1600  if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1601  O << "\t.reg .b64 \t%SP;\n";
1602  O << "\t.reg .b64 \t%SPL;\n";
1603  } else {
1604  O << "\t.reg .b32 \t%SP;\n";
1605  O << "\t.reg .b32 \t%SPL;\n";
1606  }
1607  }
1608 
1609  // Go through all virtual registers to establish the mapping between the
1610  // global virtual
1611  // register number and the per class virtual register number.
1612  // We use the per class virtual register number in the ptx output.
1613  unsigned int numVRs = MRI->getNumVirtRegs();
1614  for (unsigned i = 0; i < numVRs; i++) {
1616  const TargetRegisterClass *RC = MRI->getRegClass(vr);
1617  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1618  int n = regmap.size();
1619  regmap.insert(std::make_pair(vr, n + 1));
1620  }
1621 
1622  // Emit register declarations
1623  // @TODO: Extract out the real register usage
1624  // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
1625  // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
1626  // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
1627  // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
1628  // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n";
1629  // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
1630  // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n";
1631 
1632  // Emit declaration of the virtual registers or 'physical' registers for
1633  // each register class
1634  for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
1635  const TargetRegisterClass *RC = TRI->getRegClass(i);
1636  DenseMap<unsigned, unsigned> &regmap = VRegMapping[RC];
1637  std::string rcname = getNVPTXRegClassName(RC);
1638  std::string rcStr = getNVPTXRegClassStr(RC);
1639  int n = regmap.size();
1640 
1641  // Only declare those registers that may be used.
1642  if (n) {
1643  O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
1644  << ">;\n";
1645  }
1646  }
1647 
1648  OutStreamer->emitRawText(O.str());
1649 }
1650 
1651 void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
1652  APFloat APF = APFloat(Fp->getValueAPF()); // make a copy
1653  bool ignored;
1654  unsigned int numHex;
1655  const char *lead;
1656 
1657  if (Fp->getType()->getTypeID() == Type::FloatTyID) {
1658  numHex = 8;
1659  lead = "0f";
1661  } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
1662  numHex = 16;
1663  lead = "0d";
1665  } else
1666  llvm_unreachable("unsupported fp type");
1667 
1668  APInt API = APF.bitcastToAPInt();
1669  O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true);
1670 }
1671 
1672 void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
1673  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1674  O << CI->getValue();
1675  return;
1676  }
1677  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) {
1678  printFPConstant(CFP, O);
1679  return;
1680  }
1681  if (isa<ConstantPointerNull>(CPV)) {
1682  O << "0";
1683  return;
1684  }
1685  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1686  bool IsNonGenericPointer = false;
1687  if (GVar->getType()->getAddressSpace() != 0) {
1688  IsNonGenericPointer = true;
1689  }
1690  if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
1691  O << "generic(";
1692  getSymbol(GVar)->print(O, MAI);
1693  O << ")";
1694  } else {
1695  getSymbol(GVar)->print(O, MAI);
1696  }
1697  return;
1698  }
1699  if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1700  const Value *v = Cexpr->stripPointerCasts();
1701  PointerType *PTy = dyn_cast<PointerType>(Cexpr->getType());
1702  bool IsNonGenericPointer = false;
1703  if (PTy && PTy->getAddressSpace() != 0) {
1704  IsNonGenericPointer = true;
1705  }
1706  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
1707  if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
1708  O << "generic(";
1709  getSymbol(GVar)->print(O, MAI);
1710  O << ")";
1711  } else {
1712  getSymbol(GVar)->print(O, MAI);
1713  }
1714  return;
1715  } else {
1716  lowerConstant(CPV)->print(O, MAI);
1717  return;
1718  }
1719  }
1720  llvm_unreachable("Not scalar type found in printScalarConstant()");
1721 }
1722 
1723 void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
1724  AggBuffer *AggBuffer) {
1725  const DataLayout &DL = getDataLayout();
1726  int AllocSize = DL.getTypeAllocSize(CPV->getType());
1727  if (isa<UndefValue>(CPV) || CPV->isNullValue()) {
1728  // Non-zero Bytes indicates that we need to zero-fill everything. Otherwise,
1729  // only the space allocated by CPV.
1730  AggBuffer->addZeros(Bytes ? Bytes : AllocSize);
1731  return;
1732  }
1733 
1734  // Helper for filling AggBuffer with APInts.
1735  auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) {
1736  size_t NumBytes = (Val.getBitWidth() + 7) / 8;
1737  SmallVector<unsigned char, 16> Buf(NumBytes);
1738  for (unsigned I = 0; I < NumBytes; ++I) {
1739  Buf[I] = Val.extractBitsAsZExtValue(8, I * 8);
1740  }
1741  AggBuffer->addBytes(Buf.data(), NumBytes, Bytes);
1742  };
1743 
1744  switch (CPV->getType()->getTypeID()) {
1745  case Type::IntegerTyID:
1746  if (const auto CI = dyn_cast<ConstantInt>(CPV)) {
1747  AddIntToBuffer(CI->getValue());
1748  break;
1749  }
1750  if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1751  if (const auto *CI =
1752  dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) {
1753  AddIntToBuffer(CI->getValue());
1754  break;
1755  }
1756  if (Cexpr->getOpcode() == Instruction::PtrToInt) {
1757  Value *V = Cexpr->getOperand(0)->stripPointerCasts();
1758  AggBuffer->addSymbol(V, Cexpr->getOperand(0));
1759  AggBuffer->addZeros(AllocSize);
1760  break;
1761  }
1762  }
1763  llvm_unreachable("unsupported integer const type");
1764  break;
1765 
1766  case Type::HalfTyID:
1767  case Type::FloatTyID:
1768  case Type::DoubleTyID:
1769  AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt());
1770  break;
1771 
1772  case Type::PointerTyID: {
1773  if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) {
1774  AggBuffer->addSymbol(GVar, GVar);
1775  } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
1776  const Value *v = Cexpr->stripPointerCasts();
1777  AggBuffer->addSymbol(v, Cexpr);
1778  }
1779  AggBuffer->addZeros(AllocSize);
1780  break;
1781  }
1782 
1783  case Type::ArrayTyID:
1784  case Type::FixedVectorTyID:
1785  case Type::StructTyID: {
1786  if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) {
1787  bufferAggregateConstant(CPV, AggBuffer);
1788  if (Bytes > AllocSize)
1789  AggBuffer->addZeros(Bytes - AllocSize);
1790  } else if (isa<ConstantAggregateZero>(CPV))
1791  AggBuffer->addZeros(Bytes);
1792  else
1793  llvm_unreachable("Unexpected Constant type");
1794  break;
1795  }
1796 
1797  default:
1798  llvm_unreachable("unsupported type");
1799  }
1800 }
1801 
1802 void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV,
1803  AggBuffer *aggBuffer) {
1804  const DataLayout &DL = getDataLayout();
1805  int Bytes;
1806 
1807  // Integers of arbitrary width
1808  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) {
1809  APInt Val = CI->getValue();
1810  for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) {
1811  uint8_t Byte = Val.getLoBits(8).getZExtValue();
1812  aggBuffer->addBytes(&Byte, 1, 1);
1813  Val.lshrInPlace(8);
1814  }
1815  return;
1816  }
1817 
1818  // Old constants
1819  if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) {
1820  if (CPV->getNumOperands())
1821  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i)
1822  bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer);
1823  return;
1824  }
1825 
1826  if (const ConstantDataSequential *CDS =
1827  dyn_cast<ConstantDataSequential>(CPV)) {
1828  if (CDS->getNumElements())
1829  for (unsigned i = 0; i < CDS->getNumElements(); ++i)
1830  bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0,
1831  aggBuffer);
1832  return;
1833  }
1834 
1835  if (isa<ConstantStruct>(CPV)) {
1836  if (CPV->getNumOperands()) {
1837  StructType *ST = cast<StructType>(CPV->getType());
1838  for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) {
1839  if (i == (e - 1))
1840  Bytes = DL.getStructLayout(ST)->getElementOffset(0) +
1841  DL.getTypeAllocSize(ST) -
1842  DL.getStructLayout(ST)->getElementOffset(i);
1843  else
1844  Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) -
1845  DL.getStructLayout(ST)->getElementOffset(i);
1846  bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer);
1847  }
1848  }
1849  return;
1850  }
1851  llvm_unreachable("unsupported constant type in printAggregateConstant()");
1852 }
1853 
1854 /// lowerConstantForGV - Return an MCExpr for the given Constant. This is mostly
1855 /// a copy from AsmPrinter::lowerConstant, except customized to only handle
1856 /// expressions that are representable in PTX and create
1857 /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions.
1858 const MCExpr *
1859 NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) {
1860  MCContext &Ctx = OutContext;
1861 
1862  if (CV->isNullValue() || isa<UndefValue>(CV))
1863  return MCConstantExpr::create(0, Ctx);
1864 
1865  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
1866  return MCConstantExpr::create(CI->getZExtValue(), Ctx);
1867 
1868  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
1869  const MCSymbolRefExpr *Expr =
1871  if (ProcessingGeneric) {
1872  return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
1873  } else {
1874  return Expr;
1875  }
1876  }
1877 
1878  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
1879  if (!CE) {
1880  llvm_unreachable("Unknown constant value to lower!");
1881  }
1882 
1883  switch (CE->getOpcode()) {
1884  default: {
1885  // If the code isn't optimized, there may be outstanding folding
1886  // opportunities. Attempt to fold the expression using DataLayout as a
1887  // last resort before giving up.
1889  if (C != CE)
1890  return lowerConstantForGV(C, ProcessingGeneric);
1891 
1892  // Otherwise report the problem to the user.
1893  std::string S;
1894  raw_string_ostream OS(S);
1895  OS << "Unsupported expression in static initializer: ";
1896  CE->printAsOperand(OS, /*PrintType=*/false,
1897  !MF ? nullptr : MF->getFunction().getParent());
1898  report_fatal_error(Twine(OS.str()));
1899  }
1900 
1901  case Instruction::AddrSpaceCast: {
1902  // Strip the addrspacecast and pass along the operand
1903  PointerType *DstTy = cast<PointerType>(CE->getType());
1904  if (DstTy->getAddressSpace() == 0) {
1905  return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true);
1906  }
1907  std::string S;
1908  raw_string_ostream OS(S);
1909  OS << "Unsupported expression in static initializer: ";
1910  CE->printAsOperand(OS, /*PrintType=*/ false,
1911  !MF ? nullptr : MF->getFunction().getParent());
1912  report_fatal_error(Twine(OS.str()));
1913  }
1914 
1915  case Instruction::GetElementPtr: {
1916  const DataLayout &DL = getDataLayout();
1917 
1918  // Generate a symbolic expression for the byte address
1919  APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0);
1920  cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI);
1921 
1922  const MCExpr *Base = lowerConstantForGV(CE->getOperand(0),
1923  ProcessingGeneric);
1924  if (!OffsetAI)
1925  return Base;
1926 
1927  int64_t Offset = OffsetAI.getSExtValue();
1929  Ctx);
1930  }
1931 
1932  case Instruction::Trunc:
1933  // We emit the value and depend on the assembler to truncate the generated
1934  // expression properly. This is important for differences between
1935  // blockaddress labels. Since the two labels are in the same function, it
1936  // is reasonable to treat their delta as a 32-bit value.
1938  case Instruction::BitCast:
1939  return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1940 
1941  case Instruction::IntToPtr: {
1942  const DataLayout &DL = getDataLayout();
1943 
1944  // Handle casts to pointers by changing them into casts to the appropriate
1945  // integer type. This promotes constant folding and simplifies this code.
1946  Constant *Op = CE->getOperand(0);
1947  Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()),
1948  false/*ZExt*/);
1949  return lowerConstantForGV(Op, ProcessingGeneric);
1950  }
1951 
1952  case Instruction::PtrToInt: {
1953  const DataLayout &DL = getDataLayout();
1954 
1955  // Support only foldable casts to/from pointers that can be eliminated by
1956  // changing the pointer to the appropriately sized integer type.
1957  Constant *Op = CE->getOperand(0);
1958  Type *Ty = CE->getType();
1959 
1960  const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric);
1961 
1962  // We can emit the pointer value into this slot if the slot is an
1963  // integer slot equal to the size of the pointer.
1964  if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
1965  return OpExpr;
1966 
1967  // Otherwise the pointer is smaller than the resultant integer, mask off
1968  // the high bits so we are sure to get a proper truncation if the input is
1969  // a constant expr.
1970  unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
1971  const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
1972  return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
1973  }
1974 
1975  // The MC library also has a right-shift operator, but it isn't consistently
1976  // signed or unsigned between different targets.
1977  case Instruction::Add: {
1978  const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
1979  const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
1980  switch (CE->getOpcode()) {
1981  default: llvm_unreachable("Unknown binary operator constant cast expr");
1982  case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
1983  }
1984  }
1985  }
1986 }
1987 
1988 // Copy of MCExpr::print customized for NVPTX
1989 void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
1990  switch (Expr.getKind()) {
1991  case MCExpr::Target:
1992  return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
1993  case MCExpr::Constant:
1994  OS << cast<MCConstantExpr>(Expr).getValue();
1995  return;
1996 
1997  case MCExpr::SymbolRef: {
1998  const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
1999  const MCSymbol &Sym = SRE.getSymbol();
2000  Sym.print(OS, MAI);
2001  return;
2002  }
2003 
2004  case MCExpr::Unary: {
2005  const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr);
2006  switch (UE.getOpcode()) {
2007  case MCUnaryExpr::LNot: OS << '!'; break;
2008  case MCUnaryExpr::Minus: OS << '-'; break;
2009  case MCUnaryExpr::Not: OS << '~'; break;
2010  case MCUnaryExpr::Plus: OS << '+'; break;
2011  }
2012  printMCExpr(*UE.getSubExpr(), OS);
2013  return;
2014  }
2015 
2016  case MCExpr::Binary: {
2017  const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr);
2018 
2019  // Only print parens around the LHS if it is non-trivial.
2020  if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) ||
2021  isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) {
2022  printMCExpr(*BE.getLHS(), OS);
2023  } else {
2024  OS << '(';
2025  printMCExpr(*BE.getLHS(), OS);
2026  OS<< ')';
2027  }
2028 
2029  switch (BE.getOpcode()) {
2030  case MCBinaryExpr::Add:
2031  // Print "X-42" instead of "X+-42".
2032  if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) {
2033  if (RHSC->getValue() < 0) {
2034  OS << RHSC->getValue();
2035  return;
2036  }
2037  }
2038 
2039  OS << '+';
2040  break;
2041  default: llvm_unreachable("Unhandled binary operator");
2042  }
2043 
2044  // Only print parens around the LHS if it is non-trivial.
2045  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
2046  printMCExpr(*BE.getRHS(), OS);
2047  } else {
2048  OS << '(';
2049  printMCExpr(*BE.getRHS(), OS);
2050  OS << ')';
2051  }
2052  return;
2053  }
2054  }
2055 
2056  llvm_unreachable("Invalid expression kind!");
2057 }
2058 
2059 /// PrintAsmOperand - Print out an operand for an inline asm expression.
2060 ///
2061 bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
2062  const char *ExtraCode, raw_ostream &O) {
2063  if (ExtraCode && ExtraCode[0]) {
2064  if (ExtraCode[1] != 0)
2065  return true; // Unknown modifier.
2066 
2067  switch (ExtraCode[0]) {
2068  default:
2069  // See if this is a generic print operand
2070  return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
2071  case 'r':
2072  break;
2073  }
2074  }
2075 
2076  printOperand(MI, OpNo, O);
2077 
2078  return false;
2079 }
2080 
2081 bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
2082  unsigned OpNo,
2083  const char *ExtraCode,
2084  raw_ostream &O) {
2085  if (ExtraCode && ExtraCode[0])
2086  return true; // Unknown modifier
2087 
2088  O << '[';
2089  printMemOperand(MI, OpNo, O);
2090  O << ']';
2091 
2092  return false;
2093 }
2094 
2095 void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
2096  raw_ostream &O) {
2097  const MachineOperand &MO = MI->getOperand(opNum);
2098  switch (MO.getType()) {
2101  if (MO.getReg() == NVPTX::VRDepot)
2102  O << DEPOTNAME << getFunctionNumber();
2103  else
2105  } else {
2106  emitVirtualRegister(MO.getReg(), O);
2107  }
2108  break;
2109 
2111  O << MO.getImm();
2112  break;
2113 
2115  printFPConstant(MO.getFPImm(), O);
2116  break;
2117 
2119  PrintSymbolOperand(MO, O);
2120  break;
2121 
2123  MO.getMBB()->getSymbol()->print(O, MAI);
2124  break;
2125 
2126  default:
2127  llvm_unreachable("Operand type not supported.");
2128  }
2129 }
2130 
2131 void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum,
2132  raw_ostream &O, const char *Modifier) {
2133  printOperand(MI, opNum, O);
2134 
2135  if (Modifier && strcmp(Modifier, "add") == 0) {
2136  O << ", ";
2137  printOperand(MI, opNum + 1, O);
2138  } else {
2139  if (MI->getOperand(opNum + 1).isImm() &&
2140  MI->getOperand(opNum + 1).getImm() == 0)
2141  return; // don't print ',0' or '+0'
2142  O << "+";
2143  printOperand(MI, opNum + 1, O);
2144  }
2145 }
2146 
2147 // Force static initialization.
2151 }
llvm::NVPTXAsmPrinter::AggBuffer
friend class AggBuffer
Definition: NVPTXAsmPrinter.h:197
llvm::codeview::SimpleTypeKind::Byte
@ Byte
i
i
Definition: README.txt:29
llvm::NVPTXAsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: NVPTXAsmPrinter.cpp:879
NVPTXInstPrinter.h
llvm::lltok::APFloat
@ APFloat
Definition: LLToken.h:425
llvm::Type::ArrayTyID
@ ArrayTyID
Arrays.
Definition: Type.h:75
ValueTypes.h
llvm::Argument
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
llvm::StringRef::startswith
LLVM_NODISCARD bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:286
llvm::Type::FloatTyID
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
llvm::getReqNTIDx
bool getReqNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:254
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:104
MachineInstr.h
llvm::MachineOperand::MO_Immediate
@ MO_Immediate
Immediate operand.
Definition: MachineOperand.h:52
llvm::Type::DoubleTyID
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::GlobalValue::hasCommonLinkage
bool hasCommonLinkage() const
Definition: GlobalValue.h:454
llvm::DataLayout
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:113
__CLK_ADDRESS_BASE
@ __CLK_ADDRESS_BASE
Definition: cl_common_defines.h:69
llvm::MachineLoopInfo::getLoopFor
MachineLoop * getLoopFor(const MachineBasicBlock *BB) const
Return the innermost loop that BB lives in.
Definition: MachineLoopInfo.h:126
llvm::NVPTXInstPrinter::getRegisterName
static const char * getRegisterName(unsigned RegNo)
llvm::MCOperand::createExpr
static MCOperand createExpr(const MCExpr *Val)
Definition: MCInst.h:162
llvm::MCUnaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this unary expression.
Definition: MCExpr.h:468
llvm::AsmPrinter::lowerConstant
virtual const MCExpr * lowerConstant(const Constant *CV)
Lower the specified LLVM Constant to an MCExpr.
Definition: AsmPrinter.cpp:2673
llvm::GlobalValue::hasExternalLinkage
bool hasExternalLinkage() const
Definition: GlobalValue.h:433
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:218
llvm::APFloatBase::IEEEsingle
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:170
DebugInfoMetadata.h
MCInstrDesc.h
llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition: MachineOperand.h:572
llvm::MCOperand::createImm
static MCOperand createImm(int64_t Val)
Definition: MCInst.h:141
llvm::MCContext
Context object for machine code objects.
Definition: MCContext.h:74
llvm::Function
Definition: Function.h:60
llvm::Type::VoidTyID
@ VoidTyID
type with no size
Definition: Type.h:63
StringRef.h
is64Bit
static bool is64Bit(const char *name)
Definition: X86Disassembler.cpp:1019
llvm::NVPTXSubtarget::getTargetLowering
const NVPTXTargetLowering * getTargetLowering() const override
Definition: NVPTXSubtarget.h:64
llvm::raw_string_ostream
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:632
llvm::isImageReadWrite
bool isImageReadWrite(const Value &val)
Definition: NVPTXUtilities.cpp:200
llvm::NVPTXTargetMachine::is64Bit
bool is64Bit() const
Definition: NVPTXTargetMachine.h:47
llvm::GlobalObject::getSection
StringRef getSection() const
Get the custom section of this global if it has one.
Definition: GlobalObject.h:111
llvm::AsmPrinter::MAI
const MCAsmInfo * MAI
Target Asm Printer information.
Definition: AsmPrinter.h:87
llvm::MCConstantExpr::create
static const MCConstantExpr * create(int64_t Value, MCContext &Ctx, bool PrintInHex=false, unsigned SizeInBytes=0)
Definition: MCExpr.cpp:194
llvm::GlobalValue::hasWeakLinkage
bool hasWeakLinkage() const
Definition: GlobalValue.h:444
llvm::ConstantInt::getValue
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:133
usedInGlobalVarDef
static bool usedInGlobalVarDef(const Constant *C)
Definition: NVPTXAsmPrinter.cpp:612
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::GlobalObject::getAlign
MaybeAlign getAlign() const
Returns the alignment of the given variable or function.
Definition: GlobalObject.h:79
Path.h
llvm::ADDRESS_SPACE_LOCAL
@ ADDRESS_SPACE_LOCAL
Definition: NVPTXBaseInfo.h:26
llvm::Value::hasName
bool hasName() const
Definition: Value.h:261
ErrorHandling.h
llvm::NVPTXGenericMCSymbolRefExpr::create
static const NVPTXGenericMCSymbolRefExpr * create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx)
Definition: NVPTXMCExpr.cpp:54
DiscoverDependentGlobals
static void DiscoverDependentGlobals(const Value *V, DenseSet< const GlobalVariable * > &Globals)
DiscoverDependentGlobals - Return a set of GlobalVariables on which V depends.
Definition: NVPTXAsmPrinter.cpp:98
llvm::X86Disassembler::Reg
Reg
All possible values of the reg field in the ModR/M byte.
Definition: X86DisassemblerDecoder.h:462
llvm::GlobalVariable
Definition: GlobalVariable.h:39
llvm::PointerType::getAddressSpace
unsigned getAddressSpace() const
Return the address space of the Pointer type.
Definition: DerivedTypes.h:682
usedInOneFunc
static bool usedInOneFunc(const User *U, Function const *&oneFunc)
Definition: NVPTXAsmPrinter.cpp:628
llvm::MCUnaryExpr::Plus
@ Plus
Unary plus.
Definition: MCExpr.h:431
llvm::MCBinaryExpr::createAnd
static const MCBinaryExpr * createAnd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:530
llvm::MCBinaryExpr::Add
@ Add
Addition.
Definition: MCExpr.h:484
llvm::Type::getTypeID
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:136
MachineBasicBlock.h
llvm::NVPTXSubtarget::getSmVersion
unsigned int getSmVersion() const
Definition: NVPTXSubtarget.h:80
llvm::AsmPrinter::doFinalization
bool doFinalization(Module &M) override
Shut down the asmprinter.
Definition: AsmPrinter.cpp:1903
llvm::TargetSubtargetInfo::getRegisterInfo
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Definition: TargetSubtargetInfo.h:125
llvm::MachineModuleInfo::hasDebugInfo
bool hasDebugInfo() const
Returns true if valid debug info is present.
Definition: MachineModuleInfo.h:183
APInt.h
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:234
llvm::getMinCTASm
bool getMinCTASm(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:266
llvm::DenseMapIterator
Definition: DenseMap.h:57
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::AsmPrinter::PrintSymbolOperand
virtual void PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS)
Print the MachineOperand as a symbol.
Definition: AsmPrinterInlineAsm.cpp:457
DenseMap.h
Module.h
llvm::AttributeList
Definition: Attributes.h:408
llvm::NVPTXFloatMCExpr::createConstantFPDouble
static const NVPTXFloatMCExpr * createConstantFPDouble(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:53
printOperand
static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value)
Definition: SelectionDAGDumper.cpp:952
llvm::isImage
bool isImage(const Value &val)
Definition: NVPTXUtilities.cpp:212
llvm::ConstantFP::getValueAPF
const APFloat & getValueAPF() const
Definition: Constants.h:297
NVPTXMCAsmInfo.h
ret
to esp esp setne al movzbw ax esp setg cl movzbw cx cmove cx cl jne LBB1_2 esp ret(also really horrible code on ppc). This is due to the expand code for 64-bit compares. GCC produces multiple branches
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:79
llvm::MachineRegisterInfo::getNumVirtRegs
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
Definition: MachineRegisterInfo.h:765
Operator.h
llvm::Register::index2VirtReg
static Register index2VirtReg(unsigned Index)
Convert a 0-based index to a virtual register number.
Definition: Register.h:84
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:119
llvm::MCInst
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
NVPTXTargetStreamer.h
llvm::MCBinaryExpr
Binary assembler expressions.
Definition: MCExpr.h:481
caller
int caller(int32 arg1, int32 arg2)
Definition: README.txt:681
llvm::Type::isFloatingPointTy
bool isFloatingPointTy() const
Return true if this is one of the six floating-point types.
Definition: Type.h:163
llvm::format_hex_no_prefix
FormattedNumber format_hex_no_prefix(uint64_t N, unsigned Width, bool Upper=false)
format_hex_no_prefix - Output N as a fixed width hexadecimal.
Definition: Format.h:199
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::insert
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::count
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1620
llvm::MCUnaryExpr
Unary assembler expressions.
Definition: MCExpr.h:425
NVPTXRegisterInfo.h
ConstantFolding.h
llvm::NVPTXII::IsTexFlag
@ IsTexFlag
Definition: NVPTXBaseInfo.h:35
llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:25
llvm::isTexture
bool isTexture(const Value &val)
Definition: NVPTXUtilities.cpp:133
llvm::MachineOperand::MO_Register
@ MO_Register
Register operand.
Definition: MachineOperand.h:51
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
llvm::MachineLoopInfo
Definition: MachineLoopInfo.h:89
NVPTX.h
MachineRegisterInfo.h
llvm::AsmPrinter::EmitToStreamer
void EmitToStreamer(MCStreamer &S, const MCInst &Inst)
Definition: AsmPrinter.cpp:401
llvm::isSampler
bool isSampler(const Value &val)
Definition: NVPTXUtilities.cpp:155
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::ComputeValueVTs
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty, SmallVectorImpl< EVT > &ValueVTs, SmallVectorImpl< uint64_t > *Offsets=nullptr, uint64_t StartingOffset=0)
ComputeValueVTs - Given an LLVM IR type, compute a sequence of EVTs that represent all the individual...
Definition: Analysis.cpp:121
NVPTXUtilities.h
MachineValueType.h
llvm::MCInst::setOpcode
void setOpcode(unsigned Op)
Definition: MCInst.h:197
cl_common_defines.h
NVPTXTargetInfo.h
Instruction.h
llvm::MachineBasicBlock::getSymbol
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
Definition: MachineBasicBlock.cpp:57
CommandLine.h
llvm::NVPTXFloatMCExpr::createConstantFPSingle
static const NVPTXFloatMCExpr * createConstantFPSingle(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:48
llvm::NVPTXAsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &F) override
Emit the specified function out to the OutStreamer.
Definition: NVPTXAsmPrinter.cpp:477
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
bb
< i1 > br i1 label label bb bb
Definition: README.txt:978
llvm::getReqNTIDy
bool getReqNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:258
printMCExpr
static void printMCExpr(const MCExpr *E, raw_ostream &OS)
Definition: SystemZAsmParser.cpp:703
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:650
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:205
llvm::MCContext::getOrCreateSymbol
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:204
GlobalValue.h
MachineLoopInfo.h
llvm::ConstantDataSequential
ConstantDataSequential - A vector or array constant whose element type is a simple 1/2/4/8-byte integ...
Definition: Constants.h:569
TargetMachine.h
llvm::GlobalValue::isDeclaration
bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition: Globals.cpp:241
llvm::GetUnrollMetadata
MDNode * GetUnrollMetadata(MDNode *LoopID, StringRef Name)
Given an llvm.loop loop id metadata node, returns the loop hint metadata node with the given name (fo...
Definition: LoopUnroll.cpp:848
llvm::APInt::lshrInPlace
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:839
llvm::GlobalValue::hasAppendingLinkage
bool hasAppendingLinkage() const
Definition: GlobalValue.h:447
Constants.h
llvm::Constant::isNullValue
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition: Constants.cpp:76
SmallString.h
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::MachineOperand::MO_GlobalAddress
@ MO_GlobalAddress
Address of a global value.
Definition: MachineOperand.h:61
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::MachineOperand::getImm
int64_t getImm() const
Definition: MachineOperand.h:546
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:738
llvm::User
Definition: User.h:44
llvm::MCExpr::Target
@ Target
Target specific expression.
Definition: MCExpr.h:42
llvm::ADDRESS_SPACE_GLOBAL
@ ADDRESS_SPACE_GLOBAL
Definition: NVPTXBaseInfo.h:23
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
llvm::AsmPrinter::OutStreamer
std::unique_ptr< MCStreamer > OutStreamer
This is the MCStreamer object for the file we are generating.
Definition: AsmPrinter.h:96
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
Twine.h
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::DICompileUnit::LineTablesOnly
@ LineTablesOnly
Definition: DebugInfoMetadata.h:1324
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::Register::isPhysicalRegister
static bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:45
MCSymbol.h
llvm::GlobalValue::hasPrivateLinkage
bool hasPrivateLinkage() const
Definition: GlobalValue.h:449
llvm::NVPTXSubtarget::getPTXVersion
unsigned getPTXVersion() const
Definition: NVPTXSubtarget.h:83
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
MCInst.h
DenseSet.h
llvm::MCBinaryExpr::getRHS
const MCExpr * getRHS() const
Get the right-hand side expression of the binary operator.
Definition: MCExpr.h:631
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:197
llvm::getSamplerName
std::string getSamplerName(const Value &val)
Definition: NVPTXUtilities.cpp:237
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:48
llvm::ConstantArray
ConstantArray - Constant Array Declarations.
Definition: Constants.h:409
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::size
size_type size() const
Definition: DenseSet.h:81
llvm::NVPTXTargetMachine::getManagedStrPool
ManagedStringPool * getManagedStrPool() const
Definition: NVPTXTargetMachine.h:50
llvm::getReqNTIDz
bool getReqNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:262
llvm::EVT::isInteger
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:144
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::NVPTXSubtarget::getRegisterInfo
const NVPTXRegisterInfo * getRegisterInfo() const override
Definition: NVPTXSubtarget.h:61
llvm::ADDRESS_SPACE_CONST
@ ADDRESS_SPACE_CONST
Definition: NVPTXBaseInfo.h:25
llvm::AsmPrinter::CurrentFnSym
MCSymbol * CurrentFnSym
The symbol for the current function.
Definition: AsmPrinter.h:118
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1478
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:54
llvm::ConstantFP
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
APFloat.h
llvm::MCSymbolRefExpr::getSymbol
const MCSymbol & getSymbol() const
Definition: MCExpr.h:399
llvm::raw_ostream::flush
void flush()
Definition: raw_ostream.h:187
llvm::MachineFrameInfo::getStackSize
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
Definition: MachineFrameInfo.h:577
DebugLoc.h
llvm::GlobalValue::hasInternalLinkage
bool hasInternalLinkage() const
Definition: GlobalValue.h:448
llvm::Type::PointerTyID
@ PointerTyID
Pointers.
Definition: Type.h:73
Align
uint64_t Align
Definition: ELFObjHandler.cpp:81
__CLK_FILTER_MASK
@ __CLK_FILTER_MASK
Definition: cl_common_defines.h:97
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1129
llvm::MCExpr::getKind
ExprKind getKind() const
Definition: MCExpr.h:81
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::DICompileUnit::DebugDirectivesOnly
@ DebugDirectivesOnly
Definition: DebugInfoMetadata.h:1325
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MCSymbol::print
void print(raw_ostream &OS, const MCAsmInfo *MAI) const
print - Print the value to the stream OS.
Definition: MCSymbol.cpp:58
llvm::MCUnaryExpr::Minus
@ Minus
Unary minus.
Definition: MCExpr.h:429
llvm::Value::use_empty
bool use_empty() const
Definition: Value.h:344
llvm::AsmPrinter::emitBasicBlockStart
virtual void emitBasicBlockStart(const MachineBasicBlock &MBB)
Targets can override this to emit stuff at the start of a basic block.
Definition: AsmPrinter.cpp:3449
llvm::MachineLoopInfo::isLoopHeader
bool isLoopHeader(const MachineBasicBlock *BB) const
True if the block is a loop header node.
Definition: MachineLoopInfo.h:141
Type.h
llvm::MCExpr::Binary
@ Binary
Binary expressions.
Definition: MCExpr.h:38
llvm::DICompileUnit::FullDebug
@ FullDebug
Definition: DebugInfoMetadata.h:1323
NVPTXMachineFunctionInfo.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:94
llvm::getNVPTXRegClassStr
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:73
llvm::NVPTX::NVCL
@ NVCL
Definition: NVPTX.h:71
llvm::MCInst::addOperand
void addOperand(const MCOperand Op)
Definition: MCInst.h:210
llvm::SmallString< 128 >
llvm::ManagedStringPool::getManagedString
std::string * getManagedString(const char *S)
Definition: ManagedStringPool.h:39
llvm::getTheNVPTXTarget64
Target & getTheNVPTXTarget64()
Definition: NVPTXTargetInfo.cpp:17
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:642
llvm::MCConstantExpr
Definition: MCExpr.h:144
llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:31
llvm::NVPTXFloatMCExpr::createConstantFPHalf
static const NVPTXFloatMCExpr * createConstantFPHalf(const APFloat &Flt, MCContext &Ctx)
Definition: NVPTXMCExpr.h:43
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:191
llvm::isKernelFunction
bool isKernelFunction(const Function &F)
Definition: NVPTXUtilities.cpp:274
llvm::DenseSet
Implements a dense probed hash-table based set.
Definition: DenseSet.h:268
llvm::NVPTXII::IsSuldMask
@ IsSuldMask
Definition: NVPTXBaseInfo.h:36
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:640
llvm::NVPTXRegisterInfo::getName
const char * getName(unsigned RegNo) const
Definition: NVPTXRegisterInfo.h:52
BasicBlock.h
llvm::GlobalObject::hasSection
bool hasSection() const
Check if this global has a custom object file section.
Definition: GlobalObject.h:103
llvm::APFloat
Definition: APFloat.h:700
llvm::GlobalValue::hasAvailableExternallyLinkage
bool hasAvailableExternallyLinkage() const
Definition: GlobalValue.h:434
LLVMInitializeNVPTXAsmPrinter
LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter()
Definition: NVPTXAsmPrinter.cpp:2148
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:239
llvm::GlobalValue
Definition: GlobalValue.h:44
NVPTXAsmPrinter.h
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
VI
@ VI
Definition: SIInstrInfo.cpp:7831
llvm::NVPTXSubtarget::hasImageHandles
bool hasImageHandles() const
Definition: NVPTXSubtarget.cpp:55
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
llvm::TargetRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(unsigned i) const
Returns the register class associated with the enumeration value.
Definition: TargetRegisterInfo.h:750
DEPOTNAME
#define DEPOTNAME
Definition: NVPTXAsmPrinter.cpp:93
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:112
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:66
llvm::ARMII::VecSize
@ VecSize
Definition: ARMBaseInfo.h:421
llvm::GlobalValue::getParent
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:577
LLVM_EXTERNAL_VISIBILITY
#define LLVM_EXTERNAL_VISIBILITY
Definition: Compiler.h:126
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::Type::isIntOrPtrTy
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:206
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::NVPTXAsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: NVPTXAsmPrinter.cpp:773
llvm::DenseMap< unsigned, unsigned >
DebugInfo.h
NVPTXBaseInfo.h
I
#define I(x, y, z)
Definition: MD5.cpp:58
Analysis.h
StringExtras.h
isEmptyXXStructor
static bool isEmptyXXStructor(GlobalVariable *GV)
Definition: NVPTXAsmPrinter.cpp:752
llvm::DICompileUnit
Compile unit.
Definition: DebugInfoMetadata.h:1316
llvm::ADDRESS_SPACE_SHARED
@ ADDRESS_SPACE_SHARED
Definition: NVPTXBaseInfo.h:24
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
useFuncSeen
static bool useFuncSeen(const Constant *C, DenseMap< const Function *, bool > &seenMap)
Definition: NVPTXAsmPrinter.cpp:677
llvm::MachineOperand::getType
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Definition: MachineOperand.h:218
printMemOperand
static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, const MachineFunction *MF, const Module *M, const MachineFrameInfo *MFI, const TargetInstrInfo *TII, LLVMContext &Ctx)
Definition: SelectionDAGDumper.cpp:517
llvm::Type::isHalfTy
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:142
llvm::MachineOperand::getFPImm
const ConstantFP * getFPImm() const
Definition: MachineOperand.h:556
llvm::Register::isVirtualRegister
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:152
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::AsmPrinter::getSymbol
MCSymbol * getSymbol(const GlobalValue *GV) const
Definition: AsmPrinter.cpp:654
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:864
llvm::NVPTXMachineFunctionInfo::getImageHandleSymbol
const char * getImageHandleSymbol(unsigned Idx) const
Returns the symbol name at the given index.
Definition: NVPTXMachineFunctionInfo.h:43
llvm::MachineFunction::getFrameInfo
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Definition: MachineFunction.h:656
__CLK_FILTER_BASE
@ __CLK_FILTER_BASE
Definition: cl_common_defines.h:93
llvm::AsmPrinter::MF
MachineFunction * MF
The current machine function.
Definition: AsmPrinter.h:99
llvm::AsmPrinter::OutContext
MCContext & OutContext
This is the context for the output file that we are streaming.
Definition: AsmPrinter.h:91
MachineModuleInfo.h
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::MachineOperand::MO_FPImmediate
@ MO_FPImmediate
Floating-point immediate operand.
Definition: MachineOperand.h:54
__CLK_NORMALIZED_BASE
@ __CLK_NORMALIZED_BASE
Definition: cl_common_defines.h:85
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
VisitGlobalVariableForEmission
static void VisitGlobalVariableForEmission(const GlobalVariable *GV, SmallVectorImpl< const GlobalVariable * > &Order, DenseSet< const GlobalVariable * > &Visited, DenseSet< const GlobalVariable * > &Visiting)
VisitGlobalVariableForEmission - Add GV to the list of GlobalVariable instances to be emitted,...
Definition: NVPTXAsmPrinter.cpp:115
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:359
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:358
llvm::MDNode
Metadata node.
Definition: Metadata.h:937
llvm::MCSymbolRefExpr
Represent a reference to a symbol from inside an expression.
Definition: MCExpr.h:192
llvm::NVPTXII::IsTexModeUnifiedFlag
@ IsTexModeUnifiedFlag
Definition: NVPTXBaseInfo.h:40
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::MachineFunction
Definition: MachineFunction.h:241
Triple.h
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1588
llvm::AsmPrinter::runOnMachineFunction
bool runOnMachineFunction(MachineFunction &MF) override
Emit the specified function out to the OutStreamer.
Definition: AsmPrinter.h:376
llvm::MCBinaryExpr::createAdd
static const MCBinaryExpr * createAdd(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:525
llvm::NVPTXAsmPrinter::getVirtualRegisterName
std::string getVirtualRegisterName(unsigned) const
Definition: NVPTXAsmPrinter.cpp:574
llvm::DICompileUnit::NoDebug
@ NoDebug
Definition: DebugInfoMetadata.h:1322
llvm::AsmPrinter::GetExternalSymbolSymbol
MCSymbol * GetExternalSymbolSymbol(StringRef Sym) const
Return the MCSymbol for the specified ExternalSymbol.
Definition: AsmPrinter.cpp:3377
llvm::NVPTXII::IsSurfTexQueryFlag
@ IsSurfTexQueryFlag
Definition: NVPTXBaseInfo.h:39
llvm::MCUnaryExpr::getSubExpr
const MCExpr * getSubExpr() const
Get the child of this unary expression.
Definition: MCExpr.h:471
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:561
DataLayout.h
llvm::StructType
Class to represent struct types.
Definition: DerivedTypes.h:213
llvm::APFloatBase::IEEEdouble
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:173
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
GetSymbolRef
static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, HexagonAsmPrinter &Printer, bool MustExtend)
Definition: HexagonMCInstLower.cpp:41
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::MCOperand::createReg
static MCOperand createReg(unsigned Reg)
Definition: MCInst.h:134
TargetLoweringObjectFile.h
A
* A
Definition: README_ALTIVEC.txt:89
llvm::MachineOperand::MO_MachineBasicBlock
@ MO_MachineBasicBlock
MachineBasicBlock reference.
Definition: MachineOperand.h:55
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::ConstantExpr::getIntegerCast
static Constant * getIntegerCast(Constant *C, Type *Ty, bool IsSigned)
Create a ZExt, Bitcast or Trunc for integer -> integer casts.
Definition: Constants.cpp:2096
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:305
llvm::DenseMapBase::insert
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:209
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::MachineFrameInfo::getMaxAlign
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
Definition: MachineFrameInfo.h:593
llvm::Value::stripPointerCasts
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:682
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::AsmPrinter::MMI
MachineModuleInfo * MMI
This is a pointer to the current MachineModuleInfo.
Definition: AsmPrinter.h:102
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:142
llvm::Type::IntegerTyID
@ IntegerTyID
Arbitrary bit width integers.
Definition: Type.h:71
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
Attributes.h
canDemoteGlobalVar
static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f)
Definition: NVPTXAsmPrinter.cpp:659
llvm::NVPTXTargetStreamer
Implements NVPTX-specific streamer.
Definition: NVPTXTargetStreamer.h:18
name
static const char * name
Definition: SVEIntrinsicOpts.cpp:74
j
return j(j<< 16)
Constant.h
llvm::Twine
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
llvm::MCBinaryExpr::getOpcode
Opcode getOpcode() const
Get the kind of this binary expression.
Definition: MCExpr.h:625
llvm::ConstantExpr
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:971
llvm::GraphProgram::Name
Name
Definition: GraphWriter.h:50
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:606
llvm::getMaxNTIDz
bool getMaxNTIDz(const Function &F, unsigned &z)
Definition: NVPTXUtilities.cpp:250
llvm::NVPTX::CUDA
@ CUDA
Definition: NVPTX.h:72
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Definition: MachineFunction.h:636
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:341
MachineFrameInfo.h
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Type::FixedVectorTyID
@ FixedVectorTyID
Fixed width SIMD vector type.
Definition: Type.h:76
llvm::MachineOperand::MO_ExternalSymbol
@ MO_ExternalSymbol
Name of external global symbol.
Definition: MachineOperand.h:60
GlobalVariable.h
llvm::isSurface
bool isSurface(const Value &val)
Definition: NVPTXUtilities.cpp:144
llvm::MCUnaryExpr::LNot
@ LNot
Logical negation.
Definition: MCExpr.h:428
Casting.h
llvm::NVPTXTargetMachine::getSubtargetImpl
const NVPTXSubtarget * getSubtargetImpl(const Function &) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: NVPTXTargetMachine.h:43
NVPTXTargetMachine.h
Function.h
__CLK_ADDRESS_MASK
@ __CLK_ADDRESS_MASK
Definition: cl_common_defines.h:79
llvm::DenseMapBase::size
unsigned size() const
Definition: DenseMap.h:101
llvm::APInt::getLoBits
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:605
llvm::LLVMTargetMachine
This class describes a target machine that is implemented with the LLVM target-independent code gener...
Definition: TargetMachine.h:405
llvm::MCSymbolRefExpr::create
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
llvm::TargetRegisterInfo::getNumRegClasses
unsigned getNumRegClasses() const
Definition: TargetRegisterInfo.h:744
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::MachineOperand::isImm
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Definition: MachineOperand.h:322
llvm::NVPTXII::IsSuldShift
@ IsSuldShift
Definition: NVPTXBaseInfo.h:37
llvm::getAlign
bool getAlign(const Function &F, unsigned index, unsigned &align)
Definition: NVPTXUtilities.cpp:284
llvm::getSurfaceName
std::string getSurfaceName(const Value &val)
Definition: NVPTXUtilities.cpp:232
llvm::ARMBuildAttrs::Symbol
@ Symbol
Definition: ARMBuildAttributes.h:83
llvm::getMaxNTIDy
bool getMaxNTIDy(const Function &F, unsigned &y)
Definition: NVPTXUtilities.cpp:246
llvm::SPIRV::ExecutionMode::Initializer
@ Initializer
llvm::isImageWriteOnly
bool isImageWriteOnly(const Value &val)
Definition: NVPTXUtilities.cpp:188
llvm::Type::StructTyID
@ StructTyID
Structures.
Definition: Type.h:74
llvm::LegalityPredicates::isVector
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Definition: LegalityPredicates.cpp:73
llvm::MachineFrameInfo
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
Definition: MachineFrameInfo.h:105
llvm::APFloatBase::rmNearestTiesToEven
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:189
llvm::clearAnnotationCache
void clearAnnotationCache(const Module *Mod)
Definition: NVPTXUtilities.cpp:41
llvm::MCExpr::print
void print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens=false) const
Definition: MCExpr.cpp:41
llvm::APFloat::convert
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4836
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
llvm::AsmPrinter::TM
TargetMachine & TM
Target machine description.
Definition: AsmPrinter.h:84
SmallVector.h
llvm::NVPTXSubtarget::getTargetName
std::string getTargetName() const
Definition: NVPTXSubtarget.h:81
User.h
llvm::MCExpr::SymbolRef
@ SymbolRef
References to labels and assigned expressions.
Definition: MCExpr.h:40
__CLK_NORMALIZED_MASK
@ __CLK_NORMALIZED_MASK
Definition: cl_common_defines.h:88
llvm::MachineOperand::getSymbolName
const char * getSymbolName() const
Definition: MachineOperand.h:617
llvm::NVPTXII::IsSustFlag
@ IsSustFlag
Definition: NVPTXBaseInfo.h:38
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a reference to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:164
llvm::MCExpr::Unary
@ Unary
Unary expressions.
Definition: MCExpr.h:41
MCStreamer.h
UnrollLoop.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::MCExpr::Constant
@ Constant
Constant expressions.
Definition: MCExpr.h:39
llvm::raw_svector_ostream
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:662
NVPTXSubtarget.h
llvm::NVPTXMachineFunctionInfo
Definition: NVPTXMachineFunctionInfo.h:20
llvm::AsmPrinter::getDataLayout
const DataLayout & getDataLayout() const
Return information about data layout.
Definition: AsmPrinter.cpp:386
llvm::detail::DenseSetImpl< ValueT, DenseMap< ValueT, detail::DenseSetEmpty, DenseMapInfo< ValueT >, detail::DenseSetPair< ValueT > >, DenseMapInfo< ValueT > >::erase
bool erase(const ValueT &V)
Definition: DenseSet.h:101
llvm::StringRef::data
const LLVM_NODISCARD char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:149
llvm::GlobalValue::hasLinkOnceLinkage
bool hasLinkOnceLinkage() const
Definition: GlobalValue.h:437
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
llvm::ConstantFoldConstant
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
Definition: ConstantFolding.cpp:1159
MachineOperand.h
llvm::GlobalValue::getType
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:270
DerivedTypes.h
llvm::GlobalValue::getValueType
Type * getValueType() const
Definition: GlobalValue.h:272
llvm::MCOperand
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:36
llvm::NVPTXAsmPrinter::getFunctionFrameSymbol
const MCSymbol * getFunctionFrameSymbol() const override
Return symbol for the function pseudo stack if the stack frame is not register based.
Definition: NVPTXAsmPrinter.cpp:499
llvm::AsmPrinter::emitInitialRawDwarfLocDirective
void emitInitialRawDwarfLocDirective(const MachineFunction &MF)
Emits initial debug location directive.
Definition: AsmPrinter.cpp:405
llvm::MCSymbolRefExpr::VK_None
@ VK_None
Definition: MCExpr.h:195
llvm::Type::isAggregateType
bool isAggregateType() const
Return true if the type is an aggregate type.
Definition: Type.h:257
LLVMContext.h
llvm::NVPTXTargetMachine::getDrvInterface
NVPTX::DrvInterface getDrvInterface() const
Definition: NVPTXTargetMachine.h:49
llvm::RegisterAsmPrinter
RegisterAsmPrinter - Helper template for registering a target specific assembly printer,...
Definition: TargetRegistry.h:1359
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::getMaxNTIDx
bool getMaxNTIDx(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:242
llvm::getTextureName
std::string getTextureName(const Value &val)
Definition: NVPTXUtilities.cpp:227
raw_ostream.h
llvm::getMaxNReg
bool getMaxNReg(const Function &F, unsigned &x)
Definition: NVPTXUtilities.cpp:270
n
The same transformation can work with an even modulo with the addition of a and shrink the compare RHS by the same amount Unless the target supports that transformation probably isn t worthwhile The transformation can also easily be made to work with non zero equality for n
Definition: README.txt:685
MachineFunction.h
llvm::getNVPTXRegClassName
std::string getNVPTXRegClassName(TargetRegisterClass const *RC)
Definition: NVPTXRegisterInfo.cpp:29
TargetRegistry.h
MCExpr.h
llvm::isManaged
bool isManaged(const Value &val)
Definition: NVPTXUtilities.cpp:216
CU
Definition: AArch64AsmBackend.cpp:505
llvm::AsmPrinter::getFunctionNumber
unsigned getFunctionNumber() const
Return a unique ID for the current function.
Definition: AsmPrinter.cpp:378
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
TargetRegisterInfo.h
llvm::MCExpr
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
NVPTXMCExpr.h
llvm::Type::HalfTyID
@ HalfTyID
16-bit floating point type
Definition: Type.h:56
llvm::Value::users
iterator_range< user_iterator > users()
Definition: Value.h:421
llvm::AsmPrinter::PrintAsmOperand
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, const char *ExtraCode, raw_ostream &OS)
Print the specified operand of MI, an INLINEASM instruction, using the specified assembler variant.
Definition: AsmPrinterInlineAsm.cpp:467
llvm::MCUnaryExpr::Not
@ Not
Bitwise negation.
Definition: MCExpr.h:430
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:164
llvm::MCBinaryExpr::getLHS
const MCExpr * getLHS() const
Get the left-hand side expression of the binary operator.
Definition: MCExpr.h:628
llvm::AsmPrinter::doInitialization
bool doInitialization(Module &M) override
Set up the AsmPrinter when we are working on a new module.
Definition: AsmPrinter.cpp:430
llvm::getTheNVPTXTarget32
Target & getTheNVPTXTarget32()
Definition: NVPTXTargetInfo.cpp:13